author		Ingo Molnar <mingo@kernel.org>	2016-11-01 07:47:40 +0100
committer	Ingo Molnar <mingo@kernel.org>	2016-11-01 07:47:40 +0100
commit		c29c716662d0cefc0cda4903aea5ed6794174679 (patch)
tree		01d4dc47933e1bcbeca8d53b0cf5c1f70da04535 /arch
parent		e63650840e8b053aa09ad934877e87e9941ed135 (diff)
parent		405c0759712f57b680f66aee9c55cd06ad1cbdef (diff)
Merge branch 'core/urgent' into x86/fpu, to merge fixes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch')
182 files changed, 2305 insertions(+), 1421 deletions(-)
diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index d9ee817..940dfb4 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -157,14 +157,16 @@ put_reg(struct task_struct *task, unsigned long regno, unsigned long data) static inline int read_int(struct task_struct *task, unsigned long addr, int * data) { - int copied = access_process_vm(task, addr, data, sizeof(int), 0); + int copied = access_process_vm(task, addr, data, sizeof(int), + FOLL_FORCE); return (copied == sizeof(int)) ? 0 : -EIO; } static inline int write_int(struct task_struct *task, unsigned long addr, int data) { - int copied = access_process_vm(task, addr, &data, sizeof(int), 1); + int copied = access_process_vm(task, addr, &data, sizeof(int), + FOLL_FORCE | FOLL_WRITE); return (copied == sizeof(int)) ? 0 : -EIO; } @@ -281,7 +283,8 @@ long arch_ptrace(struct task_struct *child, long request, /* When I and D space are separate, these will need to be fixed. */ case PTRACE_PEEKTEXT: /* read word at location addr. */ case PTRACE_PEEKDATA: - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); + copied = access_process_vm(child, addr, &tmp, sizeof(tmp), + FOLL_FORCE); ret = -EIO; if (copied != sizeof(tmp)) break; diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index ecd1237..bd204bf 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -41,6 +41,8 @@ config ARC select PERF_USE_VMALLOC select HAVE_DEBUG_STACKOVERFLOW select HAVE_GENERIC_DMA_COHERENT + select HAVE_KERNEL_GZIP + select HAVE_KERNEL_LZMA config MIGHT_HAVE_PCI bool @@ -186,14 +188,6 @@ if SMP config ARC_HAS_COH_CACHES def_bool n -config ARC_MCIP - bool "ARConnect Multicore IP (MCIP) Support " - depends on ISA_ARCV2 - help - This IP block enables SMP in ARC-HS38 cores. - It provides for cross-core interrupts, multi-core debug - hardware semaphores, shared memory,.... - config NR_CPUS int "Maximum number of CPUs (2-4096)" range 2 4096 @@ -211,6 +205,15 @@ config ARC_SMP_HALT_ON_RESET endif #SMP +config ARC_MCIP + bool "ARConnect Multicore IP (MCIP) Support " + depends on ISA_ARCV2 + default y if SMP + help + This IP block enables SMP in ARC-HS38 cores. + It provides for cross-core interrupts, multi-core debug + hardware semaphores, shared memory,.... 
+ menuconfig ARC_CACHE bool "Enable Cache Support" default y @@ -537,14 +540,6 @@ config ARC_DBG_TLB_PARANOIA bool "Paranoia Checks in Low Level TLB Handlers" default n -config ARC_DBG_TLB_MISS_COUNT - bool "Profile TLB Misses" - default n - select DEBUG_FS - help - Counts number of I and D TLB Misses and exports them via Debugfs - The counters can be cleared via Debugfs as well - endif config ARC_UBOOT_SUPPORT diff --git a/arch/arc/Makefile b/arch/arc/Makefile index aa82d13..864adad 100644 --- a/arch/arc/Makefile +++ b/arch/arc/Makefile @@ -50,9 +50,6 @@ atleast_gcc44 := $(call cc-ifversion, -ge, 0404, y) cflags-$(atleast_gcc44) += -fsection-anchors -cflags-$(CONFIG_ARC_HAS_LLSC) += -mlock -cflags-$(CONFIG_ARC_HAS_SWAPE) += -mswape - ifdef CONFIG_ISA_ARCV2 ifndef CONFIG_ARC_HAS_LL64 diff --git a/arch/arc/boot/Makefile b/arch/arc/boot/Makefile index e597cb34..f94cf15 100644 --- a/arch/arc/boot/Makefile +++ b/arch/arc/boot/Makefile @@ -14,9 +14,15 @@ UIMAGE_ENTRYADDR = $(LINUX_START_TEXT) suffix-y := bin suffix-$(CONFIG_KERNEL_GZIP) := gz +suffix-$(CONFIG_KERNEL_LZMA) := lzma -targets += uImage uImage.bin uImage.gz -extra-y += vmlinux.bin vmlinux.bin.gz +targets += uImage +targets += uImage.bin +targets += uImage.gz +targets += uImage.lzma +extra-y += vmlinux.bin +extra-y += vmlinux.bin.gz +extra-y += vmlinux.bin.lzma $(obj)/vmlinux.bin: vmlinux FORCE $(call if_changed,objcopy) @@ -24,12 +30,18 @@ $(obj)/vmlinux.bin: vmlinux FORCE $(obj)/vmlinux.bin.gz: $(obj)/vmlinux.bin FORCE $(call if_changed,gzip) +$(obj)/vmlinux.bin.lzma: $(obj)/vmlinux.bin FORCE + $(call if_changed,lzma) + $(obj)/uImage.bin: $(obj)/vmlinux.bin FORCE $(call if_changed,uimage,none) $(obj)/uImage.gz: $(obj)/vmlinux.bin.gz FORCE $(call if_changed,uimage,gzip) +$(obj)/uImage.lzma: $(obj)/vmlinux.bin.lzma FORCE + $(call if_changed,uimage,lzma) + $(obj)/uImage: $(obj)/uImage.$(suffix-y) @ln -sf $(notdir $<) $@ @echo ' Image $@ is ready' diff --git a/arch/arc/include/asm/arcregs.h b/arch/arc/include/asm/arcregs.h index db25c65..7f3f9f6 100644 --- a/arch/arc/include/asm/arcregs.h +++ b/arch/arc/include/asm/arcregs.h @@ -349,10 +349,11 @@ struct cpuinfo_arc { struct cpuinfo_arc_bpu bpu; struct bcr_identity core; struct bcr_isa isa; + const char *details, *name; unsigned int vec_base; struct cpuinfo_arc_ccm iccm, dccm; struct { - unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, pad1:3, + unsigned int swap:1, norm:1, minmax:1, barrel:1, crc:1, swape:1, pad1:2, fpu_sp:1, fpu_dp:1, pad2:6, debug:1, ap:1, smart:1, rtt:1, pad3:4, timer0:1, timer1:1, rtc:1, gfrc:1, pad4:4; diff --git a/arch/arc/include/asm/cache.h b/arch/arc/include/asm/cache.h index fb781e3..b3410ff 100644 --- a/arch/arc/include/asm/cache.h +++ b/arch/arc/include/asm/cache.h @@ -53,7 +53,7 @@ extern void arc_cache_init(void); extern char *arc_cache_mumbojumbo(int cpu_id, char *buf, int len); extern void read_decode_cache_bcr(void); -extern int ioc_exists; +extern int ioc_enable; extern unsigned long perip_base, perip_end; #endif /* !__ASSEMBLY__ */ diff --git a/arch/arc/include/asm/elf.h b/arch/arc/include/asm/elf.h index 7096f97..aa2d6da 100644 --- a/arch/arc/include/asm/elf.h +++ b/arch/arc/include/asm/elf.h @@ -54,7 +54,7 @@ extern int elf_check_arch(const struct elf32_hdr *); * the loader. We need to make sure that it is out of the way of the program * that it will "exec", and that there is sufficient room for the brk. 
*/ -#define ELF_ET_DYN_BASE (2 * TASK_SIZE / 3) +#define ELF_ET_DYN_BASE (2UL * TASK_SIZE / 3) /* * When the program starts, a1 contains a pointer to a function to be diff --git a/arch/arc/include/asm/mcip.h b/arch/arc/include/asm/mcip.h index 847e3bb..c8fbe41 100644 --- a/arch/arc/include/asm/mcip.h +++ b/arch/arc/include/asm/mcip.h @@ -55,6 +55,22 @@ struct mcip_cmd { #define IDU_M_DISTRI_DEST 0x2 }; +struct mcip_bcr { +#ifdef CONFIG_CPU_BIG_ENDIAN + unsigned int pad3:8, + idu:1, llm:1, num_cores:6, + iocoh:1, gfrc:1, dbg:1, pad2:1, + msg:1, sem:1, ipi:1, pad:1, + ver:8; +#else + unsigned int ver:8, + pad:1, ipi:1, sem:1, msg:1, + pad2:1, dbg:1, gfrc:1, iocoh:1, + num_cores:6, llm:1, idu:1, + pad3:8; +#endif +}; + /* * MCIP programming model * diff --git a/arch/arc/include/asm/module.h b/arch/arc/include/asm/module.h index 518222b..6e91d8b 100644 --- a/arch/arc/include/asm/module.h +++ b/arch/arc/include/asm/module.h @@ -18,6 +18,7 @@ struct mod_arch_specific { void *unw_info; int unw_sec_idx; + const char *secstr; }; #endif diff --git a/arch/arc/include/asm/setup.h b/arch/arc/include/asm/setup.h index 48b37c6..cb954cd 100644 --- a/arch/arc/include/asm/setup.h +++ b/arch/arc/include/asm/setup.h @@ -27,11 +27,6 @@ struct id_to_str { const char *str; }; -struct cpuinfo_data { - struct id_to_str info; - int up_range; -}; - extern int root_mountflags, end_mem; void setup_processor(void); @@ -43,5 +38,6 @@ void __init setup_arch_memory(void); #define IS_USED_RUN(v) ((v) ? "" : "(not used) ") #define IS_USED_CFG(cfg) IS_USED_RUN(IS_ENABLED(cfg)) #define IS_AVAIL2(v, s, cfg) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_USED_CFG(cfg)) +#define IS_AVAIL3(v, v2, s) IS_AVAIL1(v, s), IS_AVAIL1(v, IS_DISABLED_RUN(v2)) #endif /* __ASMARC_SETUP_H */ diff --git a/arch/arc/include/asm/syscalls.h b/arch/arc/include/asm/syscalls.h index e56f9fc..772b67c 100644 --- a/arch/arc/include/asm/syscalls.h +++ b/arch/arc/include/asm/syscalls.h @@ -17,6 +17,7 @@ int sys_clone_wrapper(int, int, int, int, int); int sys_cacheflush(uint32_t, uint32_t uint32_t); int sys_arc_settls(void *); int sys_arc_gettls(void); +int sys_arc_usr_cmpxchg(int *, int, int); #include <asm-generic/syscalls.h> diff --git a/arch/arc/include/uapi/asm/unistd.h b/arch/arc/include/uapi/asm/unistd.h index 41fa2ec..9a34136 100644 --- a/arch/arc/include/uapi/asm/unistd.h +++ b/arch/arc/include/uapi/asm/unistd.h @@ -27,18 +27,19 @@ #define NR_syscalls __NR_syscalls +/* Generic syscall (fs/filesystems.c - lost in asm-generic/unistd.h */ +#define __NR_sysfs (__NR_arch_specific_syscall + 3) + /* ARC specific syscall */ #define __NR_cacheflush (__NR_arch_specific_syscall + 0) #define __NR_arc_settls (__NR_arch_specific_syscall + 1) #define __NR_arc_gettls (__NR_arch_specific_syscall + 2) +#define __NR_arc_usr_cmpxchg (__NR_arch_specific_syscall + 4) __SYSCALL(__NR_cacheflush, sys_cacheflush) __SYSCALL(__NR_arc_settls, sys_arc_settls) __SYSCALL(__NR_arc_gettls, sys_arc_gettls) - - -/* Generic syscall (fs/filesystems.c - lost in asm-generic/unistd.h */ -#define __NR_sysfs (__NR_arch_specific_syscall + 3) +__SYSCALL(__NR_arc_usr_cmpxchg, sys_arc_usr_cmpxchg) __SYSCALL(__NR_sysfs, sys_sysfs) #undef __SYSCALL diff --git a/arch/arc/kernel/mcip.c b/arch/arc/kernel/mcip.c index 72f9179..c424d5a 100644 --- a/arch/arc/kernel/mcip.c +++ b/arch/arc/kernel/mcip.c @@ -15,11 +15,12 @@ #include <asm/mcip.h> #include <asm/setup.h> -static char smp_cpuinfo_buf[128]; -static int idu_detected; - static DEFINE_RAW_SPINLOCK(mcip_lock); +#ifdef CONFIG_SMP + +static char 
smp_cpuinfo_buf[128]; + static void mcip_setup_per_cpu(int cpu) { smp_ipi_irq_setup(cpu, IPI_IRQ); @@ -86,21 +87,7 @@ static void mcip_ipi_clear(int irq) static void mcip_probe_n_setup(void) { - struct mcip_bcr { -#ifdef CONFIG_CPU_BIG_ENDIAN - unsigned int pad3:8, - idu:1, llm:1, num_cores:6, - iocoh:1, gfrc:1, dbg:1, pad2:1, - msg:1, sem:1, ipi:1, pad:1, - ver:8; -#else - unsigned int ver:8, - pad:1, ipi:1, sem:1, msg:1, - pad2:1, dbg:1, gfrc:1, iocoh:1, - num_cores:6, llm:1, idu:1, - pad3:8; -#endif - } mp; + struct mcip_bcr mp; READ_BCR(ARC_REG_MCIP_BCR, mp); @@ -114,7 +101,6 @@ static void mcip_probe_n_setup(void) IS_AVAIL1(mp.gfrc, "GFRC")); cpuinfo_arc700[0].extn.gfrc = mp.gfrc; - idu_detected = mp.idu; if (mp.dbg) { __mcip_cmd_data(CMD_DEBUG_SET_SELECT, 0, 0xf); @@ -130,6 +116,8 @@ struct plat_smp_ops plat_smp_ops = { .ipi_clear = mcip_ipi_clear, }; +#endif + /*************************************************************************** * ARCv2 Interrupt Distribution Unit (IDU) * @@ -295,8 +283,11 @@ idu_of_init(struct device_node *intc, struct device_node *parent) /* Read IDU BCR to confirm nr_irqs */ int nr_irqs = of_irq_count(intc); int i, irq; + struct mcip_bcr mp; + + READ_BCR(ARC_REG_MCIP_BCR, mp); - if (!idu_detected) + if (!mp.idu) panic("IDU not detected, but DeviceTree using it"); pr_info("MCIP: IDU referenced from Devicetree %d irqs\n", nr_irqs); diff --git a/arch/arc/kernel/module.c b/arch/arc/kernel/module.c index 9a28497..42e964d 100644 --- a/arch/arc/kernel/module.c +++ b/arch/arc/kernel/module.c @@ -30,17 +30,9 @@ int module_frob_arch_sections(Elf_Ehdr *hdr, Elf_Shdr *sechdrs, char *secstr, struct module *mod) { #ifdef CONFIG_ARC_DW2_UNWIND - int i; - mod->arch.unw_sec_idx = 0; mod->arch.unw_info = NULL; - - for (i = 1; i < hdr->e_shnum; i++) { - if (strcmp(secstr+sechdrs[i].sh_name, ".eh_frame") == 0) { - mod->arch.unw_sec_idx = i; - break; - } - } + mod->arch.secstr = secstr; #endif return 0; } @@ -59,29 +51,33 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, unsigned int relsec, /* sec index for relo sec */ struct module *module) { - int i, n; + int i, n, relo_type; Elf32_Rela *rel_entry = (void *)sechdrs[relsec].sh_addr; Elf32_Sym *sym_entry, *sym_sec; - Elf32_Addr relocation; - Elf32_Addr location; - Elf32_Addr sec_to_patch; - int relo_type; - - sec_to_patch = sechdrs[sechdrs[relsec].sh_info].sh_addr; + Elf32_Addr relocation, location, tgt_addr; + unsigned int tgtsec; + + /* + * @relsec has relocations e.g. .rela.init.text + * @tgtsec is section to patch e.g. .init.text + */ + tgtsec = sechdrs[relsec].sh_info; + tgt_addr = sechdrs[tgtsec].sh_addr; sym_sec = (Elf32_Sym *) sechdrs[symindex].sh_addr; n = sechdrs[relsec].sh_size / sizeof(*rel_entry); - pr_debug("\n========== Module Sym reloc ===========================\n"); - pr_debug("Section to fixup %x\n", sec_to_patch); + pr_debug("\nSection to fixup %s @%x\n", + module->arch.secstr + sechdrs[tgtsec].sh_name, tgt_addr); pr_debug("=========================================================\n"); - pr_debug("rela->r_off | rela->addend | sym->st_value | ADDR | VALUE\n"); + pr_debug("r_off\tr_add\tst_value ADDRESS VALUE\n"); pr_debug("=========================================================\n"); /* Loop thru entries in relocation section */ for (i = 0; i < n; i++) { + const char *s; /* This is where to make the change */ - location = sec_to_patch + rel_entry[i].r_offset; + location = tgt_addr + rel_entry[i].r_offset; /* This is the symbol it is referring to. Note that all undefined symbols have been resolved. 
*/ @@ -89,10 +85,15 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, relocation = sym_entry->st_value + rel_entry[i].r_addend; - pr_debug("\t%x\t\t%x\t\t%x %x %x [%s]\n", - rel_entry[i].r_offset, rel_entry[i].r_addend, - sym_entry->st_value, location, relocation, - strtab + sym_entry->st_name); + if (sym_entry->st_name == 0 && ELF_ST_TYPE (sym_entry->st_info) == STT_SECTION) { + s = module->arch.secstr + sechdrs[sym_entry->st_shndx].sh_name; + } else { + s = strtab + sym_entry->st_name; + } + + pr_debug(" %x\t%x\t%x %x %x [%s]\n", + rel_entry[i].r_offset, rel_entry[i].r_addend, + sym_entry->st_value, location, relocation, s); /* This assumes modules are built with -mlong-calls * so any branches/jumps are absolute 32 bit jmps @@ -111,6 +112,10 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, goto relo_err; } + + if (strcmp(module->arch.secstr+sechdrs[tgtsec].sh_name, ".eh_frame") == 0) + module->arch.unw_sec_idx = tgtsec; + return 0; relo_err: diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index be1972b..59aa43c 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -41,6 +41,39 @@ SYSCALL_DEFINE0(arc_gettls) return task_thread_info(current)->thr_ptr; } +SYSCALL_DEFINE3(arc_usr_cmpxchg, int *, uaddr, int, expected, int, new) +{ + int uval; + int ret; + + /* + * This is only for old cores lacking LLOCK/SCOND, which by defintion + * can't possibly be SMP. Thus doesn't need to be SMP safe. + * And this also helps reduce the overhead for serializing in + * the UP case + */ + WARN_ON_ONCE(IS_ENABLED(CONFIG_SMP)); + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(int))) + return -EFAULT; + + preempt_disable(); + + ret = __get_user(uval, uaddr); + if (ret) + goto done; + + if (uval != expected) + ret = -EAGAIN; + else + ret = __put_user(new, uaddr); + +done: + preempt_enable(); + + return ret; +} + void arch_cpu_idle(void) { /* sleep, but enable all interrupts before committing */ diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 3df7f9c..0385df7 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -40,6 +40,29 @@ struct task_struct *_current_task[NR_CPUS]; /* For stack switching */ struct cpuinfo_arc cpuinfo_arc700[NR_CPUS]; +static const struct id_to_str arc_cpu_rel[] = { +#ifdef CONFIG_ISA_ARCOMPACT + { 0x34, "R4.10"}, + { 0x35, "R4.11"}, +#else + { 0x51, "R2.0" }, + { 0x52, "R2.1" }, + { 0x53, "R3.0" }, +#endif + { 0x00, NULL } +}; + +static const struct id_to_str arc_cpu_nm[] = { +#ifdef CONFIG_ISA_ARCOMPACT + { 0x20, "ARC 600" }, + { 0x30, "ARC 770" }, /* 750 identified seperately */ +#else + { 0x40, "ARC EM" }, + { 0x50, "ARC HS38" }, +#endif + { 0x00, "Unknown" } +}; + static void read_decode_ccm_bcr(struct cpuinfo_arc *cpu) { if (is_isa_arcompact()) { @@ -92,11 +115,26 @@ static void read_arc_build_cfg_regs(void) struct bcr_timer timer; struct bcr_generic bcr; struct cpuinfo_arc *cpu = &cpuinfo_arc700[smp_processor_id()]; + const struct id_to_str *tbl; + FIX_PTR(cpu); READ_BCR(AUX_IDENTITY, cpu->core); READ_BCR(ARC_REG_ISA_CFG_BCR, cpu->isa); + for (tbl = &arc_cpu_rel[0]; tbl->id != 0; tbl++) { + if (cpu->core.family == tbl->id) { + cpu->details = tbl->str; + break; + } + } + + for (tbl = &arc_cpu_nm[0]; tbl->id != 0; tbl++) { + if ((cpu->core.family & 0xF0) == tbl->id) + break; + } + cpu->name = tbl->str; + READ_BCR(ARC_REG_TIMERS_BCR, timer); cpu->extn.timer0 = timer.t0; cpu->extn.timer1 = timer.t1; @@ -111,6 +149,9 @@ static void read_arc_build_cfg_regs(void) cpu->extn.swap = read_aux_reg(ARC_REG_SWAP_BCR) ? 
1 : 0; /* 1,3 */ cpu->extn.crc = read_aux_reg(ARC_REG_CRC_BCR) ? 1 : 0; cpu->extn.minmax = read_aux_reg(ARC_REG_MIXMAX_BCR) > 1 ? 1 : 0; /* 2 */ + cpu->extn.swape = (cpu->core.family >= 0x34) ? 1 : + IS_ENABLED(CONFIG_ARC_HAS_SWAPE); + READ_BCR(ARC_REG_XY_MEM_BCR, cpu->extn_xymem); /* Read CCM BCRs for boot reporting even if not enabled in Kconfig */ @@ -160,64 +201,38 @@ static void read_arc_build_cfg_regs(void) cpu->extn.rtt = bcr.ver ? 1 : 0; cpu->extn.debug = cpu->extn.ap | cpu->extn.smart | cpu->extn.rtt; -} -static const struct cpuinfo_data arc_cpu_tbl[] = { -#ifdef CONFIG_ISA_ARCOMPACT - { {0x20, "ARC 600" }, 0x2F}, - { {0x30, "ARC 700" }, 0x33}, - { {0x34, "ARC 700 R4.10"}, 0x34}, - { {0x35, "ARC 700 R4.11"}, 0x35}, -#else - { {0x50, "ARC HS38 R2.0"}, 0x51}, - { {0x52, "ARC HS38 R2.1"}, 0x52}, - { {0x53, "ARC HS38 R3.0"}, 0x53}, -#endif - { {0x00, NULL } } -}; + /* some hacks for lack of feature BCR info in old ARC700 cores */ + if (is_isa_arcompact()) { + if (!cpu->isa.ver) /* ISA BCR absent, use Kconfig info */ + cpu->isa.atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC); + else + cpu->isa.atomic = cpu->isa.atomic1; + cpu->isa.be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); + + /* there's no direct way to distinguish 750 vs. 770 */ + if (unlikely(cpu->core.family < 0x34 || cpu->mmu.ver < 3)) + cpu->name = "ARC750"; + } +} static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) { struct cpuinfo_arc *cpu = &cpuinfo_arc700[cpu_id]; struct bcr_identity *core = &cpu->core; - const struct cpuinfo_data *tbl; - char *isa_nm; - int i, be, atomic; - int n = 0; + int i, n = 0; FIX_PTR(cpu); - if (is_isa_arcompact()) { - isa_nm = "ARCompact"; - be = IS_ENABLED(CONFIG_CPU_BIG_ENDIAN); - - atomic = cpu->isa.atomic1; - if (!cpu->isa.ver) /* ISA BCR absent, use Kconfig info */ - atomic = IS_ENABLED(CONFIG_ARC_HAS_LLSC); - } else { - isa_nm = "ARCv2"; - be = cpu->isa.be; - atomic = cpu->isa.atomic; - } - n += scnprintf(buf + n, len - n, "\nIDENTITY\t: ARCVER [%#02x] ARCNUM [%#02x] CHIPID [%#4x]\n", core->family, core->cpu_id, core->chip_id); - for (tbl = &arc_cpu_tbl[0]; tbl->info.id != 0; tbl++) { - if ((core->family >= tbl->info.id) && - (core->family <= tbl->up_range)) { - n += scnprintf(buf + n, len - n, - "processor [%d]\t: %s (%s ISA) %s\n", - cpu_id, tbl->info.str, isa_nm, - IS_AVAIL1(be, "[Big-Endian]")); - break; - } - } - - if (tbl->info.id == 0) - n += scnprintf(buf + n, len - n, "UNKNOWN ARC Processor\n"); + n += scnprintf(buf + n, len - n, "processor [%d]\t: %s %s (%s ISA) %s\n", + cpu_id, cpu->name, cpu->details, + is_isa_arcompact() ? 
"ARCompact" : "ARCv2", + IS_AVAIL1(cpu->isa.be, "[Big-Endian]")); n += scnprintf(buf + n, len - n, "Timers\t\t: %s%s%s%s\nISA Extn\t: ", IS_AVAIL1(cpu->extn.timer0, "Timer0 "), @@ -226,7 +241,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) CONFIG_ARC_HAS_RTC)); n += i = scnprintf(buf + n, len - n, "%s%s%s%s%s", - IS_AVAIL2(atomic, "atomic ", CONFIG_ARC_HAS_LLSC), + IS_AVAIL2(cpu->isa.atomic, "atomic ", CONFIG_ARC_HAS_LLSC), IS_AVAIL2(cpu->isa.ldd, "ll64 ", CONFIG_ARC_HAS_LL64), IS_AVAIL1(cpu->isa.unalign, "unalign (not used)")); @@ -253,7 +268,7 @@ static char *arc_cpu_mumbojumbo(int cpu_id, char *buf, int len) IS_AVAIL1(cpu->extn.swap, "swap "), IS_AVAIL1(cpu->extn.minmax, "minmax "), IS_AVAIL1(cpu->extn.crc, "crc "), - IS_AVAIL2(1, "swape", CONFIG_ARC_HAS_SWAPE)); + IS_AVAIL2(cpu->extn.swape, "swape", CONFIG_ARC_HAS_SWAPE)); if (cpu->bpu.ver) n += scnprintf(buf + n, len - n, @@ -272,9 +287,7 @@ static char *arc_extn_mumbojumbo(int cpu_id, char *buf, int len) FIX_PTR(cpu); - n += scnprintf(buf + n, len - n, - "Vector Table\t: %#x\nPeripherals\t: %#lx:%#lx\n", - cpu->vec_base, perip_base, perip_end); + n += scnprintf(buf + n, len - n, "Vector Table\t: %#x\n", cpu->vec_base); if (cpu->extn.fpu_sp || cpu->extn.fpu_dp) n += scnprintf(buf + n, len - n, "FPU\t\t: %s%s\n", @@ -507,7 +520,7 @@ static void *c_start(struct seq_file *m, loff_t *pos) * way to pass it w/o having to kmalloc/free a 2 byte string. * Encode cpu-id as 0xFFcccc, which is decoded by show routine. */ - return *pos < num_possible_cpus() ? cpu_to_ptr(*pos) : NULL; + return *pos < nr_cpu_ids ? cpu_to_ptr(*pos) : NULL; } static void *c_next(struct seq_file *m, void *v, loff_t *pos) diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 934150e..82f9bc8 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -237,113 +237,3 @@ void show_kernel_fault_diag(const char *str, struct pt_regs *regs, if (!user_mode(regs)) show_stacktrace(current, regs); } - -#ifdef CONFIG_DEBUG_FS - -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/mount.h> -#include <linux/pagemap.h> -#include <linux/init.h> -#include <linux/namei.h> -#include <linux/debugfs.h> - -static struct dentry *test_dentry; -static struct dentry *test_dir; -static struct dentry *test_u32_dentry; - -static u32 clr_on_read = 1; - -#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT -u32 numitlb, numdtlb, num_pte_not_present; - -static int fill_display_data(char *kbuf) -{ - size_t num = 0; - num += sprintf(kbuf + num, "I-TLB Miss %x\n", numitlb); - num += sprintf(kbuf + num, "D-TLB Miss %x\n", numdtlb); - num += sprintf(kbuf + num, "PTE not present %x\n", num_pte_not_present); - - if (clr_on_read) - numitlb = numdtlb = num_pte_not_present = 0; - - return num; -} - -static int tlb_stats_open(struct inode *inode, struct file *file) -{ - file->private_data = (void *)__get_free_page(GFP_KERNEL); - return 0; -} - -/* called on user read(): display the counters */ -static ssize_t tlb_stats_output(struct file *file, /* file descriptor */ - char __user *user_buf, /* user buffer */ - size_t len, /* length of buffer */ - loff_t *offset) /* offset in the file */ -{ - size_t num; - char *kbuf = (char *)file->private_data; - - /* All of the data can he shoved in one iteration */ - if (*offset != 0) - return 0; - - num = fill_display_data(kbuf); - - /* simple_read_from_buffer() is helper for copy to user space - It copies up to @2 (num) bytes from kernel buffer @4 (kbuf) at offset - @3 (offset) into the user space 
address starting at @1 (user_buf). - @5 (len) is max size of user buffer - */ - return simple_read_from_buffer(user_buf, num, offset, kbuf, len); -} - -/* called on user write : clears the counters */ -static ssize_t tlb_stats_clear(struct file *file, const char __user *user_buf, - size_t length, loff_t *offset) -{ - numitlb = numdtlb = num_pte_not_present = 0; - return length; -} - -static int tlb_stats_close(struct inode *inode, struct file *file) -{ - free_page((unsigned long)(file->private_data)); - return 0; -} - -static const struct file_operations tlb_stats_file_ops = { - .read = tlb_stats_output, - .write = tlb_stats_clear, - .open = tlb_stats_open, - .release = tlb_stats_close -}; -#endif - -static int __init arc_debugfs_init(void) -{ - test_dir = debugfs_create_dir("arc", NULL); - -#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT - test_dentry = debugfs_create_file("tlb_stats", 0444, test_dir, NULL, - &tlb_stats_file_ops); -#endif - - test_u32_dentry = - debugfs_create_u32("clr_on_read", 0444, test_dir, &clr_on_read); - - return 0; -} - -module_init(arc_debugfs_init); - -static void __exit arc_debugfs_exit(void) -{ - debugfs_remove(test_u32_dentry); - debugfs_remove(test_dentry); - debugfs_remove(test_dir); -} -module_exit(arc_debugfs_exit); - -#endif diff --git a/arch/arc/mm/cache.c b/arch/arc/mm/cache.c index 97dddbe..2b96cfc 100644 --- a/arch/arc/mm/cache.c +++ b/arch/arc/mm/cache.c @@ -22,8 +22,8 @@ #include <asm/setup.h> static int l2_line_sz; -int ioc_exists; -volatile int slc_enable = 1, ioc_enable = 1; +static int ioc_exists; +int slc_enable = 1, ioc_enable = 1; unsigned long perip_base = ARC_UNCACHED_ADDR_SPACE; /* legacy value for boot */ unsigned long perip_end = 0xFFFFFFFF; /* legacy value */ @@ -53,18 +53,15 @@ char *arc_cache_mumbojumbo(int c, char *buf, int len) PR_CACHE(&cpuinfo_arc700[c].icache, CONFIG_ARC_HAS_ICACHE, "I-Cache"); PR_CACHE(&cpuinfo_arc700[c].dcache, CONFIG_ARC_HAS_DCACHE, "D-Cache"); - if (!is_isa_arcv2()) - return buf; - p = &cpuinfo_arc700[c].slc; if (p->ver) n += scnprintf(buf + n, len - n, "SLC\t\t: %uK, %uB Line%s\n", p->sz_k, p->line_len, IS_USED_RUN(slc_enable)); - if (ioc_exists) - n += scnprintf(buf + n, len - n, "IOC\t\t:%s\n", - IS_DISABLED_RUN(ioc_enable)); + n += scnprintf(buf + n, len - n, "Peripherals\t: %#lx%s%s\n", + perip_base, + IS_AVAIL3(ioc_exists, ioc_enable, ", IO-Coherency ")); return buf; } @@ -113,8 +110,10 @@ static void read_decode_cache_bcr_arcv2(int cpu) } READ_BCR(ARC_REG_CLUSTER_BCR, cbcr); - if (cbcr.c && ioc_enable) + if (cbcr.c) ioc_exists = 1; + else + ioc_enable = 0; /* HS 2.0 didn't have AUX_VOL */ if (cpuinfo_arc700[cpu].core.family > 0x51) { @@ -1002,7 +1001,7 @@ void arc_cache_init(void) read_aux_reg(ARC_REG_SLC_CTRL) | SLC_CTRL_DISABLE); } - if (is_isa_arcv2() && ioc_exists) { + if (is_isa_arcv2() && ioc_enable) { /* IO coherency base - 0x8z */ write_aux_reg(ARC_REG_IO_COH_AP0_BASE, 0x80000); /* IO coherency aperture size - 512Mb: 0x8z-0xAz */ diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 20afc65..60aab5a 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -45,7 +45,7 @@ static void *arc_dma_alloc(struct device *dev, size_t size, * -For coherent data, Read/Write to buffers terminate early in cache * (vs. 
always going to memory - thus are faster) */ - if ((is_isa_arcv2() && ioc_exists) || + if ((is_isa_arcv2() && ioc_enable) || (attrs & DMA_ATTR_NON_CONSISTENT)) need_coh = 0; @@ -97,7 +97,7 @@ static void arc_dma_free(struct device *dev, size_t size, void *vaddr, int is_non_coh = 1; is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) || - (is_isa_arcv2() && ioc_exists); + (is_isa_arcv2() && ioc_enable); if (PageHighMem(page) || !is_non_coh) iounmap((void __force __iomem *)vaddr); diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index ec868a9..bdb295e 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -793,16 +793,16 @@ char *arc_mmu_mumbojumbo(int cpu_id, char *buf, int len) char super_pg[64] = ""; if (p_mmu->s_pg_sz_m) - scnprintf(super_pg, 64, "%dM Super Page%s, ", + scnprintf(super_pg, 64, "%dM Super Page %s", p_mmu->s_pg_sz_m, IS_USED_CFG(CONFIG_TRANSPARENT_HUGEPAGE)); n += scnprintf(buf + n, len - n, - "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d %s%s\n", + "MMU [v%x]\t: %dk PAGE, %sJTLB %d (%dx%d), uDTLB %d, uITLB %d%s%s\n", p_mmu->ver, p_mmu->pg_sz_k, super_pg, p_mmu->sets * p_mmu->ways, p_mmu->sets, p_mmu->ways, p_mmu->u_dtlb, p_mmu->u_itlb, - IS_AVAIL2(p_mmu->pae, "PAE40 ", CONFIG_ARC_HAS_PAE40)); + IS_AVAIL2(p_mmu->pae, ", PAE40 ", CONFIG_ARC_HAS_PAE40)); return buf; } diff --git a/arch/arc/mm/tlbex.S b/arch/arc/mm/tlbex.S index f1967ee..b30e4e3 100644 --- a/arch/arc/mm/tlbex.S +++ b/arch/arc/mm/tlbex.S @@ -237,15 +237,6 @@ ex_saved_reg1: 2: -#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT - and.f 0, r0, _PAGE_PRESENT - bz 1f - ld r3, [num_pte_not_present] - add r3, r3, 1 - st r3, [num_pte_not_present] -1: -#endif - .endm ;----------------------------------------------------------------- @@ -309,12 +300,6 @@ ENTRY(EV_TLBMissI) TLBMISS_FREEUP_REGS -#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT - ld r0, [@numitlb] - add r0, r0, 1 - st r0, [@numitlb] -#endif - ;---------------------------------------------------------------- ; Get the PTE corresponding to V-addr accessed, r2 is setup with EFA LOAD_FAULT_PTE @@ -349,12 +334,6 @@ ENTRY(EV_TLBMissD) TLBMISS_FREEUP_REGS -#ifdef CONFIG_ARC_DBG_TLB_MISS_COUNT - ld r0, [@numdtlb] - add r0, r0, 1 - st r0, [@numdtlb] -#endif - ;---------------------------------------------------------------- ; Get the PTE corresponding to V-addr accessed ; If PTE exists, it will setup, r0 = PTE, r1 = Ptr to PTE, r2 = EFA diff --git a/arch/arm/boot/dts/ste-snowball.dts b/arch/arm/boot/dts/ste-snowball.dts index b3df1c6..386eee6 100644 --- a/arch/arm/boot/dts/ste-snowball.dts +++ b/arch/arm/boot/dts/ste-snowball.dts @@ -239,14 +239,25 @@ arm,primecell-periphid = <0x10480180>; max-frequency = <100000000>; bus-width = <4>; + cap-sd-highspeed; cap-mmc-highspeed; + sd-uhs-sdr12; + sd-uhs-sdr25; + /* All direction control is used */ + st,sig-dir-cmd; + st,sig-dir-dat0; + st,sig-dir-dat2; + st,sig-dir-dat31; + st,sig-pin-fbclk; + full-pwr-cycle; vmmc-supply = <&ab8500_ldo_aux3_reg>; vqmmc-supply = <&vmmci>; pinctrl-names = "default", "sleep"; pinctrl-0 = <&sdi0_default_mode>; pinctrl-1 = <&sdi0_sleep_mode>; - cd-gpios = <&gpio6 26 GPIO_ACTIVE_LOW>; // 218 + /* GPIO218 MMC_CD */ + cd-gpios = <&gpio6 26 GPIO_ACTIVE_LOW>; status = "okay"; }; @@ -549,7 +560,7 @@ /* VMMCI level-shifter enable */ snowball_cfg3 { pins = "GPIO217_AH12"; - ste,config = <&gpio_out_lo>; + ste,config = <&gpio_out_hi>; }; /* VMMCI level-shifter voltage select */ snowball_cfg4 { diff --git a/arch/arm/boot/dts/uniphier-pro5.dtsi b/arch/arm/boot/dts/uniphier-pro5.dtsi index 2c49c36..5357ea9 100644 
--- a/arch/arm/boot/dts/uniphier-pro5.dtsi +++ b/arch/arm/boot/dts/uniphier-pro5.dtsi @@ -184,11 +184,11 @@ }; &mio_clk { - compatible = "socionext,uniphier-pro5-mio-clock"; + compatible = "socionext,uniphier-pro5-sd-clock"; }; &mio_rst { - compatible = "socionext,uniphier-pro5-mio-reset"; + compatible = "socionext,uniphier-pro5-sd-reset"; }; &peri_clk { diff --git a/arch/arm/boot/dts/uniphier-pxs2.dtsi b/arch/arm/boot/dts/uniphier-pxs2.dtsi index 8789cd5..950f07b 100644 --- a/arch/arm/boot/dts/uniphier-pxs2.dtsi +++ b/arch/arm/boot/dts/uniphier-pxs2.dtsi @@ -197,11 +197,11 @@ }; &mio_clk { - compatible = "socionext,uniphier-pxs2-mio-clock"; + compatible = "socionext,uniphier-pxs2-sd-clock"; }; &mio_rst { - compatible = "socionext,uniphier-pxs2-mio-reset"; + compatible = "socionext,uniphier-pxs2-sd-reset"; }; &peri_clk { diff --git a/arch/arm/boot/dts/vf500.dtsi b/arch/arm/boot/dts/vf500.dtsi index a3824e6..d7fdb2a 100644 --- a/arch/arm/boot/dts/vf500.dtsi +++ b/arch/arm/boot/dts/vf500.dtsi @@ -70,7 +70,7 @@ global_timer: timer@40002200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x40002200 0x20>; - interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>; interrupt-parent = <&intc>; clocks = <&clks VF610_CLK_PLATFORM_BUS>; }; diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig index 437d074..11f37ed 100644 --- a/arch/arm/configs/multi_v7_defconfig +++ b/arch/arm/configs/multi_v7_defconfig @@ -850,6 +850,7 @@ CONFIG_PWM_SUN4I=y CONFIG_PWM_TEGRA=y CONFIG_PWM_VT8500=y CONFIG_PHY_HIX5HD2_SATA=y +CONFIG_E1000E=y CONFIG_PWM_STI=y CONFIG_PWM_BCM2835=y CONFIG_PWM_BRCMSTB=m diff --git a/arch/arm/include/asm/unistd.h b/arch/arm/include/asm/unistd.h index 194b699..ada0d29 100644 --- a/arch/arm/include/asm/unistd.h +++ b/arch/arm/include/asm/unistd.h @@ -19,7 +19,7 @@ * This may need to be greater than __NR_last_syscall+1 in order to * account for the padding in the syscall table */ -#define __NR_syscalls (396) +#define __NR_syscalls (400) #define __ARCH_WANT_STAT64 #define __ARCH_WANT_SYS_GETHOSTNAME diff --git a/arch/arm/include/uapi/asm/unistd.h b/arch/arm/include/uapi/asm/unistd.h index 2cb9dc7..314100a 100644 --- a/arch/arm/include/uapi/asm/unistd.h +++ b/arch/arm/include/uapi/asm/unistd.h @@ -420,6 +420,9 @@ #define __NR_copy_file_range (__NR_SYSCALL_BASE+391) #define __NR_preadv2 (__NR_SYSCALL_BASE+392) #define __NR_pwritev2 (__NR_SYSCALL_BASE+393) +#define __NR_pkey_mprotect (__NR_SYSCALL_BASE+394) +#define __NR_pkey_alloc (__NR_SYSCALL_BASE+395) +#define __NR_pkey_free (__NR_SYSCALL_BASE+396) /* * The following SWIs are ARM private. 
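The pkey additions just above only reserve the numbers in the uapi header; the table entries are wired up in the calls.S hunk that follows. A minimal sketch of invoking the new calls through syscall(2) — this assumes uapi headers updated with the numbers above and is illustrative only, not part of the patch; on kernels or CPUs without protection-key support the calls simply fail:

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
	/* pkey_alloc(flags, init_access_rights) -> key number, or -1 */
	long key = syscall(__NR_pkey_alloc, 0UL, 0UL);

	if (key < 0) {
		perror("pkey_alloc");	/* fails where pkeys are unsupported */
		return 1;
	}

	/* hand the key back once it is no longer needed */
	syscall(__NR_pkey_free, (long)key);
	return 0;
}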
diff --git a/arch/arm/kernel/calls.S b/arch/arm/kernel/calls.S index 703fa0f..08030b1 100644 --- a/arch/arm/kernel/calls.S +++ b/arch/arm/kernel/calls.S @@ -403,6 +403,9 @@ CALL(sys_copy_file_range) CALL(sys_preadv2) CALL(sys_pwritev2) + CALL(sys_pkey_mprotect) +/* 395 */ CALL(sys_pkey_alloc) + CALL(sys_pkey_free) #ifndef syscalls_counted .equ syscalls_padding, ((NR_syscalls + 3) & ~3) - NR_syscalls #define syscalls_counted diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 03e9273..08bb84f 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -1312,6 +1312,13 @@ static int init_hyp_mode(void) goto out_err; } + err = create_hyp_mappings(kvm_ksym_ref(__bss_start), + kvm_ksym_ref(__bss_stop), PAGE_HYP_RO); + if (err) { + kvm_err("Cannot map bss section\n"); + goto out_err; + } + /* * Map the Hyp stack pages */ diff --git a/arch/arm/mach-imx/gpc.c b/arch/arm/mach-imx/gpc.c index 0df062d..b54db47 100644 --- a/arch/arm/mach-imx/gpc.c +++ b/arch/arm/mach-imx/gpc.c @@ -408,7 +408,7 @@ static struct genpd_onecell_data imx_gpc_onecell_data = { static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg) { struct clk *clk; - int i; + int i, ret; imx6q_pu_domain.reg = pu_reg; @@ -430,13 +430,22 @@ static int imx_gpc_genpd_init(struct device *dev, struct regulator *pu_reg) if (!IS_ENABLED(CONFIG_PM_GENERIC_DOMAINS)) return 0; - pm_genpd_init(&imx6q_pu_domain.base, NULL, false); - return of_genpd_add_provider_onecell(dev->of_node, + for (i = 0; i < ARRAY_SIZE(imx_gpc_domains); i++) + pm_genpd_init(imx_gpc_domains[i], NULL, false); + + ret = of_genpd_add_provider_onecell(dev->of_node, &imx_gpc_onecell_data); + if (ret) + goto power_off; + + return 0; +power_off: + imx6q_pm_pu_power_off(&imx6q_pu_domain.base); clk_err: while (i--) clk_put(imx6q_pu_domain.clk[i]); + imx6q_pu_domain.reg = NULL; return -EINVAL; } diff --git a/arch/arm/mach-imx/mach-imx6q.c b/arch/arm/mach-imx/mach-imx6q.c index 97fd251..45801b2 100644 --- a/arch/arm/mach-imx/mach-imx6q.c +++ b/arch/arm/mach-imx/mach-imx6q.c @@ -173,7 +173,7 @@ static void __init imx6q_enet_phy_init(void) ksz9021rn_phy_fixup); phy_register_fixup_for_uid(PHY_ID_KSZ9031, MICREL_PHY_ID_MASK, ksz9031rn_phy_fixup); - phy_register_fixup_for_uid(PHY_ID_AR8031, 0xffffffff, + phy_register_fixup_for_uid(PHY_ID_AR8031, 0xffffffef, ar8031_phy_fixup); phy_register_fixup_for_uid(PHY_ID_AR8035, 0xffffffef, ar8035_phy_fixup); diff --git a/arch/arm/mach-mvebu/Kconfig b/arch/arm/mach-mvebu/Kconfig index f9b6bd3..541647f 100644 --- a/arch/arm/mach-mvebu/Kconfig +++ b/arch/arm/mach-mvebu/Kconfig @@ -23,6 +23,7 @@ config MACH_MVEBU_V7 select CACHE_L2X0 select ARM_CPU_SUSPEND select MACH_MVEBU_ANY + select MVEBU_CLK_COREDIV config MACH_ARMADA_370 bool "Marvell Armada 370 boards" @@ -32,7 +33,6 @@ config MACH_ARMADA_370 select CPU_PJ4B select MACH_MVEBU_V7 select PINCTRL_ARMADA_370 - select MVEBU_CLK_COREDIV help Say 'Y' here if you want your kernel to support boards based on the Marvell Armada 370 SoC with device tree. @@ -50,7 +50,6 @@ config MACH_ARMADA_375 select HAVE_SMP select MACH_MVEBU_V7 select PINCTRL_ARMADA_375 - select MVEBU_CLK_COREDIV help Say 'Y' here if you want your kernel to support boards based on the Marvell Armada 375 SoC with device tree. @@ -68,7 +67,6 @@ config MACH_ARMADA_38X select HAVE_SMP select MACH_MVEBU_V7 select PINCTRL_ARMADA_38X - select MVEBU_CLK_COREDIV help Say 'Y' here if you want your kernel to support boards based on the Marvell Armada 380/385 SoC with device tree. 
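The syscalls_padding expression in the calls.S hunk at the top of this group rounds the table up to a multiple of four entries, which is why __NR_syscalls was bumped from 396 to 400 rather than to 397 after the three pkey additions. A standalone illustration of the rounding arithmetic (plain C, not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned int nr = 397;			/* entries 0..396 after pkey_free */
	unsigned int padded = (nr + 3) & ~3u;	/* round up to a multiple of 4 */

	printf("table entries: %u, pad slots: %u\n", padded, padded - nr);
	/* -> table entries: 400, pad slots: 3, matching __NR_syscalls (400) */
	return 0;
}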
diff --git a/arch/arm/mach-uniphier/Kconfig b/arch/arm/mach-uniphier/Kconfig index 82dddee..3930fbb 100644 --- a/arch/arm/mach-uniphier/Kconfig +++ b/arch/arm/mach-uniphier/Kconfig @@ -1,6 +1,7 @@ config ARCH_UNIPHIER bool "Socionext UniPhier SoCs" depends on ARCH_MULTI_V7 + select ARCH_HAS_RESET_CONTROLLER select ARM_AMBA select ARM_GLOBAL_TIMER select ARM_GIC diff --git a/arch/arm/mm/abort-lv4t.S b/arch/arm/mm/abort-lv4t.S index 6d8e8e3..4cdfab3 100644 --- a/arch/arm/mm/abort-lv4t.S +++ b/arch/arm/mm/abort-lv4t.S @@ -7,7 +7,7 @@ * : r4 = aborted context pc * : r5 = aborted context psr * - * Returns : r4-r5, r10-r11, r13 preserved + * Returns : r4-r5, r9-r11, r13 preserved * * Purpose : obtain information about current aborted instruction. * Note: we read user space. This means we might cause a data @@ -48,7 +48,10 @@ ENTRY(v4t_late_abort) /* c */ b do_DataAbort @ ldc rd, [rn], #m @ Same as ldr rd, [rn], #m /* d */ b do_DataAbort @ ldc rd, [rn, #m] /* e */ b .data_unknown -/* f */ +/* f */ b .data_unknown + +.data_unknown_r9: + ldr r9, [sp], #4 .data_unknown: @ Part of jumptable mov r0, r4 mov r1, r8 @@ -57,6 +60,7 @@ ENTRY(v4t_late_abort) .data_arm_ldmstm: tst r8, #1 << 21 @ check writeback bit beq do_DataAbort @ no writeback -> no fixup + str r9, [sp, #-4]! mov r7, #0x11 orr r7, r7, #0x1100 and r6, r8, r7 @@ -75,12 +79,14 @@ ENTRY(v4t_late_abort) subne r7, r7, r6, lsl #2 @ Undo increment addeq r7, r7, r6, lsl #2 @ Undo decrement str r7, [r2, r9, lsr #14] @ Put register 'Rn' + ldr r9, [sp], #4 b do_DataAbort .data_arm_lateldrhpre: tst r8, #1 << 21 @ Check writeback bit beq do_DataAbort @ No writeback -> no fixup .data_arm_lateldrhpost: + str r9, [sp, #-4]! and r9, r8, #0x00f @ get Rm / low nibble of immediate value tst r8, #1 << 22 @ if (immediate offset) andne r6, r8, #0xf00 @ { immediate high nibble @@ -93,6 +99,7 @@ ENTRY(v4t_late_abort) subne r7, r7, r6 @ Undo incrmenet addeq r7, r7, r6 @ Undo decrement str r7, [r2, r9, lsr #14] @ Put register 'Rn' + ldr r9, [sp], #4 b do_DataAbort .data_arm_lateldrpreconst: @@ -101,12 +108,14 @@ ENTRY(v4t_late_abort) .data_arm_lateldrpostconst: movs r6, r8, lsl #20 @ Get offset beq do_DataAbort @ zero -> no fixup + str r9, [sp, #-4]! and r9, r8, #15 << 16 @ Extract 'n' from instruction ldr r7, [r2, r9, lsr #14] @ Get register 'Rn' tst r8, #1 << 23 @ Check U bit subne r7, r7, r6, lsr #20 @ Undo increment addeq r7, r7, r6, lsr #20 @ Undo decrement str r7, [r2, r9, lsr #14] @ Put register 'Rn' + ldr r9, [sp], #4 b do_DataAbort .data_arm_lateldrprereg: @@ -115,6 +124,7 @@ ENTRY(v4t_late_abort) .data_arm_lateldrpostreg: and r7, r8, #15 @ Extract 'm' from instruction ldr r6, [r2, r7, lsl #2] @ Get register 'Rm' + str r9, [sp, #-4]! mov r9, r8, lsr #7 @ get shift count ands r9, r9, #31 and r7, r8, #0x70 @ get shift type @@ -126,33 +136,33 @@ ENTRY(v4t_late_abort) b .data_arm_apply_r6_and_rn b .data_arm_apply_r6_and_rn @ 1: LSL #0 nop - b .data_unknown @ 2: MUL? + b .data_unknown_r9 @ 2: MUL? nop - b .data_unknown @ 3: MUL? + b .data_unknown_r9 @ 3: MUL? nop mov r6, r6, lsr r9 @ 4: LSR #!0 b .data_arm_apply_r6_and_rn mov r6, r6, lsr #32 @ 5: LSR #32 b .data_arm_apply_r6_and_rn - b .data_unknown @ 6: MUL? + b .data_unknown_r9 @ 6: MUL? nop - b .data_unknown @ 7: MUL? + b .data_unknown_r9 @ 7: MUL? nop mov r6, r6, asr r9 @ 8: ASR #!0 b .data_arm_apply_r6_and_rn mov r6, r6, asr #32 @ 9: ASR #32 b .data_arm_apply_r6_and_rn - b .data_unknown @ A: MUL? + b .data_unknown_r9 @ A: MUL? nop - b .data_unknown @ B: MUL? + b .data_unknown_r9 @ B: MUL? 
nop mov r6, r6, ror r9 @ C: ROR #!0 b .data_arm_apply_r6_and_rn mov r6, r6, rrx @ D: RRX b .data_arm_apply_r6_and_rn - b .data_unknown @ E: MUL? + b .data_unknown_r9 @ E: MUL? nop - b .data_unknown @ F: MUL? + b .data_unknown_r9 @ F: MUL? .data_thumb_abort: ldrh r8, [r4] @ read instruction @@ -190,6 +200,7 @@ ENTRY(v4t_late_abort) .data_thumb_pushpop: tst r8, #1 << 10 beq .data_unknown + str r9, [sp, #-4]! and r6, r8, #0x55 @ hweight8(r8) + R bit and r9, r8, #0xaa add r6, r6, r9, lsr #1 @@ -204,9 +215,11 @@ ENTRY(v4t_late_abort) addeq r7, r7, r6, lsl #2 @ increment SP if PUSH subne r7, r7, r6, lsl #2 @ decrement SP if POP str r7, [r2, #13 << 2] + ldr r9, [sp], #4 b do_DataAbort .data_thumb_ldmstm: + str r9, [sp, #-4]! and r6, r8, #0x55 @ hweight8(r8) and r9, r8, #0xaa add r6, r6, r9, lsr #1 @@ -219,4 +232,5 @@ ENTRY(v4t_late_abort) and r6, r6, #15 @ number of regs to transfer sub r7, r7, r6, lsl #2 @ always decrement str r7, [r2, r9, lsr #6] + ldr r9, [sp], #4 b do_DataAbort diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 30398db..969ef88 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -915,7 +915,7 @@ config RANDOMIZE_BASE config RANDOMIZE_MODULE_REGION_FULL bool "Randomize the module region independently from the core kernel" - depends on RANDOMIZE_BASE + depends on RANDOMIZE_BASE && !DYNAMIC_FTRACE default y help Randomizes the location of the module region without considering the diff --git a/arch/arm64/Kconfig.platforms b/arch/arm64/Kconfig.platforms index cfbdf02..101794f 100644 --- a/arch/arm64/Kconfig.platforms +++ b/arch/arm64/Kconfig.platforms @@ -190,6 +190,7 @@ config ARCH_THUNDER config ARCH_UNIPHIER bool "Socionext UniPhier SoC Family" + select ARCH_HAS_RESET_CONTROLLER select PINCTRL help This enables support for Socionext UniPhier SoC family. 
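The 0x55/0xaa masking in the .data_thumb_pushpop and .data_thumb_ldmstm hunks above is the pair-folding step of a parallel popcount — the in-line comment calls it hweight8(r8) — used to count how many registers the aborted LDM/STM or PUSH/POP transferred. A C rendering of the full idiom for reference (the assembly keeps only the steps it needs); the function name and test value here are illustrative:

#include <stdio.h>

/* Parallel popcount of an 8-bit register list, as in the fixup asm. */
static unsigned int hweight8(unsigned int x)
{
	x = (x & 0x55) + ((x & 0xaa) >> 1);	/* 2-bit pair counts   */
	x = (x & 0x33) + ((x & 0xcc) >> 2);	/* 4-bit nibble counts */
	return (x & 0x0f) + (x >> 4);		/* total set bits      */
}

int main(void)
{
	/* 5 registers in the list -> 5 * 4 = 20 bytes of writeback */
	printf("%u\n", hweight8(0xb5) * 4);
	return 0;
}

Each set bit is one 32-bit register, hence the "lsl #2" scaling when the fixup code undoes the SP or Rn writeback.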
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index ab51aed..3635b86 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -15,7 +15,7 @@ CPPFLAGS_vmlinux.lds = -DTEXT_OFFSET=$(TEXT_OFFSET) GZFLAGS :=-9 ifneq ($(CONFIG_RELOCATABLE),) -LDFLAGS_vmlinux += -pie -Bsymbolic +LDFLAGS_vmlinux += -pie -shared -Bsymbolic endif ifeq ($(CONFIG_ARM64_ERRATUM_843419),y) diff --git a/arch/arm64/boot/dts/broadcom/ns2-svk.dts b/arch/arm64/boot/dts/broadcom/ns2-svk.dts index 2d7872a..b09f3bc 100644 --- a/arch/arm64/boot/dts/broadcom/ns2-svk.dts +++ b/arch/arm64/boot/dts/broadcom/ns2-svk.dts @@ -164,6 +164,8 @@ nand-ecc-mode = "hw"; nand-ecc-strength = <8>; nand-ecc-step-size = <512>; + nand-bus-width = <16>; + brcm,nand-oob-sector-size = <16>; #address-cells = <1>; #size-cells = <1>; }; diff --git a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi index 220ac70..97d331e 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi @@ -123,6 +123,7 @@ <1 14 0xf08>, /* Physical Non-Secure PPI */ <1 11 0xf08>, /* Virtual PPI */ <1 10 0xf08>; /* Hypervisor PPI */ + fsl,erratum-a008585; }; pmu { diff --git a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi index 337da90..7f0dc13 100644 --- a/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi +++ b/arch/arm64/boot/dts/freescale/fsl-ls2080a.dtsi @@ -195,6 +195,7 @@ <1 14 4>, /* Physical Non-Secure PPI, active-low */ <1 11 4>, /* Virtual PPI, active-low */ <1 10 4>; /* Hypervisor PPI, active-low */ + fsl,erratum-a008585; }; pmu { diff --git a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi index e5e3ed6..602e2c2 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110-master.dtsi @@ -131,7 +131,7 @@ #address-cells = <0x1>; #size-cells = <0x0>; cell-index = <1>; - clocks = <&cpm_syscon0 0 3>; + clocks = <&cpm_syscon0 1 21>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts b/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts index 46cdddf..e5eeca2 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-geekbox.dts @@ -116,7 +116,6 @@ cap-mmc-highspeed; clock-frequency = <150000000>; disable-wp; - keep-power-in-suspend; non-removable; num-slots = <1>; vmmc-supply = <&vcc_io>; @@ -258,8 +257,6 @@ }; vcc_sd: SWITCH_REG1 { - regulator-always-on; - regulator-boot-on; regulator-name = "vcc_sd"; }; diff --git a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts index 5797933..ea0a8ec 100644 --- a/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts +++ b/arch/arm64/boot/dts/rockchip/rk3368-orion-r68-meta.dts @@ -152,8 +152,6 @@ gpio = <&gpio3 11 GPIO_ACTIVE_LOW>; regulator-min-microvolt = <1800000>; regulator-max-microvolt = <3300000>; - regulator-always-on; - regulator-boot-on; vin-supply = <&vcc_io>; }; @@ -201,7 +199,6 @@ bus-width = <8>; cap-mmc-highspeed; disable-wp; - keep-power-in-suspend; mmc-pwrseq = <&emmc_pwrseq>; mmc-hs200-1_2v; mmc-hs200-1_8v; @@ -350,7 +347,6 @@ clock-freq-min-max = <400000 50000000>; cap-sd-highspeed; card-detect-delay = <200>; - keep-power-in-suspend; num-slots = <1>; pinctrl-names = "default"; pinctrl-0 = <&sdmmc_clk &sdmmc_cmd &sdmmc_cd &sdmmc_bus4>; diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi 
b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi index 08fd7cf..56a1b2e 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi @@ -257,18 +257,18 @@ reg = <0x59801000 0x400>; }; - mioctrl@59810000 { - compatible = "socionext,uniphier-mioctrl", + sdctrl@59810000 { + compatible = "socionext,uniphier-ld20-sdctrl", "simple-mfd", "syscon"; reg = <0x59810000 0x800>; - mio_clk: clock { - compatible = "socionext,uniphier-ld20-mio-clock"; + sd_clk: clock { + compatible = "socionext,uniphier-ld20-sd-clock"; #clock-cells = <1>; }; - mio_rst: reset { - compatible = "socionext,uniphier-ld20-mio-reset"; + sd_rst: reset { + compatible = "socionext,uniphier-ld20-sd-reset"; #reset-cells = <1>; }; }; diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 758d74f..a27c324 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -94,7 +94,7 @@ struct arm64_cpu_capabilities { u16 capability; int def_scope; /* default scope */ bool (*matches)(const struct arm64_cpu_capabilities *caps, int scope); - void (*enable)(void *); /* Called on all active CPUs */ + int (*enable)(void *); /* Called on all active CPUs */ union { struct { /* To be used for erratum handling only */ u32 midr_model; diff --git a/arch/arm64/include/asm/exec.h b/arch/arm64/include/asm/exec.h index db0563c..f7865dd 100644 --- a/arch/arm64/include/asm/exec.h +++ b/arch/arm64/include/asm/exec.h @@ -18,6 +18,9 @@ #ifndef __ASM_EXEC_H #define __ASM_EXEC_H +#include <linux/sched.h> + extern unsigned long arch_align_stack(unsigned long sp); +void uao_thread_switch(struct task_struct *next); #endif /* __ASM_EXEC_H */ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index fd9d5fd..f5ea0ba 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -178,11 +178,6 @@ static inline bool kvm_vcpu_dabt_isvalid(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_ISV); } -static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) -{ - return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR); -} - static inline bool kvm_vcpu_dabt_issext(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_SSE); @@ -203,6 +198,12 @@ static inline bool kvm_vcpu_dabt_iss1tw(const struct kvm_vcpu *vcpu) return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_S1PTW); } +static inline bool kvm_vcpu_dabt_iswrite(const struct kvm_vcpu *vcpu) +{ + return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_WNR) || + kvm_vcpu_dabt_iss1tw(vcpu); /* AF/DBM update */ +} + static inline bool kvm_vcpu_dabt_is_cm(const struct kvm_vcpu *vcpu) { return !!(kvm_vcpu_get_hsr(vcpu) & ESR_ELx_CM); diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index ba62df8..b71086d 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -217,7 +217,7 @@ static inline void *phys_to_virt(phys_addr_t x) #define _virt_addr_valid(kaddr) pfn_valid(__pa(kaddr) >> PAGE_SHIFT) #else #define __virt_to_pgoff(kaddr) (((u64)(kaddr) & ~PAGE_OFFSET) / PAGE_SIZE * sizeof(struct page)) -#define __page_to_voff(kaddr) (((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page)) +#define __page_to_voff(page) (((u64)(page) & ~VMEMMAP_START) * PAGE_SIZE / sizeof(struct page)) #define page_to_virt(page) ((void *)((__page_to_voff(page)) | PAGE_OFFSET)) #define virt_to_page(vaddr) ((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START)) diff --git 
a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index e12af67..06ff7fd 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -17,6 +17,7 @@ #define __ASM_MODULE_H #include <asm-generic/module.h> +#include <asm/memory.h> #define MODULE_ARCH_VERMAGIC "aarch64" @@ -32,6 +33,10 @@ u64 module_emit_plt_entry(struct module *mod, const Elf64_Rela *rela, Elf64_Sym *sym); #ifdef CONFIG_RANDOMIZE_BASE +#ifdef CONFIG_MODVERSIONS +#define ARCH_RELOCATES_KCRCTAB +#define reloc_start (kimage_vaddr - KIMAGE_VADDR) +#endif extern u64 module_alloc_base; #else #define module_alloc_base ((u64)_etext - MODULES_VSIZE) diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 2fee2f5..5394c84 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -44,48 +44,44 @@ static inline unsigned long __percpu_##op(void *ptr, \ \ switch (size) { \ case 1: \ - do { \ - asm ("//__per_cpu_" #op "_1\n" \ - "ldxrb %w[ret], %[ptr]\n" \ + asm ("//__per_cpu_" #op "_1\n" \ + "1: ldxrb %w[ret], %[ptr]\n" \ #asm_op " %w[ret], %w[ret], %w[val]\n" \ - "stxrb %w[loop], %w[ret], %[ptr]\n" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u8 *)ptr) \ - : [val] "Ir" (val)); \ - } while (loop); \ + " stxrb %w[loop], %w[ret], %[ptr]\n" \ + " cbnz %w[loop], 1b" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u8 *)ptr) \ + : [val] "Ir" (val)); \ break; \ case 2: \ - do { \ - asm ("//__per_cpu_" #op "_2\n" \ - "ldxrh %w[ret], %[ptr]\n" \ + asm ("//__per_cpu_" #op "_2\n" \ + "1: ldxrh %w[ret], %[ptr]\n" \ #asm_op " %w[ret], %w[ret], %w[val]\n" \ - "stxrh %w[loop], %w[ret], %[ptr]\n" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u16 *)ptr) \ - : [val] "Ir" (val)); \ - } while (loop); \ + " stxrh %w[loop], %w[ret], %[ptr]\n" \ + " cbnz %w[loop], 1b" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u16 *)ptr) \ + : [val] "Ir" (val)); \ break; \ case 4: \ - do { \ - asm ("//__per_cpu_" #op "_4\n" \ - "ldxr %w[ret], %[ptr]\n" \ + asm ("//__per_cpu_" #op "_4\n" \ + "1: ldxr %w[ret], %[ptr]\n" \ #asm_op " %w[ret], %w[ret], %w[val]\n" \ - "stxr %w[loop], %w[ret], %[ptr]\n" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u32 *)ptr) \ - : [val] "Ir" (val)); \ - } while (loop); \ + " stxr %w[loop], %w[ret], %[ptr]\n" \ + " cbnz %w[loop], 1b" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u32 *)ptr) \ + : [val] "Ir" (val)); \ break; \ case 8: \ - do { \ - asm ("//__per_cpu_" #op "_8\n" \ - "ldxr %[ret], %[ptr]\n" \ + asm ("//__per_cpu_" #op "_8\n" \ + "1: ldxr %[ret], %[ptr]\n" \ #asm_op " %[ret], %[ret], %[val]\n" \ - "stxr %w[loop], %[ret], %[ptr]\n" \ - : [loop] "=&r" (loop), [ret] "=&r" (ret), \ - [ptr] "+Q"(*(u64 *)ptr) \ - : [val] "Ir" (val)); \ - } while (loop); \ + " stxr %w[loop], %[ret], %[ptr]\n" \ + " cbnz %w[loop], 1b" \ + : [loop] "=&r" (loop), [ret] "=&r" (ret), \ + [ptr] "+Q"(*(u64 *)ptr) \ + : [val] "Ir" (val)); \ break; \ default: \ BUILD_BUG(); \ @@ -150,44 +146,40 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, switch (size) { case 1: - do { - asm ("//__percpu_xchg_1\n" - "ldxrb %w[ret], %[ptr]\n" - "stxrb %w[loop], %w[val], %[ptr]\n" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u8 *)ptr) - : [val] "r" (val)); - } while (loop); + asm ("//__percpu_xchg_1\n" + "1: ldxrb %w[ret], %[ptr]\n" + " stxrb %w[loop], %w[val], %[ptr]\n" + " cbnz %w[loop], 1b" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] 
"+Q"(*(u8 *)ptr) + : [val] "r" (val)); break; case 2: - do { - asm ("//__percpu_xchg_2\n" - "ldxrh %w[ret], %[ptr]\n" - "stxrh %w[loop], %w[val], %[ptr]\n" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u16 *)ptr) - : [val] "r" (val)); - } while (loop); + asm ("//__percpu_xchg_2\n" + "1: ldxrh %w[ret], %[ptr]\n" + " stxrh %w[loop], %w[val], %[ptr]\n" + " cbnz %w[loop], 1b" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u16 *)ptr) + : [val] "r" (val)); break; case 4: - do { - asm ("//__percpu_xchg_4\n" - "ldxr %w[ret], %[ptr]\n" - "stxr %w[loop], %w[val], %[ptr]\n" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u32 *)ptr) - : [val] "r" (val)); - } while (loop); + asm ("//__percpu_xchg_4\n" + "1: ldxr %w[ret], %[ptr]\n" + " stxr %w[loop], %w[val], %[ptr]\n" + " cbnz %w[loop], 1b" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u32 *)ptr) + : [val] "r" (val)); break; case 8: - do { - asm ("//__percpu_xchg_8\n" - "ldxr %[ret], %[ptr]\n" - "stxr %w[loop], %[val], %[ptr]\n" - : [loop] "=&r"(loop), [ret] "=&r"(ret), - [ptr] "+Q"(*(u64 *)ptr) - : [val] "r" (val)); - } while (loop); + asm ("//__percpu_xchg_8\n" + "1: ldxr %[ret], %[ptr]\n" + " stxr %w[loop], %[val], %[ptr]\n" + " cbnz %w[loop], 1b" + : [loop] "=&r"(loop), [ret] "=&r"(ret), + [ptr] "+Q"(*(u64 *)ptr) + : [val] "r" (val)); break; default: BUILD_BUG(); diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index df2e53d..60e3482 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -188,8 +188,8 @@ static inline void spin_lock_prefetch(const void *ptr) #endif -void cpu_enable_pan(void *__unused); -void cpu_enable_uao(void *__unused); -void cpu_enable_cache_maint_trap(void *__unused); +int cpu_enable_pan(void *__unused); +int cpu_enable_uao(void *__unused); +int cpu_enable_cache_maint_trap(void *__unused); #endif /* __ASM_PROCESSOR_H */ diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index e8d46e8..6c80b36 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -286,7 +286,7 @@ asm( #define write_sysreg_s(v, r) do { \ u64 __val = (u64)v; \ - asm volatile("msr_s " __stringify(r) ", %0" : : "rZ" (__val)); \ + asm volatile("msr_s " __stringify(r) ", %x0" : : "rZ" (__val)); \ } while (0) static inline void config_sctlr_el1(u32 clear, u32 set) diff --git a/arch/arm64/include/asm/uaccess.h b/arch/arm64/include/asm/uaccess.h index bcaf6fb..55d0adb 100644 --- a/arch/arm64/include/asm/uaccess.h +++ b/arch/arm64/include/asm/uaccess.h @@ -21,6 +21,7 @@ /* * User space memory access functions */ +#include <linux/bitops.h> #include <linux/kasan-checks.h> #include <linux/string.h> #include <linux/thread_info.h> @@ -102,6 +103,13 @@ static inline void set_fs(mm_segment_t fs) flag; \ }) +/* + * When dealing with data aborts or instruction traps we may end up with + * a tagged userland pointer. Clear the tag to get a sane pointer to pass + * on to access_ok(), for instance. 
+ */ +#define untagged_addr(addr) sign_extend64(addr, 55) + #define access_ok(type, addr, size) __range_ok(addr, size) #define user_addr_max get_fs diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 42ffdb54..b0988bb 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -280,35 +280,43 @@ static void __init register_insn_emulation_sysctl(struct ctl_table *table) /* * Error-checking SWP macros implemented using ldxr{b}/stxr{b} */ -#define __user_swpX_asm(data, addr, res, temp, B) \ + +/* Arbitrary constant to ensure forward-progress of the LL/SC loop */ +#define __SWP_LL_SC_LOOPS 4 + +#define __user_swpX_asm(data, addr, res, temp, temp2, B) \ __asm__ __volatile__( \ + " mov %w3, %w7\n" \ ALTERNATIVE("nop", SET_PSTATE_PAN(0), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) \ - "0: ldxr"B" %w2, [%3]\n" \ - "1: stxr"B" %w0, %w1, [%3]\n" \ + "0: ldxr"B" %w2, [%4]\n" \ + "1: stxr"B" %w0, %w1, [%4]\n" \ " cbz %w0, 2f\n" \ - " mov %w0, %w4\n" \ + " sub %w3, %w3, #1\n" \ + " cbnz %w3, 0b\n" \ + " mov %w0, %w5\n" \ " b 3f\n" \ "2:\n" \ " mov %w1, %w2\n" \ "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ - "4: mov %w0, %w5\n" \ + "4: mov %w0, %w6\n" \ " b 3b\n" \ " .popsection" \ _ASM_EXTABLE(0b, 4b) \ _ASM_EXTABLE(1b, 4b) \ ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, \ CONFIG_ARM64_PAN) \ - : "=&r" (res), "+r" (data), "=&r" (temp) \ - : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT) \ + : "=&r" (res), "+r" (data), "=&r" (temp), "=&r" (temp2) \ + : "r" (addr), "i" (-EAGAIN), "i" (-EFAULT), \ + "i" (__SWP_LL_SC_LOOPS) \ : "memory") -#define __user_swp_asm(data, addr, res, temp) \ - __user_swpX_asm(data, addr, res, temp, "") -#define __user_swpb_asm(data, addr, res, temp) \ - __user_swpX_asm(data, addr, res, temp, "b") +#define __user_swp_asm(data, addr, res, temp, temp2) \ + __user_swpX_asm(data, addr, res, temp, temp2, "") +#define __user_swpb_asm(data, addr, res, temp, temp2) \ + __user_swpX_asm(data, addr, res, temp, temp2, "b") /* * Bit 22 of the instruction encoding distinguishes between @@ -328,12 +336,12 @@ static int emulate_swpX(unsigned int address, unsigned int *data, } while (1) { - unsigned long temp; + unsigned long temp, temp2; if (type == TYPE_SWPB) - __user_swpb_asm(*data, address, res, temp); + __user_swpb_asm(*data, address, res, temp, temp2); else - __user_swp_asm(*data, address, res, temp); + __user_swp_asm(*data, address, res, temp, temp2); if (likely(res != -EAGAIN) || signal_pending(current)) break; diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 0150394..b75e917 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -39,10 +39,11 @@ has_mismatched_cache_line_size(const struct arm64_cpu_capabilities *entry, (arm64_ftr_reg_ctrel0.sys_val & arm64_ftr_reg_ctrel0.strict_mask); } -static void cpu_enable_trap_ctr_access(void *__unused) +static int cpu_enable_trap_ctr_access(void *__unused) { /* Clear SCTLR_EL1.UCT */ config_sctlr_el1(SCTLR_EL1_UCT, 0); + return 0; } #define MIDR_RANGE(model, min, max) \ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d577f26..c02504e 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -19,7 +19,9 @@ #define pr_fmt(fmt) "CPU features: " fmt #include <linux/bsearch.h> +#include <linux/cpumask.h> #include <linux/sort.h> +#include <linux/stop_machine.h> #include <linux/types.h> #include <asm/cpu.h> #include <asm/cpufeature.h> @@ -941,7 
+943,13 @@ void __init enable_cpu_capabilities(const struct arm64_cpu_capabilities *caps) { for (; caps->matches; caps++) if (caps->enable && cpus_have_cap(caps->capability)) - on_each_cpu(caps->enable, NULL, true); + /* + * Use stop_machine() as it schedules the work allowing + * us to modify PSTATE, instead of on_each_cpu() which + * uses an IPI, giving us a PSTATE that disappears when + * we return. + */ + stop_machine(caps->enable, NULL, cpu_online_mask); } /* diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 427f6d3..332e331 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -586,8 +586,9 @@ CPU_LE( movk x0, #0x30d0, lsl #16 ) // Clear EE and E0E on LE systems b.lt 4f // Skip if no PMU present mrs x0, pmcr_el0 // Disable debug access traps ubfx x0, x0, #11, #5 // to EL2 and allow access to - msr mdcr_el2, x0 // all PMU counters from EL1 4: + csel x0, xzr, x0, lt // all PMU counters from EL1 + msr mdcr_el2, x0 // (if they exist) /* Stage-2 translation */ msr vttbr_el2, xzr diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 27b2f138..01753cd 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -49,6 +49,7 @@ #include <asm/alternative.h> #include <asm/compat.h> #include <asm/cacheflush.h> +#include <asm/exec.h> #include <asm/fpsimd.h> #include <asm/mmu_context.h> #include <asm/processor.h> @@ -186,10 +187,19 @@ void __show_regs(struct pt_regs *regs) printk("pc : [<%016llx>] lr : [<%016llx>] pstate: %08llx\n", regs->pc, lr, regs->pstate); printk("sp : %016llx\n", sp); - for (i = top_reg; i >= 0; i--) { + + i = top_reg; + + while (i >= 0) { printk("x%-2d: %016llx ", i, regs->regs[i]); - if (i % 2 == 0) - printk("\n"); + i--; + + if (i % 2 == 0) { + pr_cont("x%-2d: %016llx ", i, regs->regs[i]); + i--; + } + + pr_cont("\n"); } printk("\n"); } @@ -301,7 +311,7 @@ static void tls_thread_switch(struct task_struct *next) } /* Restore the UAO state depending on next's addr_limit */ -static void uao_thread_switch(struct task_struct *next) +void uao_thread_switch(struct task_struct *next) { if (IS_ENABLED(CONFIG_ARM64_UAO)) { if (task_thread_info(next)->addr_limit == KERNEL_DS) diff --git a/arch/arm64/kernel/sleep.S b/arch/arm64/kernel/sleep.S index b8799e7..1bec41b 100644 --- a/arch/arm64/kernel/sleep.S +++ b/arch/arm64/kernel/sleep.S @@ -135,7 +135,7 @@ ENTRY(_cpu_resume) #ifdef CONFIG_KASAN mov x0, sp - bl kasan_unpoison_remaining_stack + bl kasan_unpoison_task_stack_below #endif ldp x19, x20, [x29, #16] diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index d3f151c..8507703 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -544,6 +544,7 @@ acpi_map_gic_cpu_interface(struct acpi_madt_generic_interrupt *processor) return; } bootcpu_valid = true; + early_map_cpu_to_node(0, acpi_numa_get_nid(0, hwid)); return; } diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index ad73414..bb0cd78 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -1,8 +1,11 @@ #include <linux/ftrace.h> #include <linux/percpu.h> #include <linux/slab.h> +#include <asm/alternative.h> #include <asm/cacheflush.h> +#include <asm/cpufeature.h> #include <asm/debug-monitors.h> +#include <asm/exec.h> #include <asm/pgtable.h> #include <asm/memory.h> #include <asm/mmu_context.h> @@ -50,6 +53,14 @@ void notrace __cpu_suspend_exit(void) set_my_cpu_offset(per_cpu_offset(cpu)); /* + * PSTATE was not saved over suspend/resume, re-enable any detected + * features that might 
not have been set correctly. + */ + asm(ALTERNATIVE("nop", SET_PSTATE_PAN(1), ARM64_HAS_PAN, + CONFIG_ARM64_PAN)); + uao_thread_switch(current); + + /* * Restore HW breakpoint registers to sane values * before debug exceptions are possibly reenabled * through local_dbg_restore. diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 5ff020f..c9986b3 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -428,24 +428,28 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) force_signal_inject(SIGILL, ILL_ILLOPC, regs, 0); } -void cpu_enable_cache_maint_trap(void *__unused) +int cpu_enable_cache_maint_trap(void *__unused) { config_sctlr_el1(SCTLR_EL1_UCI, 0); + return 0; } #define __user_cache_maint(insn, address, res) \ - asm volatile ( \ - "1: " insn ", %1\n" \ - " mov %w0, #0\n" \ - "2:\n" \ - " .pushsection .fixup,\"ax\"\n" \ - " .align 2\n" \ - "3: mov %w0, %w2\n" \ - " b 2b\n" \ - " .popsection\n" \ - _ASM_EXTABLE(1b, 3b) \ - : "=r" (res) \ - : "r" (address), "i" (-EFAULT) ) + if (untagged_addr(address) >= user_addr_max()) \ + res = -EFAULT; \ + else \ + asm volatile ( \ + "1: " insn ", %1\n" \ + " mov %w0, #0\n" \ + "2:\n" \ + " .pushsection .fixup,\"ax\"\n" \ + " .align 2\n" \ + "3: mov %w0, %w2\n" \ + " b 2b\n" \ + " .popsection\n" \ + _ASM_EXTABLE(1b, 3b) \ + : "=r" (res) \ + : "r" (address), "i" (-EFAULT) ) static void user_cache_maint_handler(unsigned int esr, struct pt_regs *regs) { diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 53d9159..0f87883 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -29,7 +29,9 @@ #include <linux/sched.h> #include <linux/highmem.h> #include <linux/perf_event.h> +#include <linux/preempt.h> +#include <asm/bug.h> #include <asm/cpufeature.h> #include <asm/exception.h> #include <asm/debug-monitors.h> @@ -670,9 +672,17 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, NOKPROBE_SYMBOL(do_debug_exception); #ifdef CONFIG_ARM64_PAN -void cpu_enable_pan(void *__unused) +int cpu_enable_pan(void *__unused) { + /* + * We modify PSTATE. This won't work from irq context as the PSTATE + * is discarded once we return from the exception. + */ + WARN_ON_ONCE(in_interrupt()); + config_sctlr_el1(SCTLR_EL1_SPAN, 0); + asm(SET_PSTATE_PAN(1)); + return 0; } #endif /* CONFIG_ARM64_PAN */ @@ -683,8 +693,9 @@ void cpu_enable_pan(void *__unused) * We need to enable the feature at runtime (instead of adding it to * PSR_MODE_EL1h) as the feature may not be implemented by the cpu. 
*/ -void cpu_enable_uao(void *__unused) +int cpu_enable_uao(void *__unused) { asm(SET_PSTATE_UAO(1)); + return 0; } #endif /* CONFIG_ARM64_UAO */ diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 21c489b..212c4d1 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -421,35 +421,35 @@ void __init mem_init(void) pr_notice("Virtual kernel memory layout:\n"); #ifdef CONFIG_KASAN - pr_cont(" kasan : 0x%16lx - 0x%16lx (%6ld GB)\n", + pr_notice(" kasan : 0x%16lx - 0x%16lx (%6ld GB)\n", MLG(KASAN_SHADOW_START, KASAN_SHADOW_END)); #endif - pr_cont(" modules : 0x%16lx - 0x%16lx (%6ld MB)\n", + pr_notice(" modules : 0x%16lx - 0x%16lx (%6ld MB)\n", MLM(MODULES_VADDR, MODULES_END)); - pr_cont(" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n", + pr_notice(" vmalloc : 0x%16lx - 0x%16lx (%6ld GB)\n", MLG(VMALLOC_START, VMALLOC_END)); - pr_cont(" .text : 0x%p" " - 0x%p" " (%6ld KB)\n", + pr_notice(" .text : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(_text, _etext)); - pr_cont(" .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n", + pr_notice(" .rodata : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(__start_rodata, __init_begin)); - pr_cont(" .init : 0x%p" " - 0x%p" " (%6ld KB)\n", + pr_notice(" .init : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(__init_begin, __init_end)); - pr_cont(" .data : 0x%p" " - 0x%p" " (%6ld KB)\n", + pr_notice(" .data : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(_sdata, _edata)); - pr_cont(" .bss : 0x%p" " - 0x%p" " (%6ld KB)\n", + pr_notice(" .bss : 0x%p" " - 0x%p" " (%6ld KB)\n", MLK_ROUNDUP(__bss_start, __bss_stop)); - pr_cont(" fixed : 0x%16lx - 0x%16lx (%6ld KB)\n", + pr_notice(" fixed : 0x%16lx - 0x%16lx (%6ld KB)\n", MLK(FIXADDR_START, FIXADDR_TOP)); - pr_cont(" PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n", + pr_notice(" PCI I/O : 0x%16lx - 0x%16lx (%6ld MB)\n", MLM(PCI_IO_START, PCI_IO_END)); #ifdef CONFIG_SPARSEMEM_VMEMMAP - pr_cont(" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n", + pr_notice(" vmemmap : 0x%16lx - 0x%16lx (%6ld GB maximum)\n", MLG(VMEMMAP_START, VMEMMAP_START + VMEMMAP_SIZE)); - pr_cont(" 0x%16lx - 0x%16lx (%6ld MB actual)\n", + pr_notice(" 0x%16lx - 0x%16lx (%6ld MB actual)\n", MLM((unsigned long)phys_to_page(memblock_start_of_DRAM()), (unsigned long)virt_to_page(high_memory))); #endif - pr_cont(" memory : 0x%16lx - 0x%16lx (%6ld MB)\n", + pr_notice(" memory : 0x%16lx - 0x%16lx (%6ld MB)\n", MLM(__phys_to_virt(memblock_start_of_DRAM()), (unsigned long)high_memory)); diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 778a985..4b32168 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -147,7 +147,7 @@ static int __init early_cpu_to_node(int cpu) static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) { - return node_distance(from, to); + return node_distance(early_cpu_to_node(from), early_cpu_to_node(to)); } static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, @@ -223,8 +223,11 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn) void *nd; int tnid; - pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", - nid, start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1); + if (start_pfn < end_pfn) + pr_info("Initmem setup node %d [mem %#010Lx-%#010Lx]\n", nid, + start_pfn << PAGE_SHIFT, (end_pfn << PAGE_SHIFT) - 1); + else + pr_info("Initmem setup node %d [<memory-less node>]\n", nid); nd_pa = memblock_alloc_try_nid(nd_size, SMP_CACHE_BYTES, nid); nd = __va(nd_pa); diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c index 8b8fe67..8d79286 100644 --- 
a/arch/blackfin/kernel/ptrace.c +++ b/arch/blackfin/kernel/ptrace.c @@ -271,7 +271,7 @@ long arch_ptrace(struct task_struct *child, long request, case BFIN_MEM_ACCESS_CORE: case BFIN_MEM_ACCESS_CORE_ONLY: copied = access_process_vm(child, addr, &tmp, - to_copy, 0); + to_copy, FOLL_FORCE); if (copied) break; @@ -324,7 +324,8 @@ long arch_ptrace(struct task_struct *child, long request, case BFIN_MEM_ACCESS_CORE: case BFIN_MEM_ACCESS_CORE_ONLY: copied = access_process_vm(child, addr, &data, - to_copy, 1); + to_copy, + FOLL_FORCE | FOLL_WRITE); break; case BFIN_MEM_ACCESS_DMA: if (safe_dma_memcpy(paddr, &data, to_copy)) diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c index b5698c8..0068fd4 100644 --- a/arch/cris/arch-v32/drivers/cryptocop.c +++ b/arch/cris/arch-v32/drivers/cryptocop.c @@ -2722,7 +2722,6 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig err = get_user_pages((unsigned long int)(oper.indata + prev_ix), noinpages, 0, /* read access only for in data */ - 0, /* no force */ inpages, NULL); @@ -2736,8 +2735,7 @@ static int cryptocop_ioctl_process(struct inode *inode, struct file *filp, unsig if (oper.do_cipher){ err = get_user_pages((unsigned long int)oper.cipher_outdata, nooutpages, - 1, /* write access for out data */ - 0, /* no force */ + FOLL_WRITE, /* write access for out data */ outpages, NULL); up_read(&current->mm->mmap_sem); @@ -3151,7 +3149,7 @@ static void print_dma_descriptors(struct cryptocop_int_operation *iop) printk("print_dma_descriptors start\n"); printk("iop:\n"); - printk("\tsid: 0x%lld\n", iop->sid); + printk("\tsid: 0x%llx\n", iop->sid); printk("\tcdesc_out: 0x%p\n", iop->cdesc_out); printk("\tcdesc_in: 0x%p\n", iop->cdesc_in); diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c index f085229..f0df654 100644 --- a/arch/cris/arch-v32/kernel/ptrace.c +++ b/arch/cris/arch-v32/kernel/ptrace.c @@ -147,7 +147,7 @@ long arch_ptrace(struct task_struct *child, long request, /* The trampoline page is globally mapped, no page table to traverse.*/ tmp = *(unsigned long*)addr; } else { - copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0); + copied = access_process_vm(child, addr, &tmp, sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; @@ -279,7 +279,7 @@ static int insn_size(struct task_struct *child, unsigned long pc) int opsize = 0; /* Read the opcode at pc (do what PTRACE_PEEKTEXT would do).
*/ - copied = access_process_vm(child, pc, &opcode, sizeof(opcode), 0); + copied = access_process_vm(child, pc, &opcode, sizeof(opcode), FOLL_FORCE); if (copied != sizeof(opcode)) return 0; diff --git a/arch/h8300/include/asm/thread_info.h b/arch/h8300/include/asm/thread_info.h index b408fe6..3cef068 100644 --- a/arch/h8300/include/asm/thread_info.h +++ b/arch/h8300/include/asm/thread_info.h @@ -31,7 +31,6 @@ struct thread_info { int cpu; /* cpu we're on */ int preempt_count; /* 0 => preemptable, <0 => BUG */ mm_segment_t addr_limit; - struct restart_block restart_block; }; /* @@ -44,9 +43,6 @@ struct thread_info { .cpu = 0, \ .preempt_count = INIT_PREEMPT_COUNT, \ .addr_limit = KERNEL_DS, \ - .restart_block = { \ - .fn = do_no_restart_syscall, \ - }, \ } #define init_thread_info (init_thread_union.thread_info) diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index ad1f81f..7138303 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -79,7 +79,7 @@ restore_sigcontext(struct sigcontext *usc, int *pd0) unsigned int er0; /* Always make any pending restarted system calls return -EINTR */ - current_thread_info()->restart_block.fn = do_no_restart_syscall; + current->restart_block.fn = do_no_restart_syscall; /* restore passed registers */ #define COPY(r) do { err |= get_user(regs->r, &usc->sc_##r); } while (0) diff --git a/arch/ia64/kernel/err_inject.c b/arch/ia64/kernel/err_inject.c index 09f8457..5ed0ea9 100644 --- a/arch/ia64/kernel/err_inject.c +++ b/arch/ia64/kernel/err_inject.c @@ -142,7 +142,7 @@ store_virtual_to_phys(struct device *dev, struct device_attribute *attr, u64 virt_addr=simple_strtoull(buf, NULL, 16); int ret; - ret = get_user_pages(virt_addr, 1, VM_READ, 0, NULL, NULL); + ret = get_user_pages(virt_addr, 1, FOLL_WRITE, NULL, NULL); if (ret<=0) { #ifdef ERR_INJ_DEBUG printk("Virtual address %lx is not existing.\n",virt_addr); diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 6f54d51..31aa8c0 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -453,7 +453,7 @@ ia64_peek (struct task_struct *child, struct switch_stack *child_stack, return 0; } } - copied = access_process_vm(child, addr, &ret, sizeof(ret), 0); + copied = access_process_vm(child, addr, &ret, sizeof(ret), FOLL_FORCE); if (copied != sizeof(ret)) return -EIO; *val = ret; @@ -489,7 +489,8 @@ ia64_poke (struct task_struct *child, struct switch_stack *child_stack, *ia64_rse_skip_regs(krbs, regnum) = val; } } - } else if (access_process_vm(child, addr, &val, sizeof(val), 1) + } else if (access_process_vm(child, addr, &val, sizeof(val), + FOLL_FORCE | FOLL_WRITE) != sizeof(val)) return -EIO; return 0; @@ -543,7 +544,8 @@ ia64_sync_user_rbs (struct task_struct *child, struct switch_stack *sw, ret = ia64_peek(child, sw, user_rbs_end, addr, &val); if (ret < 0) return ret; - if (access_process_vm(child, addr, &val, sizeof(val), 1) + if (access_process_vm(child, addr, &val, sizeof(val), + FOLL_FORCE | FOLL_WRITE) != sizeof(val)) return -EIO; } @@ -559,7 +561,8 @@ ia64_sync_kernel_rbs (struct task_struct *child, struct switch_stack *sw, /* now copy word for word from user rbs to kernel rbs: */ for (addr = user_rbs_start; addr < user_rbs_end; addr += 8) { - if (access_process_vm(child, addr, &val, sizeof(val), 0) + if (access_process_vm(child, addr, &val, sizeof(val), + FOLL_FORCE) != sizeof(val)) return -EIO; @@ -1156,7 +1159,8 @@ arch_ptrace (struct task_struct *child, long request, case PTRACE_PEEKTEXT: case PTRACE_PEEKDATA: /* read word at 
location addr */ - if (access_process_vm(child, addr, &data, sizeof(data), 0) + if (access_process_vm(child, addr, &data, sizeof(data), + FOLL_FORCE) != sizeof(data)) return -EIO; /* ensure return value is not mistaken for error code */ diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index 51f5e9a..c145605 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -493,7 +493,8 @@ unregister_all_debug_traps(struct task_struct *child) int i; for (i = 0; i < p->nr_trap; i++) - access_process_vm(child, p->addr[i], &p->insn[i], sizeof(p->insn[i]), 1); + access_process_vm(child, p->addr[i], &p->insn[i], sizeof(p->insn[i]), + FOLL_FORCE | FOLL_WRITE); p->nr_trap = 0; } @@ -537,7 +538,8 @@ embed_debug_trap(struct task_struct *child, unsigned long next_pc) unsigned long next_insn, code; unsigned long addr = next_pc & ~3; - if (access_process_vm(child, addr, &next_insn, sizeof(next_insn), 0) + if (access_process_vm(child, addr, &next_insn, sizeof(next_insn), + FOLL_FORCE) != sizeof(next_insn)) { return -1; /* error */ } @@ -546,7 +548,8 @@ embed_debug_trap(struct task_struct *child, unsigned long next_pc) if (register_debug_trap(child, next_pc, next_insn, &code)) { return -1; /* error */ } - if (access_process_vm(child, addr, &code, sizeof(code), 1) + if (access_process_vm(child, addr, &code, sizeof(code), + FOLL_FORCE | FOLL_WRITE) != sizeof(code)) { return -1; /* error */ } @@ -562,7 +565,8 @@ withdraw_debug_trap(struct pt_regs *regs) addr = (regs->bpc - 2) & ~3; regs->bpc -= 2; if (unregister_debug_trap(current, addr, &code)) { - access_process_vm(current, addr, &code, sizeof(code), 1); + access_process_vm(current, addr, &code, sizeof(code), + FOLL_FORCE | FOLL_WRITE); invalidate_cache(); } } @@ -589,7 +593,8 @@ void user_enable_single_step(struct task_struct *child) /* Compute next pc. */ pc = get_stack_long(child, PT_BPC); - if (access_process_vm(child, pc&~3, &insn, sizeof(insn), 0) + if (access_process_vm(child, pc&~3, &insn, sizeof(insn), + FOLL_FORCE) != sizeof(insn)) return; diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index 283b5a1..7e71a4e 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -70,7 +70,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, break; copied = access_process_vm(child, (u64)addrOthers, &tmp, - sizeof(tmp), 0); + sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; ret = put_user(tmp, (u32 __user *) (unsigned long) data); @@ -179,7 +179,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, break; ret = 0; if (access_process_vm(child, (u64)addrOthers, &data, - sizeof(data), 1) == sizeof(data)) + sizeof(data), + FOLL_FORCE | FOLL_WRITE) == sizeof(data)) break; ret = -EIO; break; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index ce96149..622037d 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -14,6 +14,7 @@ #include <linux/err.h> #include <linux/kdebug.h> #include <linux/module.h> +#include <linux/uaccess.h> #include <linux/vmalloc.h> #include <linux/fs.h> #include <linux/bootmem.h> diff --git a/arch/mips/mm/gup.c b/arch/mips/mm/gup.c index 42d124f..d8c3c15 100644 --- a/arch/mips/mm/gup.c +++ b/arch/mips/mm/gup.c @@ -287,7 +287,7 @@ slow_irqon: pages += nr; ret = get_user_pages_unlocked(start, (end - start) >> PAGE_SHIFT, - write, 0, pages); + pages, write ? 
FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/powerpc/boot/main.c b/arch/powerpc/boot/main.c index f7a184b..57d42d1 100644 --- a/arch/powerpc/boot/main.c +++ b/arch/powerpc/boot/main.c @@ -32,9 +32,16 @@ static struct addr_range prep_kernel(void) void *addr = 0; struct elf_info ei; long len; + int uncompressed_image = 0; - partial_decompress(vmlinuz_addr, vmlinuz_size, + len = partial_decompress(vmlinuz_addr, vmlinuz_size, elfheader, sizeof(elfheader), 0); + /* assume uncompressed data if -1 is returned */ + if (len == -1) { + uncompressed_image = 1; + memcpy(elfheader, vmlinuz_addr, sizeof(elfheader)); + printf("No valid compressed data found, assume uncompressed data\n\r"); + } if (!parse_elf64(elfheader, &ei) && !parse_elf32(elfheader, &ei)) fatal("Error: not a valid PPC32 or PPC64 ELF file!\n\r"); @@ -67,6 +74,13 @@ static struct addr_range prep_kernel(void) "device tree\n\r"); } + if (uncompressed_image) { + memcpy(addr, vmlinuz_addr + ei.elfoffset, ei.loadsize); + printf("0x%lx bytes of uncompressed data copied\n\r", + ei.loadsize); + goto out; + } + /* Finally, decompress the kernel */ printf("Decompressing (0x%p <- 0x%p:0x%p)...\n\r", addr, vmlinuz_addr, vmlinuz_addr+vmlinuz_size); @@ -82,7 +96,7 @@ static struct addr_range prep_kernel(void) len, ei.loadsize); printf("Done! Decompressed 0x%lx bytes\n\r", len); - +out: flush_cache(addr, ei.loadsize); return (struct addr_range){addr, ei.memsize}; diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index ee655ed..1e8fceb 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -53,10 +53,8 @@ static inline __sum16 csum_fold(__wsum sum) return (__force __sum16)(~((__force u32)sum + tmp) >> 16); } -static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, - __wsum sum) +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) { #ifdef __powerpc64__ unsigned long s = (__force u32)sum; @@ -83,10 +81,8 @@ static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, * computes the checksum of the TCP/UDP pseudo-header * returns a 16-bit checksum, already complemented */ -static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, - unsigned short len, - unsigned short proto, - __wsum sum) +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) { return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); } diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index 01b8a13..3919332 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -26,7 +26,7 @@ extern u64 pnv_first_deep_stop_state; std r0,0(r1); \ ptesync; \ ld r0,0(r1); \ -1: cmp cr0,r0,r0; \ +1: cmpd cr0,r0,r0; \ bne 1b; \ IDLE_INST; \ b . 
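For context on the csum_tcpudp_nofold()/csum_tcpudp_magic() prototype change a few hunks above (len widens to __u32, proto narrows to __u8): these helpers produce the 16-bit one's-complement checksum of the IPv4 pseudo header, folded together with a partial sum over the TCP/UDP payload. Below is a minimal portable C sketch of that computation, for illustration only; it is not the powerpc implementation, and the function names are invented here.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/* One's-complement sum of big-endian 16-bit words (RFC 1071 style). */
static uint32_t ones_complement_sum(const uint8_t *p, size_t n, uint64_t sum)
{
	while (n > 1) {
		sum += ((uint32_t)p[0] << 8) | p[1];
		p += 2;
		n -= 2;
	}
	if (n)				/* odd trailing byte, zero-padded */
		sum += (uint32_t)p[0] << 8;
	while (sum >> 16)		/* fold carries back into bits 0-15 */
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint32_t)sum;
}

/* saddr/daddr in network byte order; len and proto are plain integers,
 * mirroring the new __u32 len / __u8 proto kernel prototypes. */
static uint16_t pseudo_hdr_csum(const uint8_t saddr[4], const uint8_t daddr[4],
				uint32_t len, uint8_t proto,
				uint32_t payload_sum)
{
	uint8_t ph[12];

	memcpy(ph, saddr, 4);
	memcpy(ph + 4, daddr, 4);
	ph[8]  = 0;			/* fixed zero byte of the pseudo header */
	ph[9]  = proto;			/* e.g. 6 for TCP, 17 for UDP */
	ph[10] = (uint8_t)(len >> 8);	/* TCP/UDP length; <= 0xffff over IPv4 */
	ph[11] = (uint8_t)len;
	return (uint16_t)~ones_complement_sum(ph, sizeof(ph), payload_sum);
}

Because one's-complement addition is commutative and associative, the pseudo header can be summed before, after, or interleaved with the payload, which is why the kernel can split the work between csum_partial() and csum_tcpudp_magic().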
diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 2e4e7d8..84d49b1 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -93,6 +93,10 @@ ld reg,PACAKBASE(r13); /* get high part of &label */ \ ori reg,reg,(FIXED_SYMBOL_ABS_ADDR(label))@l; +#define __LOAD_HANDLER(reg, label) \ + ld reg,PACAKBASE(r13); \ + ori reg,reg,(ABS_ADDR(label))@l; + /* Exception register prefixes */ #define EXC_HV H #define EXC_STD @@ -208,6 +212,18 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define kvmppc_interrupt kvmppc_interrupt_pr #endif +#ifdef CONFIG_RELOCATABLE +#define BRANCH_TO_COMMON(reg, label) \ + __LOAD_HANDLER(reg, label); \ + mtctr reg; \ + bctr + +#else +#define BRANCH_TO_COMMON(reg, label) \ + b label + +#endif + #define __KVM_HANDLER_PROLOG(area, n) \ BEGIN_FTR_SECTION_NESTED(947) \ ld r10,area+EX_CFAR(r13); \ diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h index f6f68f7..99e1397 100644 --- a/arch/powerpc/include/asm/tlb.h +++ b/arch/powerpc/include/asm/tlb.h @@ -52,11 +52,23 @@ static inline int mm_is_core_local(struct mm_struct *mm) return cpumask_subset(mm_cpumask(mm), topology_sibling_cpumask(smp_processor_id())); } + +static inline int mm_is_thread_local(struct mm_struct *mm) +{ + return cpumask_equal(mm_cpumask(mm), + cpumask_of(smp_processor_id())); +} + #else static inline int mm_is_core_local(struct mm_struct *mm) { return 1; } + +static inline int mm_is_thread_local(struct mm_struct *mm) +{ + return 1; +} #endif #endif /* __KERNEL__ */ diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index cf12c58..e8cdfec 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -16,6 +16,10 @@ #define __NR__exit __NR_exit +#define __IGNORE_pkey_mprotect +#define __IGNORE_pkey_alloc +#define __IGNORE_pkey_free + #ifndef __ASSEMBLY__ #include <linux/types.h> diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f129408..08ba447 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -95,19 +95,35 @@ __start_interrupts: /* No virt vectors corresponding with 0x0..0x100 */ EXC_VIRT_NONE(0x4000, 0x4100) -EXC_REAL_BEGIN(system_reset, 0x100, 0x200) - SET_SCRATCH0(r13) + #ifdef CONFIG_PPC_P7_NAP -BEGIN_FTR_SECTION - /* Running native on arch 2.06 or later, check if we are - * waking up from nap/sleep/winkle. + /* + * If running native on arch 2.06 or later, check if we are waking up + * from nap/sleep/winkle, and branch to idle handler. */ - mfspr r13,SPRN_SRR1 - rlwinm. r13,r13,47-31,30,31 - beq 9f +#define IDLETEST(n) \ + BEGIN_FTR_SECTION ; \ + mfspr r10,SPRN_SRR1 ; \ + rlwinm. 
r10,r10,47-31,30,31 ; \ + beq- 1f ; \ + cmpwi cr3,r10,2 ; \ + BRANCH_TO_COMMON(r10, system_reset_idle_common) ; \ +1: \ + END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) +#else +#define IDLETEST NOTEST +#endif - cmpwi cr3,r13,2 - GET_PACA(r13) +EXC_REAL_BEGIN(system_reset, 0x100, 0x200) + SET_SCRATCH0(r13) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, + IDLETEST, 0x100) + +EXC_REAL_END(system_reset, 0x100, 0x200) +EXC_VIRT_NONE(0x4100, 0x4200) + +#ifdef CONFIG_PPC_P7_NAP +EXC_COMMON_BEGIN(system_reset_idle_common) bl pnv_restore_hyp_resource li r0,PNV_THREAD_RUNNING @@ -130,14 +146,8 @@ BEGIN_FTR_SECTION blt cr3,2f b pnv_wakeup_loss 2: b pnv_wakeup_noloss +#endif -9: -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) -#endif /* CONFIG_PPC_P7_NAP */ - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common, EXC_STD, - NOTEST, 0x100) -EXC_REAL_END(system_reset, 0x100, 0x200) -EXC_VIRT_NONE(0x4100, 0x4200) EXC_COMMON(system_reset_common, 0x100, system_reset_exception) #ifdef CONFIG_PPC_PSERIES @@ -817,10 +827,8 @@ EXC_VIRT(trap_0b, 0x4b00, 0x4c00, 0xb00) TRAMP_KVM(PACA_EXGEN, 0xb00) EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) - -#define LOAD_SYSCALL_HANDLER(reg) \ - ld reg,PACAKBASE(r13); \ - ori reg,reg,(ABS_ADDR(system_call_common))@l; +#define LOAD_SYSCALL_HANDLER(reg) \ + __LOAD_HANDLER(reg, system_call_common) /* Syscall routine is used twice, in reloc-off and reloc-on paths */ #define SYSCALL_PSERIES_1 \ diff --git a/arch/powerpc/kernel/hw_breakpoint.c b/arch/powerpc/kernel/hw_breakpoint.c index 9781c69..03d089b 100644 --- a/arch/powerpc/kernel/hw_breakpoint.c +++ b/arch/powerpc/kernel/hw_breakpoint.c @@ -275,7 +275,7 @@ int hw_breakpoint_handler(struct die_args *args) if (!stepped) { WARN(1, "Unable to handle hardware breakpoint. Breakpoint at " "0x%lx will be disabled.", info->address); - perf_event_disable(bp); + perf_event_disable_inatomic(bp); goto out; } /* diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index bd739fe..72dac0b 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -90,6 +90,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) * Threads will spin in HMT_LOW until the lock bit is cleared. * r14 - pointer to core_idle_state * r15 - used to load contents of core_idle_state + * r9 - used as a temporary variable */ core_idle_lock_held: @@ -99,6 +100,8 @@ core_idle_lock_held: bne 3b HMT_MEDIUM lwarx r15,0,r14 + andi. r9,r15,PNV_CORE_IDLE_LOCK_BIT + bne core_idle_lock_held blr /* @@ -163,12 +166,6 @@ _GLOBAL(pnv_powersave_common) std r9,_MSR(r1) std r1,PACAR1(r13) -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE - /* Tell KVM we're entering idle */ - li r4,KVM_HWTHREAD_IN_IDLE - stb r4,HSTATE_HWTHREAD_STATE(r13) -#endif - /* * Go to real mode to do the nap, as required by the architecture. * Also, we need to be in real mode before setting hwthread_state, @@ -185,6 +182,26 @@ _GLOBAL(pnv_powersave_common) .globl pnv_enter_arch207_idle_mode pnv_enter_arch207_idle_mode: +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Tell KVM we're entering idle */ + li r4,KVM_HWTHREAD_IN_IDLE + /******************************************************/ + /* N O T E W E L L ! ! ! N O T E W E L L */ + /* The following store to HSTATE_HWTHREAD_STATE(r13) */ + /* MUST occur in real mode, i.e. with the MMU off, */ + /* and the MMU must stay off until we clear this flag */ + /* and test HSTATE_HWTHREAD_REQ(r13) in the system */ + /* reset interrupt vector in exceptions-64s.S. 
*/ + /* The reason is that another thread can switch the */ + /* MMU to a guest context whenever this flag is set */ + /* to KVM_HWTHREAD_IN_IDLE, and if the MMU was on, */ + /* that would potentially cause this thread to start */ + /* executing instructions from guest memory in */ + /* hypervisor mode, leading to a host crash or data */ + /* corruption, or worse. */ + /******************************************************/ + stb r4,HSTATE_HWTHREAD_STATE(r13) +#endif stb r3,PACA_THREAD_IDLE_STATE(r13) cmpwi cr3,r3,PNV_THREAD_SLEEP bge cr3,2f @@ -250,6 +267,12 @@ enter_winkle: * r3 - requested stop state */ power_enter_stop: +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* Tell KVM we're entering idle */ + li r4,KVM_HWTHREAD_IN_IDLE + /* DO THIS IN REAL MODE! See comment above. */ + stb r4,HSTATE_HWTHREAD_STATE(r13) +#endif /* * Check if the requested state is a deep idle state. */ diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 9e7c10f..ce6dc61 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -1012,7 +1012,7 @@ void restore_tm_state(struct pt_regs *regs) /* Ensure that restore_math() will restore */ if (msr_diff & MSR_FP) current->thread.load_fp = 1; -#ifdef CONFIG_ALIVEC +#ifdef CONFIG_ALTIVEC if (cpu_has_feature(CPU_FTR_ALTIVEC) && msr_diff & MSR_VEC) current->thread.load_vec = 1; #endif diff --git a/arch/powerpc/kernel/ptrace32.c b/arch/powerpc/kernel/ptrace32.c index f52b7db3..010b7b3 100644 --- a/arch/powerpc/kernel/ptrace32.c +++ b/arch/powerpc/kernel/ptrace32.c @@ -74,7 +74,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, break; copied = access_process_vm(child, (u64)addrOthers, &tmp, - sizeof(tmp), 0); + sizeof(tmp), FOLL_FORCE); if (copied != sizeof(tmp)) break; ret = put_user(tmp, (u32 __user *)data); @@ -179,7 +179,8 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, break; ret = 0; if (access_process_vm(child, (u64)addrOthers, &tmp, - sizeof(tmp), 1) == sizeof(tmp)) + sizeof(tmp), + FOLL_FORCE | FOLL_WRITE) == sizeof(tmp)) break; ret = -EIO; break; diff --git a/arch/powerpc/kvm/book3s_hv_rm_xics.c b/arch/powerpc/kvm/book3s_hv_rm_xics.c index 82ff5de..a0ea63a 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_xics.c +++ b/arch/powerpc/kvm/book3s_hv_rm_xics.c @@ -23,6 +23,7 @@ #include <asm/ppc-opcode.h> #include <asm/pnv-pci.h> #include <asm/opal.h> +#include <asm/smp.h> #include "book3s_xics.h" diff --git a/arch/powerpc/mm/copro_fault.c b/arch/powerpc/mm/copro_fault.c index bb03542..362954f 100644 --- a/arch/powerpc/mm/copro_fault.c +++ b/arch/powerpc/mm/copro_fault.c @@ -106,6 +106,8 @@ int copro_calculate_slb(struct mm_struct *mm, u64 ea, struct copro_slb *slb) switch (REGION_ID(ea)) { case USER_REGION_ID: pr_devel("%s: 0x%llx -- USER_REGION_ID\n", __func__, ea); + if (mm == NULL) + return 1; psize = get_slice_psize(mm, ea); ssize = user_segment_size(ea); vsid = get_vsid(mm->context.id, ea, ssize); diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 75b9cd6..a51c188 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -845,7 +845,7 @@ void __init dump_numa_cpu_topology(void) return; for_each_online_node(node) { - printk(KERN_DEBUG "Node %d CPUs:", node); + pr_info("Node %d CPUs:", node); count = 0; /* @@ -856,52 +856,18 @@ void __init dump_numa_cpu_topology(void) if (cpumask_test_cpu(cpu, node_to_cpumask_map[node])) { if (count == 0) - printk(" %u", cpu); + pr_cont(" %u", cpu); ++count; } else { if (count > 1) - printk("-%u", cpu - 1); + 
pr_cont("-%u", cpu - 1); count = 0; } } if (count > 1) - printk("-%u", nr_cpu_ids - 1); - printk("\n"); - } -} - -static void __init dump_numa_memory_topology(void) -{ - unsigned int node; - unsigned int count; - - if (min_common_depth == -1 || !numa_enabled) - return; - - for_each_online_node(node) { - unsigned long i; - - printk(KERN_DEBUG "Node %d Memory:", node); - - count = 0; - - for (i = 0; i < memblock_end_of_DRAM(); - i += (1 << SECTION_SIZE_BITS)) { - if (early_pfn_to_nid(i >> PAGE_SHIFT) == node) { - if (count == 0) - printk(" 0x%lx", i); - ++count; - } else { - if (count > 0) - printk("-0x%lx", i); - count = 0; - } - } - - if (count > 0) - printk("-0x%lx", i); - printk("\n"); + pr_cont("-%u", nr_cpu_ids - 1); + pr_cont("\n"); } } @@ -947,8 +913,6 @@ void __init initmem_init(void) if (parse_numa_properties()) setup_nonnuma(); - else - dump_numa_memory_topology(); memblock_dump_all(); diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 0e49ec5..bda8c43 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -175,7 +175,7 @@ void radix__flush_tlb_mm(struct mm_struct *mm) if (unlikely(pid == MMU_NO_CONTEXT)) goto no_context; - if (!mm_is_core_local(mm)) { + if (!mm_is_thread_local(mm)) { int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); if (lock_tlbie) @@ -201,7 +201,7 @@ void radix__flush_tlb_pwc(struct mmu_gather *tlb, unsigned long addr) if (unlikely(pid == MMU_NO_CONTEXT)) goto no_context; - if (!mm_is_core_local(mm)) { + if (!mm_is_thread_local(mm)) { int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); if (lock_tlbie) @@ -226,7 +226,7 @@ void radix__flush_tlb_page_psize(struct mm_struct *mm, unsigned long vmaddr, pid = mm ? mm->context.id : 0; if (unlikely(pid == MMU_NO_CONTEXT)) goto bail; - if (!mm_is_core_local(mm)) { + if (!mm_is_thread_local(mm)) { int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); if (lock_tlbie) @@ -321,7 +321,7 @@ void radix__flush_tlb_range_psize(struct mm_struct *mm, unsigned long start, { unsigned long pid; unsigned long addr; - int local = mm_is_core_local(mm); + int local = mm_is_thread_local(mm); unsigned long ap = mmu_get_ap(psize); int lock_tlbie = !mmu_has_feature(MMU_FTR_LOCKLESS_TLBIE); unsigned long page_size = 1UL << mmu_psize_defs[psize].shift; diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 64053d9..836c562 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -12,9 +12,7 @@ #ifndef __ASSEMBLY__ -unsigned long return_address(int depth); - -#define ftrace_return_address(n) return_address(n) +#define ftrace_return_address(n) __builtin_return_address(n) void _mcount(void); void ftrace_caller(void); diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index 0332317..602af69 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -192,7 +192,7 @@ struct task_struct; struct mm_struct; struct seq_file; -typedef int (*dump_trace_func_t)(void *data, unsigned long address); +typedef int (*dump_trace_func_t)(void *data, unsigned long address, int reliable); void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task, unsigned long sp); diff --git a/arch/s390/include/asm/unistd.h b/arch/s390/include/asm/unistd.h index 02613ba..3066031 100644 --- a/arch/s390/include/asm/unistd.h +++ b/arch/s390/include/asm/unistd.h @@ -9,6 +9,9 @@ #include <uapi/asm/unistd.h> #define __IGNORE_time +#define __IGNORE_pkey_mprotect +#define 
__IGNORE_pkey_alloc +#define __IGNORE_pkey_free #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_SYS_ALARM diff --git a/arch/s390/kernel/dis.c b/arch/s390/kernel/dis.c index 43446fa..c74c592 100644 --- a/arch/s390/kernel/dis.c +++ b/arch/s390/kernel/dis.c @@ -2014,12 +2014,12 @@ void show_code(struct pt_regs *regs) *ptr++ = '\t'; ptr += print_insn(ptr, code + start, addr); start += opsize; - printk("%s", buffer); + pr_cont("%s", buffer); ptr = buffer; ptr += sprintf(ptr, "\n "); hops++; } - printk("\n"); + pr_cont("\n"); } void print_fn_code(unsigned char *code, unsigned long len) diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 6693383..55d4fe1 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -38,10 +38,10 @@ __dump_trace(dump_trace_func_t func, void *data, unsigned long sp, if (sp < low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; + if (func(data, sf->gprs[8], 0)) + return sp; /* Follow the backchain. */ while (1) { - if (func(data, sf->gprs[8])) - return sp; low = sp; sp = sf->back_chain; if (!sp) @@ -49,6 +49,8 @@ __dump_trace(dump_trace_func_t func, void *data, unsigned long sp, if (sp <= low || sp > high - sizeof(*sf)) return sp; sf = (struct stack_frame *) sp; + if (func(data, sf->gprs[8], 1)) + return sp; } /* Zero backchain detected, check for interrupt frame. */ sp = (unsigned long) (sf + 1); @@ -56,7 +58,7 @@ __dump_trace(dump_trace_func_t func, void *data, unsigned long sp, return sp; regs = (struct pt_regs *) sp; if (!user_mode(regs)) { - if (func(data, regs->psw.addr)) + if (func(data, regs->psw.addr, 1)) return sp; } low = sp; @@ -85,33 +87,12 @@ void dump_trace(dump_trace_func_t func, void *data, struct task_struct *task, } EXPORT_SYMBOL_GPL(dump_trace); -struct return_address_data { - unsigned long address; - int depth; -}; - -static int __return_address(void *data, unsigned long address) -{ - struct return_address_data *rd = data; - - if (rd->depth--) - return 0; - rd->address = address; - return 1; -} - -unsigned long return_address(int depth) -{ - struct return_address_data rd = { .depth = depth + 2 }; - - dump_trace(__return_address, &rd, NULL, current_stack_pointer()); - return rd.address; -} -EXPORT_SYMBOL_GPL(return_address); - -static int show_address(void *data, unsigned long address) +static int show_address(void *data, unsigned long address, int reliable) { - printk("([<%016lx>] %pSR)\n", address, (void *)address); + if (reliable) + printk(" [<%016lx>] %pSR \n", address, (void *)address); + else + printk("([<%016lx>] %pSR)\n", address, (void *)address); return 0; } @@ -138,14 +119,14 @@ void show_stack(struct task_struct *task, unsigned long *sp) else stack = (unsigned long *)task->thread.ksp; } + printk(KERN_DEFAULT "Stack:\n"); for (i = 0; i < 20; i++) { if (((addr_t) stack & (THREAD_SIZE-1)) == 0) break; - if ((i * sizeof(long) % 32) == 0) - printk("%s ", i == 0 ? "" : "\n"); - printk("%016lx ", *stack++); + if (i % 4 == 0) + printk(KERN_DEFAULT " "); + pr_cont("%016lx%c", *stack++, i % 4 == 3 ? '\n' : ' '); } - printk("\n"); show_trace(task, (unsigned long)sp); } @@ -163,13 +144,13 @@ void show_registers(struct pt_regs *regs) mode = user_mode(regs) ? 
"User" : "Krnl"; printk("%s PSW : %p %p", mode, (void *)regs->psw.mask, (void *)regs->psw.addr); if (!user_mode(regs)) - printk(" (%pSR)", (void *)regs->psw.addr); - printk("\n"); + pr_cont(" (%pSR)", (void *)regs->psw.addr); + pr_cont("\n"); printk(" R:%x T:%x IO:%x EX:%x Key:%x M:%x W:%x " "P:%x AS:%x CC:%x PM:%x", psw->r, psw->t, psw->i, psw->e, psw->key, psw->m, psw->w, psw->p, psw->as, psw->cc, psw->pm); - printk(" RI:%x EA:%x", psw->ri, psw->eaba); - printk("\n%s GPRS: %016lx %016lx %016lx %016lx\n", mode, + pr_cont(" RI:%x EA:%x\n", psw->ri, psw->eaba); + printk("%s GPRS: %016lx %016lx %016lx %016lx\n", mode, regs->gprs[0], regs->gprs[1], regs->gprs[2], regs->gprs[3]); printk(" %016lx %016lx %016lx %016lx\n", regs->gprs[4], regs->gprs[5], regs->gprs[6], regs->gprs[7]); @@ -205,14 +186,14 @@ void die(struct pt_regs *regs, const char *str) printk("%s: %04x ilc:%d [#%d] ", str, regs->int_code & 0xffff, regs->int_code >> 17, ++die_counter); #ifdef CONFIG_PREEMPT - printk("PREEMPT "); + pr_cont("PREEMPT "); #endif #ifdef CONFIG_SMP - printk("SMP "); + pr_cont("SMP "); #endif if (debug_pagealloc_enabled()) - printk("DEBUG_PAGEALLOC"); - printk("\n"); + pr_cont("DEBUG_PAGEALLOC"); + pr_cont("\n"); notify_die(DIE_OOPS, str, regs, 0, regs->int_code & 0xffff, SIGSEGV); print_modules(); show_regs(regs); diff --git a/arch/s390/kernel/perf_event.c b/arch/s390/kernel/perf_event.c index 17431f6..955a7b6 100644 --- a/arch/s390/kernel/perf_event.c +++ b/arch/s390/kernel/perf_event.c @@ -222,7 +222,7 @@ static int __init service_level_perf_register(void) } arch_initcall(service_level_perf_register); -static int __perf_callchain_kernel(void *data, unsigned long address) +static int __perf_callchain_kernel(void *data, unsigned long address, int reliable) { struct perf_callchain_entry_ctx *entry = data; diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 44f84b2..355db9d 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -27,12 +27,12 @@ static int __save_address(void *data, unsigned long address, int nosched) return 1; } -static int save_address(void *data, unsigned long address) +static int save_address(void *data, unsigned long address, int reliable) { return __save_address(data, address, 0); } -static int save_address_nosched(void *data, unsigned long address) +static int save_address_nosched(void *data, unsigned long address, int reliable) { return __save_address(data, address, 1); } diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c index 1cab8a1..7a27eeb 100644 --- a/arch/s390/kvm/intercept.c +++ b/arch/s390/kvm/intercept.c @@ -119,8 +119,13 @@ static int handle_validity(struct kvm_vcpu *vcpu) vcpu->stat.exit_validity++; trace_kvm_s390_intercept_validity(vcpu, viwhy); - WARN_ONCE(true, "kvm: unhandled validity intercept 0x%x\n", viwhy); - return -EOPNOTSUPP; + KVM_EVENT(3, "validity intercept 0x%x for pid %u (kvm 0x%pK)", viwhy, + current->pid, vcpu->kvm); + + /* do not warn on invalid runtime instrumentation mode */ + WARN_ONCE(viwhy != 0x44, "kvm: unhandled validity intercept 0x%x\n", + viwhy); + return -EINVAL; } static int handle_instruction(struct kvm_vcpu *vcpu) diff --git a/arch/s390/mm/gup.c b/arch/s390/mm/gup.c index adb0c34..18d4107e 100644 --- a/arch/s390/mm/gup.c +++ b/arch/s390/mm/gup.c @@ -266,7 +266,8 @@ int get_user_pages_fast(unsigned long start, int nr_pages, int write, /* Try to get the remaining pages with get_user_pages */ start += nr << PAGE_SHIFT; pages += nr; - ret = get_user_pages_unlocked(start, 
nr_pages - nr, write, 0, pages); + ret = get_user_pages_unlocked(start, nr_pages - nr, pages, + write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) ret = (ret < 0) ? nr : ret + nr; diff --git a/arch/s390/mm/hugetlbpage.c b/arch/s390/mm/hugetlbpage.c index cd404aa..4a0c5bc 100644 --- a/arch/s390/mm/hugetlbpage.c +++ b/arch/s390/mm/hugetlbpage.c @@ -217,6 +217,7 @@ static __init int setup_hugepagesz(char *opt) } else if (MACHINE_HAS_EDAT2 && size == PUD_SIZE) { hugetlb_add_hstate(PUD_SHIFT - PAGE_SHIFT); } else { + hugetlb_bad_size(); pr_err("hugepagesz= specifies an unsupported page size %s\n", string); return 0; diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index f56a39b..b3e9d18 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -151,36 +151,40 @@ void __init free_initrd_mem(unsigned long start, unsigned long end) #ifdef CONFIG_MEMORY_HOTPLUG int arch_add_memory(int nid, u64 start, u64 size, bool for_device) { - unsigned long normal_end_pfn = PFN_DOWN(memblock_end_of_DRAM()); - unsigned long dma_end_pfn = PFN_DOWN(MAX_DMA_ADDRESS); + unsigned long zone_start_pfn, zone_end_pfn, nr_pages; unsigned long start_pfn = PFN_DOWN(start); unsigned long size_pages = PFN_DOWN(size); - unsigned long nr_pages; - int rc, zone_enum; + pg_data_t *pgdat = NODE_DATA(nid); + struct zone *zone; + int rc, i; rc = vmem_add_mapping(start, size); if (rc) return rc; - while (size_pages > 0) { - if (start_pfn < dma_end_pfn) { - nr_pages = (start_pfn + size_pages > dma_end_pfn) ? - dma_end_pfn - start_pfn : size_pages; - zone_enum = ZONE_DMA; - } else if (start_pfn < normal_end_pfn) { - nr_pages = (start_pfn + size_pages > normal_end_pfn) ? - normal_end_pfn - start_pfn : size_pages; - zone_enum = ZONE_NORMAL; + for (i = 0; i < MAX_NR_ZONES; i++) { + zone = pgdat->node_zones + i; + if (zone_idx(zone) != ZONE_MOVABLE) { + /* Add range within existing zone limits, if possible */ + zone_start_pfn = zone->zone_start_pfn; + zone_end_pfn = zone->zone_start_pfn + + zone->spanned_pages; } else { - nr_pages = size_pages; - zone_enum = ZONE_MOVABLE; + /* Add remaining range to ZONE_MOVABLE */ + zone_start_pfn = start_pfn; + zone_end_pfn = start_pfn + size_pages; } - rc = __add_pages(nid, NODE_DATA(nid)->node_zones + zone_enum, - start_pfn, size_pages); + if (start_pfn < zone_start_pfn || start_pfn >= zone_end_pfn) + continue; + nr_pages = (start_pfn + size_pages > zone_end_pfn) ? + zone_end_pfn - start_pfn : size_pages; + rc = __add_pages(nid, zone, start_pfn, nr_pages); if (rc) break; start_pfn += nr_pages; size_pages -= nr_pages; + if (!size_pages) + break; } if (rc) vmem_remove_mapping(start, size); diff --git a/arch/s390/oprofile/init.c b/arch/s390/oprofile/init.c index 16f4c39..9a4de45 100644 --- a/arch/s390/oprofile/init.c +++ b/arch/s390/oprofile/init.c @@ -13,7 +13,7 @@ #include <linux/init.h> #include <asm/processor.h> -static int __s390_backtrace(void *data, unsigned long address) +static int __s390_backtrace(void *data, unsigned long address, int reliable) { unsigned int *depth = data; diff --git a/arch/score/kernel/ptrace.c b/arch/score/kernel/ptrace.c index 5583618..4f7314d 100644 --- a/arch/score/kernel/ptrace.c +++ b/arch/score/kernel/ptrace.c @@ -131,7 +131,7 @@ read_tsk_long(struct task_struct *child, { int copied; - copied = access_process_vm(child, addr, res, sizeof(*res), 0); + copied = access_process_vm(child, addr, res, sizeof(*res), FOLL_FORCE); return copied != sizeof(*res) ? 
-EIO : 0; } @@ -142,7 +142,7 @@ read_tsk_short(struct task_struct *child, { int copied; - copied = access_process_vm(child, addr, res, sizeof(*res), 0); + copied = access_process_vm(child, addr, res, sizeof(*res), FOLL_FORCE); return copied != sizeof(*res) ? -EIO : 0; } @@ -153,7 +153,8 @@ write_tsk_short(struct task_struct *child, { int copied; - copied = access_process_vm(child, addr, &val, sizeof(val), 1); + copied = access_process_vm(child, addr, &val, sizeof(val), + FOLL_FORCE | FOLL_WRITE); return copied != sizeof(val) ? -EIO : 0; } @@ -164,7 +165,8 @@ write_tsk_long(struct task_struct *child, { int copied; - copied = access_process_vm(child, addr, &val, sizeof(val), 1); + copied = access_process_vm(child, addr, &val, sizeof(val), + FOLL_FORCE | FOLL_WRITE); return copied != sizeof(val) ? -EIO : 0; } diff --git a/arch/sh/Makefile b/arch/sh/Makefile index 0047666..336f33a 100644 --- a/arch/sh/Makefile +++ b/arch/sh/Makefile @@ -31,7 +31,7 @@ isa-y := $(isa-y)-up endif cflags-$(CONFIG_CPU_SH2) := $(call cc-option,-m2,) -cflags-$(CONFIG_CPU_J2) := $(call cc-option,-mj2,) +cflags-$(CONFIG_CPU_J2) += $(call cc-option,-mj2,) cflags-$(CONFIG_CPU_SH2A) += $(call cc-option,-m2a,) \ $(call cc-option,-m2a-nofpu,) \ $(call cc-option,-m4-nofpu,) diff --git a/arch/sh/boards/Kconfig b/arch/sh/boards/Kconfig index e9c2c42..4e21949 100644 --- a/arch/sh/boards/Kconfig +++ b/arch/sh/boards/Kconfig @@ -22,6 +22,16 @@ config SH_DEVICE_TREE have sufficient driver coverage to use this option; do not select it if you are using original SuperH hardware. +config SH_JCORE_SOC + bool "J-Core SoC" + depends on SH_DEVICE_TREE && (CPU_SH2 || CPU_J2) + select CLKSRC_JCORE_PIT + select JCORE_AIC + default y if CPU_J2 + help + Select this option to include drivers core components of the + J-Core SoC, including interrupt controllers and timers. + config SH_SOLUTION_ENGINE bool "SolutionEngine" select SOLUTION_ENGINE diff --git a/arch/sh/configs/j2_defconfig b/arch/sh/configs/j2_defconfig index 94d1eca..2eb81ebe 100644 --- a/arch/sh/configs/j2_defconfig +++ b/arch/sh/configs/j2_defconfig @@ -8,6 +8,7 @@ CONFIG_MEMORY_START=0x10000000 CONFIG_MEMORY_SIZE=0x04000000 CONFIG_CPU_BIG_ENDIAN=y CONFIG_SH_DEVICE_TREE=y +CONFIG_SH_JCORE_SOC=y CONFIG_HZ_100=y CONFIG_CMDLINE_OVERWRITE=y CONFIG_CMDLINE="console=ttyUL0 earlycon" @@ -20,6 +21,7 @@ CONFIG_INET=y CONFIG_DEVTMPFS=y CONFIG_DEVTMPFS_MOUNT=y CONFIG_NETDEVICES=y +CONFIG_SERIAL_EARLYCON=y CONFIG_SERIAL_UARTLITE=y CONFIG_SERIAL_UARTLITE_CONSOLE=y CONFIG_I2C=y diff --git a/arch/sh/mm/gup.c b/arch/sh/mm/gup.c index 40fa6c8..063c298 100644 --- a/arch/sh/mm/gup.c +++ b/arch/sh/mm/gup.c @@ -258,7 +258,8 @@ slow_irqon: pages += nr; ret = get_user_pages_unlocked(start, - (end - start) >> PAGE_SHIFT, write, 0, pages); + (end - start) >> PAGE_SHIFT, pages, + write ? 
FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/sparc/include/asm/cpudata_64.h b/arch/sparc/include/asm/cpudata_64.h index a6cfdab..5b0ed48 100644 --- a/arch/sparc/include/asm/cpudata_64.h +++ b/arch/sparc/include/asm/cpudata_64.h @@ -24,9 +24,10 @@ typedef struct { unsigned int icache_line_size; unsigned int ecache_size; unsigned int ecache_line_size; - unsigned short sock_id; + unsigned short sock_id; /* physical package */ unsigned short core_id; - int proc_id; + unsigned short max_cache_id; /* groupings of highest shared cache */ + unsigned short proc_id; /* strand (aka HW thread) id */ } cpuinfo_sparc; DECLARE_PER_CPU(cpuinfo_sparc, __cpu_data); diff --git a/arch/sparc/include/asm/spinlock_32.h b/arch/sparc/include/asm/spinlock_32.h index d9c5876..8011e79 100644 --- a/arch/sparc/include/asm/spinlock_32.h +++ b/arch/sparc/include/asm/spinlock_32.h @@ -134,7 +134,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) *(volatile __u32 *)&lp->lock = ~0U; } -static void inline arch_write_unlock(arch_rwlock_t *lock) +static inline void arch_write_unlock(arch_rwlock_t *lock) { __asm__ __volatile__( " st %%g0, [%0]" diff --git a/arch/sparc/include/asm/spinlock_64.h b/arch/sparc/include/asm/spinlock_64.h index 87990b7..07c9f2e 100644 --- a/arch/sparc/include/asm/spinlock_64.h +++ b/arch/sparc/include/asm/spinlock_64.h @@ -96,7 +96,7 @@ static inline void arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long fla /* Multi-reader locks, these are much saner than the 32-bit Sparc ones... */ -static void inline arch_read_lock(arch_rwlock_t *lock) +static inline void arch_read_lock(arch_rwlock_t *lock) { unsigned long tmp1, tmp2; @@ -119,7 +119,7 @@ static void inline arch_read_lock(arch_rwlock_t *lock) : "memory"); } -static int inline arch_read_trylock(arch_rwlock_t *lock) +static inline int arch_read_trylock(arch_rwlock_t *lock) { int tmp1, tmp2; @@ -140,7 +140,7 @@ static int inline arch_read_trylock(arch_rwlock_t *lock) return tmp1; } -static void inline arch_read_unlock(arch_rwlock_t *lock) +static inline void arch_read_unlock(arch_rwlock_t *lock) { unsigned long tmp1, tmp2; @@ -156,7 +156,7 @@ static void inline arch_read_unlock(arch_rwlock_t *lock) : "memory"); } -static void inline arch_write_lock(arch_rwlock_t *lock) +static inline void arch_write_lock(arch_rwlock_t *lock) { unsigned long mask, tmp1, tmp2; @@ -181,7 +181,7 @@ static void inline arch_write_lock(arch_rwlock_t *lock) : "memory"); } -static void inline arch_write_unlock(arch_rwlock_t *lock) +static inline void arch_write_unlock(arch_rwlock_t *lock) { __asm__ __volatile__( " stw %%g0, [%0]" @@ -190,7 +190,7 @@ static void inline arch_write_unlock(arch_rwlock_t *lock) : "memory"); } -static int inline arch_write_trylock(arch_rwlock_t *lock) +static inline int arch_write_trylock(arch_rwlock_t *lock) { unsigned long mask, tmp1, tmp2, result; diff --git a/arch/sparc/include/asm/topology_64.h b/arch/sparc/include/asm/topology_64.h index bec481a..7b4898a 100644 --- a/arch/sparc/include/asm/topology_64.h +++ b/arch/sparc/include/asm/topology_64.h @@ -44,14 +44,20 @@ int __node_distance(int, int); #define topology_physical_package_id(cpu) (cpu_data(cpu).proc_id) #define topology_core_id(cpu) (cpu_data(cpu).core_id) #define topology_core_cpumask(cpu) (&cpu_core_sib_map[cpu]) +#define topology_core_cache_cpumask(cpu) (&cpu_core_sib_cache_map[cpu]) #define topology_sibling_cpumask(cpu) (&per_cpu(cpu_sibling_map, cpu)) #endif /* CONFIG_SMP */ extern cpumask_t cpu_core_map[NR_CPUS]; 
extern cpumask_t cpu_core_sib_map[NR_CPUS]; +extern cpumask_t cpu_core_sib_cache_map[NR_CPUS]; + +/** + * Return cores that shares the last level cache. + */ static inline const struct cpumask *cpu_coregroup_mask(int cpu) { - return &cpu_core_map[cpu]; + return &cpu_core_sib_cache_map[cpu]; } #endif /* _ASM_SPARC64_TOPOLOGY_H */ diff --git a/arch/sparc/include/asm/uaccess_64.h b/arch/sparc/include/asm/uaccess_64.h index b68acc5..5373136 100644 --- a/arch/sparc/include/asm/uaccess_64.h +++ b/arch/sparc/include/asm/uaccess_64.h @@ -82,7 +82,6 @@ static inline int access_ok(int type, const void __user * addr, unsigned long si return 1; } -void __ret_efault(void); void __retl_efault(void); /* Uh, these should become the main single-value transfer routines.. @@ -189,55 +188,34 @@ int __get_user_bad(void); unsigned long __must_check ___copy_from_user(void *to, const void __user *from, unsigned long size); -unsigned long copy_from_user_fixup(void *to, const void __user *from, - unsigned long size); static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long size) { - unsigned long ret; - check_object_size(to, size, false); - ret = ___copy_from_user(to, from, size); - if (unlikely(ret)) - ret = copy_from_user_fixup(to, from, size); - - return ret; + return ___copy_from_user(to, from, size); } #define __copy_from_user copy_from_user unsigned long __must_check ___copy_to_user(void __user *to, const void *from, unsigned long size); -unsigned long copy_to_user_fixup(void __user *to, const void *from, - unsigned long size); static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long size) { - unsigned long ret; - check_object_size(from, size, true); - ret = ___copy_to_user(to, from, size); - if (unlikely(ret)) - ret = copy_to_user_fixup(to, from, size); - return ret; + return ___copy_to_user(to, from, size); } #define __copy_to_user copy_to_user unsigned long __must_check ___copy_in_user(void __user *to, const void __user *from, unsigned long size); -unsigned long copy_in_user_fixup(void __user *to, void __user *from, - unsigned long size); static inline unsigned long __must_check copy_in_user(void __user *to, void __user *from, unsigned long size) { - unsigned long ret = ___copy_in_user(to, from, size); - - if (unlikely(ret)) - ret = copy_in_user_fixup(to, from, size); - return ret; + return ___copy_in_user(to, from, size); } #define __copy_in_user copy_in_user diff --git a/arch/sparc/kernel/head_64.S b/arch/sparc/kernel/head_64.S index beba6c1..6aa3da1 100644 --- a/arch/sparc/kernel/head_64.S +++ b/arch/sparc/kernel/head_64.S @@ -926,48 +926,11 @@ tlb_type: .word 0 /* Must NOT end up in BSS */ EXPORT_SYMBOL(tlb_type) .section ".fixup",#alloc,#execinstr - .globl __ret_efault, __retl_efault, __ret_one, __retl_one -ENTRY(__ret_efault) - ret - restore %g0, -EFAULT, %o0 -ENDPROC(__ret_efault) -EXPORT_SYMBOL(__ret_efault) - ENTRY(__retl_efault) retl mov -EFAULT, %o0 ENDPROC(__retl_efault) -ENTRY(__retl_one) - retl - mov 1, %o0 -ENDPROC(__retl_one) - -ENTRY(__retl_one_fp) - VISExitHalf - retl - mov 1, %o0 -ENDPROC(__retl_one_fp) - -ENTRY(__ret_one_asi) - wr %g0, ASI_AIUS, %asi - ret - restore %g0, 1, %o0 -ENDPROC(__ret_one_asi) - -ENTRY(__retl_one_asi) - wr %g0, ASI_AIUS, %asi - retl - mov 1, %o0 -ENDPROC(__retl_one_asi) - -ENTRY(__retl_one_asi_fp) - wr %g0, ASI_AIUS, %asi - VISExitHalf - retl - mov 1, %o0 -ENDPROC(__retl_one_asi_fp) - ENTRY(__retl_o1) retl mov %o1, %o0 diff --git a/arch/sparc/kernel/jump_label.c 
b/arch/sparc/kernel/jump_label.c index 59bbeff..07933b9 100644 --- a/arch/sparc/kernel/jump_label.c +++ b/arch/sparc/kernel/jump_label.c @@ -13,19 +13,30 @@ void arch_jump_label_transform(struct jump_entry *entry, enum jump_label_type type) { - u32 val; u32 *insn = (u32 *) (unsigned long) entry->code; + u32 val; if (type == JUMP_LABEL_JMP) { s32 off = (s32)entry->target - (s32)entry->code; + bool use_v9_branch = false; + + BUG_ON(off & 3); #ifdef CONFIG_SPARC64 - /* ba,pt %xcc, . + (off << 2) */ - val = 0x10680000 | ((u32) off >> 2); -#else - /* ba . + (off << 2) */ - val = 0x10800000 | ((u32) off >> 2); + if (off <= 0xfffff && off >= -0x100000) + use_v9_branch = true; #endif + if (use_v9_branch) { + /* WDISP19 - target is . + immed << 2 */ + /* ba,pt %xcc, . + off */ + val = 0x10680000 | (((u32) off >> 2) & 0x7ffff); + } else { + /* WDISP22 - target is . + immed << 2 */ + BUG_ON(off > 0x7fffff); + BUG_ON(off < -0x800000); + /* ba . + off */ + val = 0x10800000 | (((u32) off >> 2) & 0x3fffff); + } } else { val = 0x01000000; } diff --git a/arch/sparc/kernel/mdesc.c b/arch/sparc/kernel/mdesc.c index 1122886..8a6982d 100644 --- a/arch/sparc/kernel/mdesc.c +++ b/arch/sparc/kernel/mdesc.c @@ -645,13 +645,20 @@ static void __mark_core_id(struct mdesc_handle *hp, u64 node, cpu_data(*id).core_id = core_id; } -static void __mark_sock_id(struct mdesc_handle *hp, u64 node, - int sock_id) +static void __mark_max_cache_id(struct mdesc_handle *hp, u64 node, + int max_cache_id) { const u64 *id = mdesc_get_property(hp, node, "id", NULL); - if (*id < num_possible_cpus()) - cpu_data(*id).sock_id = sock_id; + if (*id < num_possible_cpus()) { + cpu_data(*id).max_cache_id = max_cache_id; + + /** + * On systems without explicit socket descriptions socket + * is max_cache_id + */ + cpu_data(*id).sock_id = max_cache_id; + } } static void mark_core_ids(struct mdesc_handle *hp, u64 mp, @@ -660,10 +667,11 @@ static void mark_core_ids(struct mdesc_handle *hp, u64 mp, find_back_node_value(hp, mp, "cpu", __mark_core_id, core_id, 10); } -static void mark_sock_ids(struct mdesc_handle *hp, u64 mp, - int sock_id) +static void mark_max_cache_ids(struct mdesc_handle *hp, u64 mp, + int max_cache_id) { - find_back_node_value(hp, mp, "cpu", __mark_sock_id, sock_id, 10); + find_back_node_value(hp, mp, "cpu", __mark_max_cache_id, + max_cache_id, 10); } static void set_core_ids(struct mdesc_handle *hp) @@ -694,14 +702,15 @@ static void set_core_ids(struct mdesc_handle *hp) } } -static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level) +static int set_max_cache_ids_by_cache(struct mdesc_handle *hp, int level) { u64 mp; int idx = 1; int fnd = 0; - /* Identify unique sockets by looking for cpus backpointed to by - * shared level n caches. + /** + * Identify unique highest level of shared cache by looking for cpus + * backpointed to by shared level N caches. */ mdesc_for_each_node_by_name(hp, mp, "cache") { const u64 *cur_lvl; @@ -709,8 +718,7 @@ static int set_sock_ids_by_cache(struct mdesc_handle *hp, int level) cur_lvl = mdesc_get_property(hp, mp, "level", NULL); if (*cur_lvl != level) continue; - - mark_sock_ids(hp, mp, idx); + mark_max_cache_ids(hp, mp, idx); idx++; fnd = 1; } @@ -745,15 +753,17 @@ static void set_sock_ids(struct mdesc_handle *hp) { u64 mp; - /* If machine description exposes sockets data use it. - * Otherwise fallback to use shared L3 or L2 caches. + /** + * Find the highest level of shared cache which pre-T7 is also + * the socket. 
*/ + if (!set_max_cache_ids_by_cache(hp, 3)) + set_max_cache_ids_by_cache(hp, 2); + + /* If machine description exposes sockets data use it.*/ mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "sockets"); if (mp != MDESC_NODE_NULL) - return set_sock_ids_by_socket(hp, mp); - - if (!set_sock_ids_by_cache(hp, 3)) - set_sock_ids_by_cache(hp, 2); + set_sock_ids_by_socket(hp, mp); } static void mark_proc_ids(struct mdesc_handle *hp, u64 mp, int proc_id) diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index 9ddc492..ac082dd 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -127,7 +127,8 @@ static int get_from_target(struct task_struct *target, unsigned long uaddr, if (copy_from_user(kbuf, (void __user *) uaddr, len)) return -EFAULT; } else { - int len2 = access_process_vm(target, uaddr, kbuf, len, 0); + int len2 = access_process_vm(target, uaddr, kbuf, len, + FOLL_FORCE); if (len2 != len) return -EFAULT; } @@ -141,7 +142,8 @@ static int set_to_target(struct task_struct *target, unsigned long uaddr, if (copy_to_user((void __user *) uaddr, kbuf, len)) return -EFAULT; } else { - int len2 = access_process_vm(target, uaddr, kbuf, len, 1); + int len2 = access_process_vm(target, uaddr, kbuf, len, + FOLL_FORCE | FOLL_WRITE); if (len2 != len) return -EFAULT; } @@ -505,7 +507,8 @@ static int genregs32_get(struct task_struct *target, if (access_process_vm(target, (unsigned long) &reg_window[pos], - k, sizeof(*k), 0) + k, sizeof(*k), + FOLL_FORCE) != sizeof(*k)) return -EFAULT; k++; @@ -531,12 +534,14 @@ if (access_process_vm(target, (unsigned long) &reg_window[pos], - &reg, sizeof(reg), 0) + &reg, sizeof(reg), + FOLL_FORCE) != sizeof(reg)) return -EFAULT; if (access_process_vm(target, (unsigned long) u, - &reg, sizeof(reg), 1) + &reg, sizeof(reg), + FOLL_FORCE | FOLL_WRITE) != sizeof(reg)) return -EFAULT; pos++; @@ -615,7 +620,8 @@ static int genregs32_set(struct task_struct *target, (unsigned long) &reg_window[pos], (void *) k, - sizeof(*k), 1) + sizeof(*k), + FOLL_FORCE | FOLL_WRITE) != sizeof(*k)) return -EFAULT; k++; @@ -642,13 +648,15 @@ if (access_process_vm(target, (unsigned long) u, - &reg, sizeof(reg), 0) + &reg, sizeof(reg), + FOLL_FORCE) != sizeof(reg)) return -EFAULT; if (access_process_vm(target, (unsigned long) &reg_window[pos], - &reg, sizeof(reg), 1) + &reg, sizeof(reg), + FOLL_FORCE | FOLL_WRITE) != sizeof(reg)) return -EFAULT; pos++; diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index d3035ba..8182f7c 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -63,9 +63,13 @@ cpumask_t cpu_core_map[NR_CPUS] __read_mostly = cpumask_t cpu_core_sib_map[NR_CPUS] __read_mostly = { [0 ... NR_CPUS-1] = CPU_MASK_NONE }; +cpumask_t cpu_core_sib_cache_map[NR_CPUS] __read_mostly = { + [0 ...
NR_CPUS - 1] = CPU_MASK_NONE }; + EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); EXPORT_SYMBOL(cpu_core_map); EXPORT_SYMBOL(cpu_core_sib_map); +EXPORT_SYMBOL(cpu_core_sib_cache_map); static cpumask_t smp_commenced_mask; @@ -1265,6 +1269,10 @@ void smp_fill_in_sib_core_maps(void) unsigned int j; for_each_present_cpu(j) { + if (cpu_data(i).max_cache_id == + cpu_data(j).max_cache_id) + cpumask_set_cpu(j, &cpu_core_sib_cache_map[i]); + if (cpu_data(i).sock_id == cpu_data(j).sock_id) cpumask_set_cpu(j, &cpu_core_sib_map[i]); } diff --git a/arch/sparc/lib/GENcopy_from_user.S b/arch/sparc/lib/GENcopy_from_user.S index b7d0bd6..69a439f 100644 --- a/arch/sparc/lib/GENcopy_from_user.S +++ b/arch/sparc/lib/GENcopy_from_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/GENcopy_to_user.S b/arch/sparc/lib/GENcopy_to_user.S index 780550e..9947427 100644 --- a/arch/sparc/lib/GENcopy_to_user.S +++ b/arch/sparc/lib/GENcopy_to_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/GENmemcpy.S b/arch/sparc/lib/GENmemcpy.S index 89358ee..059ea24 100644 --- a/arch/sparc/lib/GENmemcpy.S +++ b/arch/sparc/lib/GENmemcpy.S @@ -4,21 +4,18 @@ */ #ifdef __KERNEL__ +#include <linux/linkage.h> #define GLOBAL_SPARE %g7 #else #define GLOBAL_SPARE %g5 #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST(x,y) x #endif #ifndef LOAD @@ -45,6 +42,29 @@ .register %g3,#scratch .text + +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +ENTRY(GEN_retl_o4_1) + add %o4, %o2, %o4 + retl + add %o4, 1, %o0 +ENDPROC(GEN_retl_o4_1) +ENTRY(GEN_retl_g1_8) + add %g1, %o2, %g1 + retl + add %g1, 8, %o0 +ENDPROC(GEN_retl_g1_8) +ENTRY(GEN_retl_o2_4) + retl + add %o2, 4, %o0 +ENDPROC(GEN_retl_o2_4) +ENTRY(GEN_retl_o2_1) + retl + add %o2, 1, %o0 +ENDPROC(GEN_retl_o2_1) +#endif + .align 64 .globl FUNC_NAME @@ -73,8 +93,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %g0, %o4, %o4 sub %o2, %o4, %o2 1: subcc %o4, 1, %o4 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o0)) + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o4_1) + EX_ST(STORE(stb, %g1, %o0),GEN_retl_o4_1) add %o1, 1, %o1 bne,pt %XCC, 1b add %o0, 1, %o0 @@ -82,8 +102,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0x7, %g1 sub %o2, %g1, %o2 1: subcc %g1, 0x8, %g1 - EX_LD(LOAD(ldx, %o1, %g2)) - EX_ST(STORE(stx, %g2, %o0)) + EX_LD(LOAD(ldx, %o1, %g2),GEN_retl_g1_8) + EX_ST(STORE(stx, %g2, %o0),GEN_retl_g1_8) add %o1, 0x8, %o1 bne,pt %XCC, 1b add %o0, 0x8, %o0 @@ -100,8 +120,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %o2, 4, %o2 - EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %g1),GEN_retl_o2_4) + EX_ST(STORE(stw, %g1, %o1 + %o3),GEN_retl_o2_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -111,8 +131,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_LD(LOAD(ldub, %o1, %g1),GEN_retl_o2_1) + EX_ST(STORE(stb, %g1, %o1 + %o3),GEN_retl_o2_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl diff --git a/arch/sparc/lib/Makefile 
b/arch/sparc/lib/Makefile index 885f00e..69912d2 100644 --- a/arch/sparc/lib/Makefile +++ b/arch/sparc/lib/Makefile @@ -38,7 +38,7 @@ lib-$(CONFIG_SPARC64) += NG4patch.o NG4copy_page.o NG4clear_page.o NG4memset.o lib-$(CONFIG_SPARC64) += GENmemcpy.o GENcopy_from_user.o GENcopy_to_user.o lib-$(CONFIG_SPARC64) += GENpatch.o GENpage.o GENbzero.o -lib-$(CONFIG_SPARC64) += copy_in_user.o user_fixup.o memmove.o +lib-$(CONFIG_SPARC64) += copy_in_user.o memmove.o lib-$(CONFIG_SPARC64) += mcount.o ipcsum.o xor.o hweight.o ffs.o obj-$(CONFIG_SPARC64) += iomap.o diff --git a/arch/sparc/lib/NG2copy_from_user.S b/arch/sparc/lib/NG2copy_from_user.S index d5242b8..b79a699 100644 --- a/arch/sparc/lib/NG2copy_from_user.S +++ b/arch/sparc/lib/NG2copy_from_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NG2copy_to_user.S b/arch/sparc/lib/NG2copy_to_user.S index 4e962d9..dcec55f 100644 --- a/arch/sparc/lib/NG2copy_to_user.S +++ b/arch/sparc/lib/NG2copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2007 David S. Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NG2memcpy.S b/arch/sparc/lib/NG2memcpy.S index d5f585d..c629dbd 100644 --- a/arch/sparc/lib/NG2memcpy.S +++ b/arch/sparc/lib/NG2memcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include <linux/linkage.h> #include <asm/visasm.h> #include <asm/asi.h> #define GLOBAL_SPARE %g7 @@ -32,21 +33,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST_FP(x,y) x #endif #ifndef LOAD @@ -140,45 +137,110 @@ fsrc2 %x6, %f12; \ fsrc2 %x7, %f14; #define FREG_LOAD_1(base, x0) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)) + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1) #define FREG_LOAD_2(base, x0, x1) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); #define FREG_LOAD_3(base, x0, x1, x2) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); #define FREG_LOAD_4(base, x0, x1, x2, x3) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); 
\ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); #define FREG_LOAD_5(base, x0, x1, x2, x3, x4) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); #define FREG_LOAD_6(base, x0, x1, x2, x3, x4, x5) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ - EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); #define FREG_LOAD_7(base, x0, x1, x2, x3, x4, x5, x6) \ - EX_LD_FP(LOAD(ldd, base + 0x00, %x0)); \ - EX_LD_FP(LOAD(ldd, base + 0x08, %x1)); \ - EX_LD_FP(LOAD(ldd, base + 0x10, %x2)); \ - EX_LD_FP(LOAD(ldd, base + 0x18, %x3)); \ - EX_LD_FP(LOAD(ldd, base + 0x20, %x4)); \ - EX_LD_FP(LOAD(ldd, base + 0x28, %x5)); \ - EX_LD_FP(LOAD(ldd, base + 0x30, %x6)); + EX_LD_FP(LOAD(ldd, base + 0x00, %x0), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x08, %x1), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x10, %x2), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x18, %x3), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x20, %x4), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x28, %x5), NG2_retl_o2_plus_g1); \ + EX_LD_FP(LOAD(ldd, base + 0x30, %x6), NG2_retl_o2_plus_g1); .register %g2,#scratch .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_fp: + VISExitHalf +__restore_asi: + retl + wr %g0, ASI_AIUS, %asi +ENTRY(NG2_retl_o2) + ba,pt %xcc, __restore_asi + mov %o2, %o0 +ENDPROC(NG2_retl_o2) +ENTRY(NG2_retl_o2_plus_1) + ba,pt %xcc, __restore_asi + add %o2, 1, %o0 +ENDPROC(NG2_retl_o2_plus_1) +ENTRY(NG2_retl_o2_plus_4) + ba,pt %xcc, __restore_asi + add %o2, 4, %o0 +ENDPROC(NG2_retl_o2_plus_4) +ENTRY(NG2_retl_o2_plus_8) + ba,pt %xcc, __restore_asi + add %o2, 8, %o0 +ENDPROC(NG2_retl_o2_plus_8) +ENTRY(NG2_retl_o2_plus_o4_plus_1) + add %o4, 1, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_plus_o4_plus_1) +ENTRY(NG2_retl_o2_plus_o4_plus_8) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_plus_o4_plus_8) +ENTRY(NG2_retl_o2_plus_o4_plus_16) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_plus_o4_plus_16) +ENTRY(NG2_retl_o2_plus_g1_fp) + ba,pt %xcc, __restore_fp + add %o2, %g1, %o0 +ENDPROC(NG2_retl_o2_plus_g1_fp) +ENTRY(NG2_retl_o2_plus_g1_plus_64_fp) + add %g1, 64, %g1 + ba,pt %xcc, __restore_fp + add %o2, %g1, %o0 +ENDPROC(NG2_retl_o2_plus_g1_plus_64_fp) +ENTRY(NG2_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 
+ENDPROC(NG2_retl_o2_plus_g1_plus_1) +ENTRY(NG2_retl_o2_and_7_plus_o4) + and %o2, 7, %o2 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_and_7_plus_o4) +ENTRY(NG2_retl_o2_and_7_plus_o4_plus_8) + and %o2, 7, %o2 + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG2_retl_o2_and_7_plus_o4_plus_8) +#endif + .align 64 .globl FUNC_NAME @@ -230,8 +292,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %g0, %o4, %o4 ! bytes to align dst sub %o2, %o4, %o2 1: subcc %o4, 1, %o4 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o0)) + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_o4_plus_1) + EX_ST(STORE(stb, %g1, %o0), NG2_retl_o2_plus_o4_plus_1) add %o1, 1, %o1 bne,pt %XCC, 1b add %o0, 1, %o0 @@ -281,11 +343,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ nop /* fall through for 0 < low bits < 8 */ 110: sub %o4, 64, %g2 - EX_LD_FP(LOAD_BLK(%g2, %f0)) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) + EX_LD_FP(LOAD_BLK(%g2, %f0), NG2_retl_o2_plus_g1) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f14, f16) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_8(f16, f18, f20, f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -296,10 +358,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 120: sub %o4, 56, %g2 FREG_LOAD_7(%g2, f0, f2, f4, f6, f8, f10, f12) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f10, f12, f16, f18) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_7(f18, f20, f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -310,10 +372,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 130: sub %o4, 48, %g2 FREG_LOAD_6(%g2, f0, f2, f4, f6, f8, f10) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f10, f16, f18, f20) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_6(f20, f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -324,10 +386,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 140: sub %o4, 40, %g2 FREG_LOAD_5(%g2, f0, f2, f4, f6, f8) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f8, f16, f18, f20, f22) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_5(f22, f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -338,10 +400,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 150: sub %o4, 32, %g2 FREG_LOAD_4(%g2, f0, f2, f4, f6) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f6, f16, f18, f20, f22, f24) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_4(f24, f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -352,10 +414,10 @@ 
FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 160: sub %o4, 24, %g2 FREG_LOAD_3(%g2, f0, f2, f4) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f4, f16, f18, f20, f22, f24, f26) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_3(f26, f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -366,10 +428,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 170: sub %o4, 16, %g2 FREG_LOAD_2(%g2, f0, f2) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f2, f16, f18, f20, f22, f24, f26, f28) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_2(f28, f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -380,10 +442,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 180: sub %o4, 8, %g2 FREG_LOAD_1(%g2, f0) -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) - EX_LD_FP(LOAD_BLK(%o4, %f16)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) + EX_LD_FP(LOAD_BLK(%o4, %f16), NG2_retl_o2_plus_g1) FREG_FROB(f0, f16, f18, f20, f22, f24, f26, f28, f30) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1) FREG_MOVE_1(f30) subcc %g1, 64, %g1 add %o4, 64, %o4 @@ -393,10 +455,10 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ nop 190: -1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3)) +1: EX_ST_FP(STORE_INIT(%g0, %o4 + %g3), NG2_retl_o2_plus_g1) subcc %g1, 64, %g1 - EX_LD_FP(LOAD_BLK(%o4, %f0)) - EX_ST_FP(STORE_BLK(%f0, %o4 + %g3)) + EX_LD_FP(LOAD_BLK(%o4, %f0), NG2_retl_o2_plus_g1_plus_64) + EX_ST_FP(STORE_BLK(%f0, %o4 + %g3), NG2_retl_o2_plus_g1_plus_64) add %o4, 64, %o4 bne,pt %xcc, 1b LOAD(prefetch, %o4 + 64, #one_read) @@ -423,28 +485,28 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0xf, %o4 and %o2, 0xf, %o2 1: subcc %o4, 0x10, %o4 - EX_LD(LOAD(ldx, %o1, %o5)) + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_o4_plus_16) add %o1, 0x08, %o1 - EX_LD(LOAD(ldx, %o1, %g1)) + EX_LD(LOAD(ldx, %o1, %g1), NG2_retl_o2_plus_o4_plus_16) sub %o1, 0x08, %o1 - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_16) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE)) + EX_ST(STORE(stx, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_o4_plus_8) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 be,pt %XCC, 1f nop sub %o2, 0x8, %o2 - EX_LD(LOAD(ldx, %o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(ldx, %o1, %o5), NG2_retl_o2_plus_8) + EX_ST(STORE(stx, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_8) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop sub %o2, 0x4, %o2 - EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(lduw, %o1, %o5), NG2_retl_o2_plus_4) + EX_ST(STORE(stw, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) add %o1, 0x4, %o1 1: cmp %o2, 0 be,pt %XCC, 85f @@ -460,8 +522,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %g1, %o2 1: subcc %g1, 1, %g1 - EX_LD(LOAD(ldub, %o1, %o5)) - EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(ldub, %o1, %o5), NG2_retl_o2_plus_g1_plus_1) + EX_ST(STORE(stb, %o5, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_g1_plus_1) bgu,pt %icc, 1b add %o1, 1, %o1 @@ -477,16 +539,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, 
%o2=len */ 8: mov 64, GLOBAL_SPARE andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1, %g2)) + EX_LD(LOAD(ldx, %o1, %g2), NG2_retl_o2) sub GLOBAL_SPARE, %g1, GLOBAL_SPARE andn %o2, 0x7, %o4 sllx %g2, %g1, %g2 1: add %o1, 0x8, %o1 - EX_LD(LOAD(ldx, %o1, %g3)) + EX_LD(LOAD(ldx, %o1, %g3), NG2_retl_o2_and_7_plus_o4) subcc %o4, 0x8, %o4 srlx %g3, GLOBAL_SPARE, %o5 or %o5, %g2, %o5 - EX_ST(STORE(stx, %o5, %o0)) + EX_ST(STORE(stx, %o5, %o0), NG2_retl_o2_and_7_plus_o4_plus_8) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -506,8 +568,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %o2, 4, %o2 - EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(lduw, %o1, %g1), NG2_retl_o2_plus_4) + EX_ST(STORE(stw, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -517,8 +579,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE)) + EX_LD(LOAD(ldub, %o1, %g1), NG2_retl_o2_plus_1) + EX_ST(STORE(stb, %g1, %o1 + GLOBAL_SPARE), NG2_retl_o2_plus_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl diff --git a/arch/sparc/lib/NG4copy_from_user.S b/arch/sparc/lib/NG4copy_from_user.S index 2e8ee7a..16a286c 100644 --- a/arch/sparc/lib/NG4copy_from_user.S +++ b/arch/sparc/lib/NG4copy_from_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2012 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x, y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NG4copy_to_user.S b/arch/sparc/lib/NG4copy_to_user.S index be0bf45..6b0276f 100644 --- a/arch/sparc/lib/NG4copy_to_user.S +++ b/arch/sparc/lib/NG4copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 2012 David S. 
Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi;\ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_asi_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NG4memcpy.S b/arch/sparc/lib/NG4memcpy.S index 8e13ee1..75bb93b 100644 --- a/arch/sparc/lib/NG4memcpy.S +++ b/arch/sparc/lib/NG4memcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include <linux/linkage.h> #include <asm/visasm.h> #include <asm/asi.h> #define GLOBAL_SPARE %g7 @@ -46,22 +47,19 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x +#define EX_ST_FP(x,y) x #endif -#ifndef EX_RETVAL -#define EX_RETVAL(x) x -#endif #ifndef LOAD #define LOAD(type,addr,dest) type [addr], dest @@ -94,6 +92,158 @@ .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_asi_fp: + VISExitHalf +__restore_asi: + retl + wr %g0, ASI_AIUS, %asi + +ENTRY(NG4_retl_o2) + ba,pt %xcc, __restore_asi + mov %o2, %o0 +ENDPROC(NG4_retl_o2) +ENTRY(NG4_retl_o2_plus_1) + ba,pt %xcc, __restore_asi + add %o2, 1, %o0 +ENDPROC(NG4_retl_o2_plus_1) +ENTRY(NG4_retl_o2_plus_4) + ba,pt %xcc, __restore_asi + add %o2, 4, %o0 +ENDPROC(NG4_retl_o2_plus_4) +ENTRY(NG4_retl_o2_plus_o5) + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5) +ENTRY(NG4_retl_o2_plus_o5_plus_4) + add %o5, 4, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_4) +ENTRY(NG4_retl_o2_plus_o5_plus_8) + add %o5, 8, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_8) +ENTRY(NG4_retl_o2_plus_o5_plus_16) + add %o5, 16, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_16) +ENTRY(NG4_retl_o2_plus_o5_plus_24) + add %o5, 24, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_24) +ENTRY(NG4_retl_o2_plus_o5_plus_32) + add %o5, 32, %o5 + ba,pt %xcc, __restore_asi + add %o2, %o5, %o0 +ENDPROC(NG4_retl_o2_plus_o5_plus_32) +ENTRY(NG4_retl_o2_plus_g1) + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1) +ENTRY(NG4_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1_plus_1) +ENTRY(NG4_retl_o2_plus_g1_plus_8) + add %g1, 8, %g1 + ba,pt %xcc, __restore_asi + add %o2, %g1, %o0 +ENDPROC(NG4_retl_o2_plus_g1_plus_8) +ENTRY(NG4_retl_o2_plus_o4) + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4) +ENTRY(NG4_retl_o2_plus_o4_plus_8) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_8) +ENTRY(NG4_retl_o2_plus_o4_plus_16) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_16) +ENTRY(NG4_retl_o2_plus_o4_plus_24) + add %o4, 24, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_24) +ENTRY(NG4_retl_o2_plus_o4_plus_32) + add %o4, 32, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_32) +ENTRY(NG4_retl_o2_plus_o4_plus_40) + add %o4, 40, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_40) 
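The NG4_retl_* stubs in this hunk all follow one naming convention: the name spells out the expression for the number of bytes left uncopied, which the stub returns in %o0 after restoring the user %asi (the _fp variants also unwind the half-VIS state first). A minimal C model of the convention, with a hypothetical function name and the register side effects elided, might look like:

	/* Hypothetical model: NG4_retl_o2_plus_o4_plus_40 returns the
	 * residual count %o2 + (%o4 + 40). Each faulting load/store in
	 * the unrolled loop is paired with the stub whose sum matches
	 * exactly the bytes still pending at that instruction.
	 */
	static unsigned long ng4_retl_o2_plus_o4_plus_40(unsigned long o2,
							 unsigned long o4)
	{
		return o2 + (o4 + 40);
	}

copy_from_user()/copy_to_user() can then hand this count straight back to the caller, which is what allows the old copy_*_user_fixup() byte-by-byte retry helpers to be deleted in the uaccess_64.h and Makefile hunks above.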
+ENTRY(NG4_retl_o2_plus_o4_plus_48) + add %o4, 48, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_48) +ENTRY(NG4_retl_o2_plus_o4_plus_56) + add %o4, 56, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_56) +ENTRY(NG4_retl_o2_plus_o4_plus_64) + add %o4, 64, %o4 + ba,pt %xcc, __restore_asi + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_64) +ENTRY(NG4_retl_o2_plus_o4_fp) + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_8_fp) + add %o4, 8, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_8_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_16_fp) + add %o4, 16, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_16_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_24_fp) + add %o4, 24, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_24_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_32_fp) + add %o4, 32, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_32_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_40_fp) + add %o4, 40, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_40_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_48_fp) + add %o4, 48, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_48_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_56_fp) + add %o4, 56, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_56_fp) +ENTRY(NG4_retl_o2_plus_o4_plus_64_fp) + add %o4, 64, %o4 + ba,pt %xcc, __restore_asi_fp + add %o2, %o4, %o0 +ENDPROC(NG4_retl_o2_plus_o4_plus_64_fp) +#endif .align 64 .globl FUNC_NAME @@ -124,12 +274,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %g1, 51f sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) + +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) add %o1, 1, %o1 subcc %g1, 1, %g1 add %o0, 1, %o0 bne,pt %icc, 1b - EX_ST(STORE(stb, %g2, %o0 - 0x01)) + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) 51: LOAD(prefetch, %o1 + 0x040, #n_reads_strong) LOAD(prefetch, %o1 + 0x080, #n_reads_strong) @@ -154,43 +305,43 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %g1, .Llarge_aligned sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) add %o1, 8, %o1 subcc %g1, 8, %g1 add %o0, 8, %o0 bne,pt %icc, 1b - EX_ST(STORE(stx, %g2, %o0 - 0x08)) + EX_ST(STORE(stx, %g2, %o0 - 0x08), NG4_retl_o2_plus_g1_plus_8) .Llarge_aligned: /* len >= 0x80 && src 8-byte aligned && dest 8-byte aligned */ andn %o2, 0x3f, %o4 sub %o2, %o4, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o4) add %o1, 0x40, %o1 - EX_LD(LOAD(ldx, %o1 - 0x38, %g2)) + EX_LD(LOAD(ldx, %o1 - 0x38, %g2), NG4_retl_o2_plus_o4) subcc %o4, 0x40, %o4 - EX_LD(LOAD(ldx, %o1 - 0x30, %g3)) - EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE)) - EX_LD(LOAD(ldx, %o1 - 0x20, %o5)) - EX_ST(STORE_INIT(%g1, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x30, %g3), NG4_retl_o2_plus_o4_plus_64) + EX_LD(LOAD(ldx, %o1 - 0x28, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_64) + EX_LD(LOAD(ldx, %o1 - 0x20, %o5), NG4_retl_o2_plus_o4_plus_64) + EX_ST(STORE_INIT(%g1, %o0), NG4_retl_o2_plus_o4_plus_64) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g2, %o0)) + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_56) add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x18, %g2)) - EX_ST(STORE_INIT(%g3, 
%o0)) + EX_LD(LOAD(ldx, %o1 - 0x18, %g2), NG4_retl_o2_plus_o4_plus_48) + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_48) add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x10, %g3)) - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x10, %g3), NG4_retl_o2_plus_o4_plus_40) + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_40) add %o0, 0x08, %o0 - EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE)) - EX_ST(STORE_INIT(%o5, %o0)) + EX_LD(LOAD(ldx, %o1 - 0x08, GLOBAL_SPARE), NG4_retl_o2_plus_o4_plus_32) + EX_ST(STORE_INIT(%o5, %o0), NG4_retl_o2_plus_o4_plus_32) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g2, %o0)) + EX_ST(STORE_INIT(%g2, %o0), NG4_retl_o2_plus_o4_plus_24) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(%g3, %o0)) + EX_ST(STORE_INIT(%g3, %o0), NG4_retl_o2_plus_o4_plus_16) add %o0, 0x08, %o0 - EX_ST(STORE_INIT(GLOBAL_SPARE, %o0)) + EX_ST(STORE_INIT(GLOBAL_SPARE, %o0), NG4_retl_o2_plus_o4_plus_8) add %o0, 0x08, %o0 bne,pt %icc, 1b LOAD(prefetch, %o1 + 0x200, #n_reads_strong) @@ -216,17 +367,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %o4, %o2 alignaddr %o1, %g0, %g1 add %o1, %o4, %o1 - EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0)) -1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2)) + EX_LD_FP(LOAD(ldd, %g1 + 0x00, %f0), NG4_retl_o2_plus_o4) +1: EX_LD_FP(LOAD(ldd, %g1 + 0x08, %f2), NG4_retl_o2_plus_o4) subcc %o4, 0x40, %o4 - EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4)) - EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6)) - EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8)) - EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10)) - EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12)) - EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14)) + EX_LD_FP(LOAD(ldd, %g1 + 0x10, %f4), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x18, %f6), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x20, %f8), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x28, %f10), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x30, %f12), NG4_retl_o2_plus_o4_plus_64) + EX_LD_FP(LOAD(ldd, %g1 + 0x38, %f14), NG4_retl_o2_plus_o4_plus_64) faligndata %f0, %f2, %f16 - EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0)) + EX_LD_FP(LOAD(ldd, %g1 + 0x40, %f0), NG4_retl_o2_plus_o4_plus_64) faligndata %f2, %f4, %f18 add %g1, 0x40, %g1 faligndata %f4, %f6, %f20 @@ -235,14 +386,14 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ faligndata %f10, %f12, %f26 faligndata %f12, %f14, %f28 faligndata %f14, %f0, %f30 - EX_ST_FP(STORE(std, %f16, %o0 + 0x00)) - EX_ST_FP(STORE(std, %f18, %o0 + 0x08)) - EX_ST_FP(STORE(std, %f20, %o0 + 0x10)) - EX_ST_FP(STORE(std, %f22, %o0 + 0x18)) - EX_ST_FP(STORE(std, %f24, %o0 + 0x20)) - EX_ST_FP(STORE(std, %f26, %o0 + 0x28)) - EX_ST_FP(STORE(std, %f28, %o0 + 0x30)) - EX_ST_FP(STORE(std, %f30, %o0 + 0x38)) + EX_ST_FP(STORE(std, %f16, %o0 + 0x00), NG4_retl_o2_plus_o4_plus_64) + EX_ST_FP(STORE(std, %f18, %o0 + 0x08), NG4_retl_o2_plus_o4_plus_56) + EX_ST_FP(STORE(std, %f20, %o0 + 0x10), NG4_retl_o2_plus_o4_plus_48) + EX_ST_FP(STORE(std, %f22, %o0 + 0x18), NG4_retl_o2_plus_o4_plus_40) + EX_ST_FP(STORE(std, %f24, %o0 + 0x20), NG4_retl_o2_plus_o4_plus_32) + EX_ST_FP(STORE(std, %f26, %o0 + 0x28), NG4_retl_o2_plus_o4_plus_24) + EX_ST_FP(STORE(std, %f28, %o0 + 0x30), NG4_retl_o2_plus_o4_plus_16) + EX_ST_FP(STORE(std, %f30, %o0 + 0x38), NG4_retl_o2_plus_o4_plus_8) add %o0, 0x40, %o0 bne,pt %icc, 1b LOAD(prefetch, %g1 + 0x200, #n_reads_strong) @@ -270,37 +421,38 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andncc %o2, 0x20 - 1, %o5 be,pn %icc, 2f sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) - EX_LD(LOAD(ldx, %o1 + 0x08, %g2)) - EX_LD(LOAD(ldx, %o1 + 0x10, 
GLOBAL_SPARE)) - EX_LD(LOAD(ldx, %o1 + 0x18, %o4)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x08, %g2), NG4_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x10, GLOBAL_SPARE), NG4_retl_o2_plus_o5) + EX_LD(LOAD(ldx, %o1 + 0x18, %o4), NG4_retl_o2_plus_o5) add %o1, 0x20, %o1 subcc %o5, 0x20, %o5 - EX_ST(STORE(stx, %g1, %o0 + 0x00)) - EX_ST(STORE(stx, %g2, %o0 + 0x08)) - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10)) - EX_ST(STORE(stx, %o4, %o0 + 0x18)) + EX_ST(STORE(stx, %g1, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_32) + EX_ST(STORE(stx, %g2, %o0 + 0x08), NG4_retl_o2_plus_o5_plus_24) + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x10), NG4_retl_o2_plus_o5_plus_24) + EX_ST(STORE(stx, %o4, %o0 + 0x18), NG4_retl_o2_plus_o5_plus_8) bne,pt %icc, 1b add %o0, 0x20, %o0 2: andcc %o2, 0x18, %o5 be,pt %icc, 3f sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1)) + +1: EX_LD(LOAD(ldx, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) add %o1, 0x08, %o1 add %o0, 0x08, %o0 subcc %o5, 0x08, %o5 bne,pt %icc, 1b - EX_ST(STORE(stx, %g1, %o0 - 0x08)) + EX_ST(STORE(stx, %g1, %o0 - 0x08), NG4_retl_o2_plus_o5_plus_8) 3: brz,pt %o2, .Lexit cmp %o2, 0x04 bl,pn %icc, .Ltiny nop - EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2) add %o1, 0x04, %o1 add %o0, 0x04, %o0 subcc %o2, 0x04, %o2 bne,pn %icc, .Ltiny - EX_ST(STORE(stw, %g1, %o0 - 0x04)) + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_4) ba,a,pt %icc, .Lexit .Lmedium_unaligned: /* First get dest 8 byte aligned. */ @@ -309,12 +461,12 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ brz,pt %g1, 2f sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2)) +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g2), NG4_retl_o2_plus_g1) add %o1, 1, %o1 subcc %g1, 1, %g1 add %o0, 1, %o0 bne,pt %icc, 1b - EX_ST(STORE(stb, %g2, %o0 - 0x01)) + EX_ST(STORE(stb, %g2, %o0 - 0x01), NG4_retl_o2_plus_g1_plus_1) 2: and %o1, 0x7, %g1 brz,pn %g1, .Lmedium_noprefetch @@ -322,16 +474,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ mov 64, %g2 sub %g2, %g1, %g2 andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1 + 0x00, %o4)) + EX_LD(LOAD(ldx, %o1 + 0x00, %o4), NG4_retl_o2) sllx %o4, %g1, %o4 andn %o2, 0x08 - 1, %o5 sub %o2, %o5, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3)) +1: EX_LD(LOAD(ldx, %o1 + 0x08, %g3), NG4_retl_o2_plus_o5) add %o1, 0x08, %o1 subcc %o5, 0x08, %o5 srlx %g3, %g2, GLOBAL_SPARE or GLOBAL_SPARE, %o4, GLOBAL_SPARE - EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00)) + EX_ST(STORE(stx, GLOBAL_SPARE, %o0 + 0x00), NG4_retl_o2_plus_o5_plus_8) add %o0, 0x08, %o0 bne,pt %icc, 1b sllx %g3, %g1, %o4 @@ -342,17 +494,17 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ ba,pt %icc, .Lsmall_unaligned .Ltiny: - EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) + EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) subcc %o2, 1, %o2 be,pn %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x00)) - EX_LD(LOAD(ldub, %o1 + 0x01, %g1)) + EX_ST(STORE(stb, %g1, %o0 + 0x00), NG4_retl_o2_plus_1) + EX_LD(LOAD(ldub, %o1 + 0x01, %g1), NG4_retl_o2) subcc %o2, 1, %o2 be,pn %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x01)) - EX_LD(LOAD(ldub, %o1 + 0x02, %g1)) + EX_ST(STORE(stb, %g1, %o0 + 0x01), NG4_retl_o2_plus_1) + EX_LD(LOAD(ldub, %o1 + 0x02, %g1), NG4_retl_o2) ba,pt %icc, .Lexit - EX_ST(STORE(stb, %g1, %o0 + 0x02)) + EX_ST(STORE(stb, %g1, %o0 + 0x02), NG4_retl_o2) .Lsmall: andcc %g2, 0x3, %g0 @@ -360,22 +512,22 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0x4 - 1, %o5 sub %o2, %o5, %o2 1: - EX_LD(LOAD(lduw, %o1 + 0x00, %g1)) + EX_LD(LOAD(lduw, %o1 + 0x00, %g1), NG4_retl_o2_plus_o5) add %o1, 
0x04, %o1 subcc %o5, 0x04, %o5 add %o0, 0x04, %o0 bne,pt %icc, 1b - EX_ST(STORE(stw, %g1, %o0 - 0x04)) + EX_ST(STORE(stw, %g1, %o0 - 0x04), NG4_retl_o2_plus_o5_plus_4) brz,pt %o2, .Lexit nop ba,a,pt %icc, .Ltiny .Lsmall_unaligned: -1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1)) +1: EX_LD(LOAD(ldub, %o1 + 0x00, %g1), NG4_retl_o2) add %o1, 1, %o1 add %o0, 1, %o0 subcc %o2, 1, %o2 bne,pt %icc, 1b - EX_ST(STORE(stb, %g1, %o0 - 0x01)) + EX_ST(STORE(stb, %g1, %o0 - 0x01), NG4_retl_o2_plus_1) ba,a,pt %icc, .Lexit .size FUNC_NAME, .-FUNC_NAME diff --git a/arch/sparc/lib/NGcopy_from_user.S b/arch/sparc/lib/NGcopy_from_user.S index 5d1e4d1..9cd42fc 100644 --- a/arch/sparc/lib/NGcopy_from_user.S +++ b/arch/sparc/lib/NGcopy_from_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __ret_one_asi;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NGcopy_to_user.S b/arch/sparc/lib/NGcopy_to_user.S index ff630dc..5c358af 100644 --- a/arch/sparc/lib/NGcopy_to_user.S +++ b/arch/sparc/lib/NGcopy_to_user.S @@ -3,11 +3,11 @@ * Copyright (C) 2006, 2007 David S. Miller (davem@davemloft.net) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __ret_one_asi;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/NGmemcpy.S b/arch/sparc/lib/NGmemcpy.S index 96a14ca..d88c4ed 100644 --- a/arch/sparc/lib/NGmemcpy.S +++ b/arch/sparc/lib/NGmemcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include <linux/linkage.h> #include <asm/asi.h> #include <asm/thread_info.h> #define GLOBAL_SPARE %g7 @@ -27,15 +28,11 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST(x,y) x #endif #ifndef LOAD @@ -79,6 +76,92 @@ .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_asi: + ret + wr %g0, ASI_AIUS, %asi + restore +ENTRY(NG_ret_i2_plus_i4_plus_1) + ba,pt %xcc, __restore_asi + add %i2, %i5, %i0 +ENDPROC(NG_ret_i2_plus_i4_plus_1) +ENTRY(NG_ret_i2_plus_g1) + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1) +ENTRY(NG_ret_i2_plus_g1_minus_8) + sub %g1, 8, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_8) +ENTRY(NG_ret_i2_plus_g1_minus_16) + sub %g1, 16, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_16) +ENTRY(NG_ret_i2_plus_g1_minus_24) + sub %g1, 24, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_24) +ENTRY(NG_ret_i2_plus_g1_minus_32) + sub %g1, 32, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_32) +ENTRY(NG_ret_i2_plus_g1_minus_40) + sub %g1, 40, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_40) +ENTRY(NG_ret_i2_plus_g1_minus_48) + sub %g1, 48, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_48) +ENTRY(NG_ret_i2_plus_g1_minus_56) + sub %g1, 56, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_minus_56) +ENTRY(NG_ret_i2_plus_i4) + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_plus_i4) +ENTRY(NG_ret_i2_plus_i4_minus_8) + sub %i4, 8, %i4 + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_plus_i4_minus_8) +ENTRY(NG_ret_i2_plus_8) + ba,pt %xcc, __restore_asi + add %i2, 8, 
%i0 +ENDPROC(NG_ret_i2_plus_8) +ENTRY(NG_ret_i2_plus_4) + ba,pt %xcc, __restore_asi + add %i2, 4, %i0 +ENDPROC(NG_ret_i2_plus_4) +ENTRY(NG_ret_i2_plus_1) + ba,pt %xcc, __restore_asi + add %i2, 1, %i0 +ENDPROC(NG_ret_i2_plus_1) +ENTRY(NG_ret_i2_plus_g1_plus_1) + add %g1, 1, %g1 + ba,pt %xcc, __restore_asi + add %i2, %g1, %i0 +ENDPROC(NG_ret_i2_plus_g1_plus_1) +ENTRY(NG_ret_i2) + ba,pt %xcc, __restore_asi + mov %i2, %i0 +ENDPROC(NG_ret_i2) +ENTRY(NG_ret_i2_and_7_plus_i4) + and %i2, 7, %i2 + ba,pt %xcc, __restore_asi + add %i2, %i4, %i0 +ENDPROC(NG_ret_i2_and_7_plus_i4) +#endif + .align 64 .globl FUNC_NAME @@ -126,8 +209,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ sub %g0, %i4, %i4 ! bytes to align dst sub %i2, %i4, %i2 1: subcc %i4, 1, %i4 - EX_LD(LOAD(ldub, %i1, %g1)) - EX_ST(STORE(stb, %g1, %o0)) + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_i4_plus_1) + EX_ST(STORE(stb, %g1, %o0), NG_ret_i2_plus_i4_plus_1) add %i1, 1, %i1 bne,pt %XCC, 1b add %o0, 1, %o0 @@ -160,7 +243,7 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ and %i4, 0x7, GLOBAL_SPARE sll GLOBAL_SPARE, 3, GLOBAL_SPARE mov 64, %i5 - EX_LD(LOAD_TWIN(%i1, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1, %g2, %g3), NG_ret_i2_plus_g1) sub %i5, GLOBAL_SPARE, %i5 mov 16, %o4 mov 32, %o5 @@ -178,31 +261,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ srlx WORD3, PRE_SHIFT, TMP; \ or WORD2, TMP, WORD2; -8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) +8: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1) MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) LOAD(prefetch, %i1 + %i3, #one_read) - EX_ST(STORE_INIT(%g2, %o0 + 0x00)) - EX_ST(STORE_INIT(%g3, %o0 + 0x08)) + EX_ST(STORE_INIT(%g2, %o0 + 0x00), NG_ret_i2_plus_g1) + EX_ST(STORE_INIT(%g3, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16) MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o2, %o0 + 0x10)) - EX_ST(STORE_INIT(%o3, %o0 + 0x18)) + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32) MIX_THREE_WORDS(%g2, %g3, %o2, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%g2, %o0 + 0x20)) - EX_ST(STORE_INIT(%g3, %o0 + 0x28)) + EX_ST(STORE_INIT(%g2, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%g3, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48) add %i1, 64, %i1 MIX_THREE_WORDS(%o2, %o3, %g2, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o2, %o0 + 0x30)) - EX_ST(STORE_INIT(%o3, %o0 + 0x38)) + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 8b @@ -211,31 +294,31 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ ba,pt %XCC, 60f add %i1, %i4, %i1 -9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3)) +9: EX_LD(LOAD_TWIN(%i1 + %o4, %o2, %o3), NG_ret_i2_plus_g1) MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) LOAD(prefetch, %i1 + %i3, #one_read) - EX_ST(STORE_INIT(%g3, %o0 + 0x00)) - EX_ST(STORE_INIT(%o2, %o0 + 0x08)) + EX_ST(STORE_INIT(%g3, %o0 + 0x00), NG_ret_i2_plus_g1) + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) - EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %o5, %g2, %g3), NG_ret_i2_plus_g1_minus_16) MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, 
%o1) - EX_ST(STORE_INIT(%o3, %o0 + 0x10)) - EX_ST(STORE_INIT(%g2, %o0 + 0x18)) + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%g2, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1_minus_32) MIX_THREE_WORDS(%g3, %o2, %o3, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%g3, %o0 + 0x20)) - EX_ST(STORE_INIT(%o2, %o0 + 0x28)) + EX_ST(STORE_INIT(%g3, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) - EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3)) + EX_LD(LOAD_TWIN(%i1 + %i3, %g2, %g3), NG_ret_i2_plus_g1_minus_48) add %i1, 64, %i1 MIX_THREE_WORDS(%o3, %g2, %g3, %i5, GLOBAL_SPARE, %o1) - EX_ST(STORE_INIT(%o3, %o0 + 0x30)) - EX_ST(STORE_INIT(%g2, %o0 + 0x38)) + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%g2, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 9b @@ -249,25 +332,25 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ * one twin load ahead, then add 8 back into source when * we finish the loop. */ - EX_LD(LOAD_TWIN(%i1, %o4, %o5)) + EX_LD(LOAD_TWIN(%i1, %o4, %o5), NG_ret_i2_plus_g1) mov 16, %o7 mov 32, %g2 mov 48, %g3 mov 64, %o1 -1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) +1: EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1) LOAD(prefetch, %i1 + %o1, #one_read) - EX_ST(STORE_INIT(%o5, %o0 + 0x00)) ! initializes cache line - EX_ST(STORE_INIT(%o2, %o0 + 0x08)) - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) - EX_ST(STORE_INIT(%o3, %o0 + 0x10)) - EX_ST(STORE_INIT(%o4, %o0 + 0x18)) - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) - EX_ST(STORE_INIT(%o5, %o0 + 0x20)) - EX_ST(STORE_INIT(%o2, %o0 + 0x28)) - EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5)) + EX_ST(STORE_INIT(%o5, %o0 + 0x00), NG_ret_i2_plus_g1) ! initializes cache line + EX_ST(STORE_INIT(%o2, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o3, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o4, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o5, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o2, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) + EX_LD(LOAD_TWIN(%i1 + %o1, %o4, %o5), NG_ret_i2_plus_g1_minus_48) add %i1, 64, %i1 - EX_ST(STORE_INIT(%o3, %o0 + 0x30)) - EX_ST(STORE_INIT(%o4, %o0 + 0x38)) + EX_ST(STORE_INIT(%o3, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%o4, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 1b add %o0, 64, %o0 @@ -282,20 +365,20 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ mov 32, %g2 mov 48, %g3 mov 64, %o1 -1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5)) - EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3)) +1: EX_LD(LOAD_TWIN(%i1 + %g0, %o4, %o5), NG_ret_i2_plus_g1) + EX_LD(LOAD_TWIN(%i1 + %o7, %o2, %o3), NG_ret_i2_plus_g1) LOAD(prefetch, %i1 + %o1, #one_read) - EX_ST(STORE_INIT(%o4, %o0 + 0x00)) ! initializes cache line - EX_ST(STORE_INIT(%o5, %o0 + 0x08)) - EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5)) - EX_ST(STORE_INIT(%o2, %o0 + 0x10)) - EX_ST(STORE_INIT(%o3, %o0 + 0x18)) - EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3)) + EX_ST(STORE_INIT(%o4, %o0 + 0x00), NG_ret_i2_plus_g1) ! 
initializes cache line + EX_ST(STORE_INIT(%o5, %o0 + 0x08), NG_ret_i2_plus_g1_minus_8) + EX_LD(LOAD_TWIN(%i1 + %g2, %o4, %o5), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o2, %o0 + 0x10), NG_ret_i2_plus_g1_minus_16) + EX_ST(STORE_INIT(%o3, %o0 + 0x18), NG_ret_i2_plus_g1_minus_24) + EX_LD(LOAD_TWIN(%i1 + %g3, %o2, %o3), NG_ret_i2_plus_g1_minus_32) add %i1, 64, %i1 - EX_ST(STORE_INIT(%o4, %o0 + 0x20)) - EX_ST(STORE_INIT(%o5, %o0 + 0x28)) - EX_ST(STORE_INIT(%o2, %o0 + 0x30)) - EX_ST(STORE_INIT(%o3, %o0 + 0x38)) + EX_ST(STORE_INIT(%o4, %o0 + 0x20), NG_ret_i2_plus_g1_minus_32) + EX_ST(STORE_INIT(%o5, %o0 + 0x28), NG_ret_i2_plus_g1_minus_40) + EX_ST(STORE_INIT(%o2, %o0 + 0x30), NG_ret_i2_plus_g1_minus_48) + EX_ST(STORE_INIT(%o3, %o0 + 0x38), NG_ret_i2_plus_g1_minus_56) subcc %g1, 64, %g1 bne,pt %XCC, 1b add %o0, 64, %o0 @@ -321,28 +404,28 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ andn %i2, 0xf, %i4 and %i2, 0xf, %i2 1: subcc %i4, 0x10, %i4 - EX_LD(LOAD(ldx, %i1, %o4)) + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_i4) add %i1, 0x08, %i1 - EX_LD(LOAD(ldx, %i1, %g1)) + EX_LD(LOAD(ldx, %i1, %g1), NG_ret_i2_plus_i4) sub %i1, 0x08, %i1 - EX_ST(STORE(stx, %o4, %i1 + %i3)) + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_i4) add %i1, 0x8, %i1 - EX_ST(STORE(stx, %g1, %i1 + %i3)) + EX_ST(STORE(stx, %g1, %i1 + %i3), NG_ret_i2_plus_i4_minus_8) bgu,pt %XCC, 1b add %i1, 0x8, %i1 73: andcc %i2, 0x8, %g0 be,pt %XCC, 1f nop sub %i2, 0x8, %i2 - EX_LD(LOAD(ldx, %i1, %o4)) - EX_ST(STORE(stx, %o4, %i1 + %i3)) + EX_LD(LOAD(ldx, %i1, %o4), NG_ret_i2_plus_8) + EX_ST(STORE(stx, %o4, %i1 + %i3), NG_ret_i2_plus_8) add %i1, 0x8, %i1 1: andcc %i2, 0x4, %g0 be,pt %XCC, 1f nop sub %i2, 0x4, %i2 - EX_LD(LOAD(lduw, %i1, %i5)) - EX_ST(STORE(stw, %i5, %i1 + %i3)) + EX_LD(LOAD(lduw, %i1, %i5), NG_ret_i2_plus_4) + EX_ST(STORE(stw, %i5, %i1 + %i3), NG_ret_i2_plus_4) add %i1, 0x4, %i1 1: cmp %i2, 0 be,pt %XCC, 85f @@ -358,8 +441,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ sub %i2, %g1, %i2 1: subcc %g1, 1, %g1 - EX_LD(LOAD(ldub, %i1, %i5)) - EX_ST(STORE(stb, %i5, %i1 + %i3)) + EX_LD(LOAD(ldub, %i1, %i5), NG_ret_i2_plus_g1_plus_1) + EX_ST(STORE(stb, %i5, %i1 + %i3), NG_ret_i2_plus_g1_plus_1) bgu,pt %icc, 1b add %i1, 1, %i1 @@ -375,16 +458,16 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ 8: mov 64, %i3 andn %i1, 0x7, %i1 - EX_LD(LOAD(ldx, %i1, %g2)) + EX_LD(LOAD(ldx, %i1, %g2), NG_ret_i2) sub %i3, %g1, %i3 andn %i2, 0x7, %i4 sllx %g2, %g1, %g2 1: add %i1, 0x8, %i1 - EX_LD(LOAD(ldx, %i1, %g3)) + EX_LD(LOAD(ldx, %i1, %g3), NG_ret_i2_and_7_plus_i4) subcc %i4, 0x8, %i4 srlx %g3, %i3, %i5 or %i5, %g2, %i5 - EX_ST(STORE(stx, %i5, %o0)) + EX_ST(STORE(stx, %i5, %o0), NG_ret_i2_and_7_plus_i4) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -404,8 +487,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ 1: subcc %i2, 4, %i2 - EX_LD(LOAD(lduw, %i1, %g1)) - EX_ST(STORE(stw, %g1, %i1 + %i3)) + EX_LD(LOAD(lduw, %i1, %g1), NG_ret_i2_plus_4) + EX_ST(STORE(stw, %g1, %i1 + %i3), NG_ret_i2_plus_4) bgu,pt %XCC, 1b add %i1, 4, %i1 @@ -415,8 +498,8 @@ FUNC_NAME: /* %i0=dst, %i1=src, %i2=len */ .align 32 90: subcc %i2, 1, %i2 - EX_LD(LOAD(ldub, %i1, %g1)) - EX_ST(STORE(stb, %g1, %i1 + %i3)) + EX_LD(LOAD(ldub, %i1, %g1), NG_ret_i2_plus_1) + EX_ST(STORE(stb, %g1, %i1 + %i3), NG_ret_i2_plus_1) bgu,pt %XCC, 90b add %i1, 1, %i1 ret diff --git a/arch/sparc/lib/U1copy_from_user.S b/arch/sparc/lib/U1copy_from_user.S index ecc5692..bb6ff73 100644 --- a/arch/sparc/lib/U1copy_from_user.S +++ b/arch/sparc/lib/U1copy_from_user.S @@ -3,19 +3,19 @@ * 
Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U1copy_to_user.S b/arch/sparc/lib/U1copy_to_user.S index 9eea392..ed92ce73 100644 --- a/arch/sparc/lib/U1copy_to_user.S +++ b/arch/sparc/lib/U1copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, y; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U1memcpy.S b/arch/sparc/lib/U1memcpy.S index 97e1b21..4f0d50b 100644 --- a/arch/sparc/lib/U1memcpy.S +++ b/arch/sparc/lib/U1memcpy.S @@ -5,6 +5,7 @@ */ #ifdef __KERNEL__ +#include <linux/linkage.h> #include <asm/visasm.h> #include <asm/asi.h> #include <asm/export.h> @@ -24,21 +25,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST_FP(x,y) x #endif #ifndef LOAD @@ -79,53 +76,169 @@ faligndata %f7, %f8, %f60; \ faligndata %f8, %f9, %f62; -#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, len, jmptgt) \ - EX_LD_FP(LOAD_BLK(%src, %fdest)); \ - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ - add %src, 0x40, %src; \ - subcc %len, 0x40, %len; \ - be,pn %xcc, jmptgt; \ - add %dest, 0x40, %dest; \ - -#define LOOP_CHUNK1(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f0, f48, len, branch_dest) -#define LOOP_CHUNK2(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f16, f48, len, branch_dest) -#define LOOP_CHUNK3(src, dest, len, branch_dest) \ - MAIN_LOOP_CHUNK(src, dest, f32, f48, len, branch_dest) +#define MAIN_LOOP_CHUNK(src, dest, fdest, fsrc, jmptgt) \ + EX_LD_FP(LOAD_BLK(%src, %fdest), U1_gs_80_fp); \ + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \ + add %src, 0x40, %src; \ + subcc %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE; \ + be,pn %xcc, jmptgt; \ + add %dest, 0x40, %dest; \ + +#define LOOP_CHUNK1(src, dest, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f0, f48, branch_dest) +#define LOOP_CHUNK2(src, dest, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f16, f48, branch_dest) +#define LOOP_CHUNK3(src, dest, branch_dest) \ + MAIN_LOOP_CHUNK(src, dest, f32, f48, branch_dest) #define DO_SYNC membar #Sync; #define STORE_SYNC(dest, fsrc) \ - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_80_fp); \ add %dest, 0x40, %dest; \ DO_SYNC #define STORE_JUMP(dest, fsrc, target) \ - EX_ST_FP(STORE_BLK(%fsrc, %dest)); \ + EX_ST_FP(STORE_BLK(%fsrc, %dest), U1_gs_40_fp); \ add %dest, 0x40, %dest; \ ba,pt %xcc, target; \ nop; -#define FINISH_VISCHUNK(dest, f0, f1, left) \ - subcc %left, 8, %left;\ - bl,pn %xcc, 95f; \ - faligndata %f0, %f1, %f48; \ - EX_ST_FP(STORE(std, %f48, %dest)); \ +#define FINISH_VISCHUNK(dest, f0, f1) \ + subcc %g3, 8, %g3; \ + bl,pn %xcc, 95f; \ + faligndata %f0, %f1, %f48; \ + EX_ST_FP(STORE(std, %f48, %dest), 
U1_g3_8_fp); \ add %dest, 8, %dest; -#define UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ - subcc %left, 8, %left; \ - bl,pn %xcc, 95f; \ +#define UNEVEN_VISCHUNK_LAST(dest, f0, f1) \ + subcc %g3, 8, %g3; \ + bl,pn %xcc, 95f; \ fsrc2 %f0, %f1; -#define UNEVEN_VISCHUNK(dest, f0, f1, left) \ - UNEVEN_VISCHUNK_LAST(dest, f0, f1, left) \ +#define UNEVEN_VISCHUNK(dest, f0, f1) \ + UNEVEN_VISCHUNK_LAST(dest, f0, f1) \ ba,a,pt %xcc, 93f; .register %g2,#scratch .register %g3,#scratch .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +ENTRY(U1_g1_1_fp) + VISExitHalf + add %g1, 1, %g1 + add %g1, %g2, %g1 + retl + add %g1, %o2, %o0 +ENDPROC(U1_g1_1_fp) +ENTRY(U1_g2_0_fp) + VISExitHalf + retl + add %g2, %o2, %o0 +ENDPROC(U1_g2_0_fp) +ENTRY(U1_g2_8_fp) + VISExitHalf + add %g2, 8, %g2 + retl + add %g2, %o2, %o0 +ENDPROC(U1_g2_8_fp) +ENTRY(U1_gs_0_fp) + VISExitHalf + add %GLOBAL_SPARE, %g3, %o0 + retl + add %o0, %o2, %o0 +ENDPROC(U1_gs_0_fp) +ENTRY(U1_gs_80_fp) + VISExitHalf + add %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE + add %GLOBAL_SPARE, %g3, %o0 + retl + add %o0, %o2, %o0 +ENDPROC(U1_gs_80_fp) +ENTRY(U1_gs_40_fp) + VISExitHalf + add %GLOBAL_SPARE, 0x40, %GLOBAL_SPARE + add %GLOBAL_SPARE, %g3, %o0 + retl + add %o0, %o2, %o0 +ENDPROC(U1_gs_40_fp) +ENTRY(U1_g3_0_fp) + VISExitHalf + retl + add %g3, %o2, %o0 +ENDPROC(U1_g3_0_fp) +ENTRY(U1_g3_8_fp) + VISExitHalf + add %g3, 8, %g3 + retl + add %g3, %o2, %o0 +ENDPROC(U1_g3_8_fp) +ENTRY(U1_o2_0_fp) + VISExitHalf + retl + mov %o2, %o0 +ENDPROC(U1_o2_0_fp) +ENTRY(U1_o2_1_fp) + VISExitHalf + retl + add %o2, 1, %o0 +ENDPROC(U1_o2_1_fp) +ENTRY(U1_gs_0) + VISExitHalf + retl + add %GLOBAL_SPARE, %o2, %o0 +ENDPROC(U1_gs_0) +ENTRY(U1_gs_8) + VISExitHalf + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE + retl + add %GLOBAL_SPARE, 0x8, %o0 +ENDPROC(U1_gs_8) +ENTRY(U1_gs_10) + VISExitHalf + add %GLOBAL_SPARE, %o2, %GLOBAL_SPARE + retl + add %GLOBAL_SPARE, 0x10, %o0 +ENDPROC(U1_gs_10) +ENTRY(U1_o2_0) + retl + mov %o2, %o0 +ENDPROC(U1_o2_0) +ENTRY(U1_o2_8) + retl + add %o2, 8, %o0 +ENDPROC(U1_o2_8) +ENTRY(U1_o2_4) + retl + add %o2, 4, %o0 +ENDPROC(U1_o2_4) +ENTRY(U1_o2_1) + retl + add %o2, 1, %o0 +ENDPROC(U1_o2_1) +ENTRY(U1_g1_0) + retl + add %g1, %o2, %o0 +ENDPROC(U1_g1_0) +ENTRY(U1_g1_1) + add %g1, 1, %g1 + retl + add %g1, %o2, %o0 +ENDPROC(U1_g1_1) +ENTRY(U1_gs_0_o2_adj) + and %o2, 7, %o2 + retl + add %GLOBAL_SPARE, %o2, %o0 +ENDPROC(U1_gs_0_o2_adj) +ENTRY(U1_gs_8_o2_adj) + and %o2, 7, %o2 + add %GLOBAL_SPARE, 8, %GLOBAL_SPARE + retl + add %GLOBAL_SPARE, %o2, %o0 +ENDPROC(U1_gs_8_o2_adj) +#endif + .align 64 .globl FUNC_NAME @@ -167,8 +280,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ and %g2, 0x38, %g2 1: subcc %g1, 0x1, %g1 - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) - EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE)) + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U1_g1_1_fp) + EX_ST_FP(STORE(stb, %o3, %o1 + %GLOBAL_SPARE), U1_g1_1_fp) bgu,pt %XCC, 1b add %o1, 0x1, %o1 @@ -179,20 +292,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ be,pt %icc, 3f alignaddr %o1, %g0, %o1 - EX_LD_FP(LOAD(ldd, %o1, %f4)) -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) + EX_LD_FP(LOAD(ldd, %o1, %f4), U1_g2_0_fp) +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U1_g2_0_fp) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f4, %f6, %f0 - EX_ST_FP(STORE(std, %f0, %o0)) + EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp) be,pn %icc, 3f add %o0, 0x8, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U1_g2_0_fp) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f6, %f4, %f0 - EX_ST_FP(STORE(std, %f0, %o0)) + 
EX_ST_FP(STORE(std, %f0, %o0), U1_g2_8_fp) bne,pt %icc, 1b add %o0, 0x8, %o0 @@ -215,13 +328,13 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ add %g1, %GLOBAL_SPARE, %g1 subcc %o2, %g3, %o2 - EX_LD_FP(LOAD_BLK(%o1, %f0)) + EX_LD_FP(LOAD_BLK(%o1, %f0), U1_gs_0_fp) add %o1, 0x40, %o1 add %g1, %g3, %g1 - EX_LD_FP(LOAD_BLK(%o1, %f16)) + EX_LD_FP(LOAD_BLK(%o1, %f16), U1_gs_0_fp) add %o1, 0x40, %o1 sub %GLOBAL_SPARE, 0x80, %GLOBAL_SPARE - EX_LD_FP(LOAD_BLK(%o1, %f32)) + EX_LD_FP(LOAD_BLK(%o1, %f32), U1_gs_80_fp) add %o1, 0x40, %o1 /* There are 8 instances of the unrolled loop, @@ -241,11 +354,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 64 1: FREG_FROB(f0, f2, f4, f6, f8, f10,f12,f14,f16) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f32,f34,f36,f38,f40,f42,f44,f46,f0) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f0, %f2, %f48 1: FREG_FROB(f16,f18,f20,f22,f24,f26,f28,f30,f32) @@ -262,11 +375,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 56f) 1: FREG_FROB(f2, f4, f6, f8, f10,f12,f14,f16,f18) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f34,f36,f38,f40,f42,f44,f46,f0, f2) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f2, %f4, %f48 1: FREG_FROB(f18,f20,f22,f24,f26,f28,f30,f32,f34) @@ -283,11 +396,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 57f) 1: FREG_FROB(f4, f6, f8, f10,f12,f14,f16,f18,f20) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f36,f38,f40,f42,f44,f46,f0, f2, f4) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f4, %f6, %f48 1: FREG_FROB(f20,f22,f24,f26,f28,f30,f32,f34,f36) @@ -304,11 +417,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 58f) 1: FREG_FROB(f6, f8, f10,f12,f14,f16,f18,f20,f22) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f38,f40,f42,f44,f46,f0, f2, f4, f6) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f6, %f8, %f48 1: FREG_FROB(f22,f24,f26,f28,f30,f32,f34,f36,f38) @@ -325,11 +438,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 59f) 1: FREG_FROB(f8, f10,f12,f14,f16,f18,f20,f22,f24) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f40,f42,f44,f46,f0, f2, f4, f6, f8) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f8, %f10, %f48 1: FREG_FROB(f24,f26,f28,f30,f32,f34,f36,f38,f40) @@ -346,11 +459,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 60f) 1: FREG_FROB(f10,f12,f14,f16,f18,f20,f22,f24,f26) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f42,f44,f46,f0, f2, f4, f6, f8, f10) - 
LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f10, %f12, %f48 1: FREG_FROB(f26,f28,f30,f32,f34,f36,f38,f40,f42) @@ -367,11 +480,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 61f) 1: FREG_FROB(f12,f14,f16,f18,f20,f22,f24,f26,f28) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f44,f46,f0, f2, f4, f6, f8, f10,f12) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f12, %f14, %f48 1: FREG_FROB(f28,f30,f32,f34,f36,f38,f40,f42,f44) @@ -388,11 +501,11 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ STORE_JUMP(o0, f48, 62f) 1: FREG_FROB(f14,f16,f18,f20,f22,f24,f26,f28,f30) - LOOP_CHUNK1(o1, o0, GLOBAL_SPARE, 1f) + LOOP_CHUNK1(o1, o0, 1f) FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) - LOOP_CHUNK2(o1, o0, GLOBAL_SPARE, 2f) + LOOP_CHUNK2(o1, o0, 2f) FREG_FROB(f46,f0, f2, f4, f6, f8, f10,f12,f14) - LOOP_CHUNK3(o1, o0, GLOBAL_SPARE, 3f) + LOOP_CHUNK3(o1, o0, 3f) ba,pt %xcc, 1b+4 faligndata %f14, %f16, %f48 1: FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) @@ -408,53 +521,53 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ FREG_FROB(f30,f32,f34,f36,f38,f40,f42,f44,f46) STORE_JUMP(o0, f48, 63f) -40: FINISH_VISCHUNK(o0, f0, f2, g3) -41: FINISH_VISCHUNK(o0, f2, f4, g3) -42: FINISH_VISCHUNK(o0, f4, f6, g3) -43: FINISH_VISCHUNK(o0, f6, f8, g3) -44: FINISH_VISCHUNK(o0, f8, f10, g3) -45: FINISH_VISCHUNK(o0, f10, f12, g3) -46: FINISH_VISCHUNK(o0, f12, f14, g3) -47: UNEVEN_VISCHUNK(o0, f14, f0, g3) -48: FINISH_VISCHUNK(o0, f16, f18, g3) -49: FINISH_VISCHUNK(o0, f18, f20, g3) -50: FINISH_VISCHUNK(o0, f20, f22, g3) -51: FINISH_VISCHUNK(o0, f22, f24, g3) -52: FINISH_VISCHUNK(o0, f24, f26, g3) -53: FINISH_VISCHUNK(o0, f26, f28, g3) -54: FINISH_VISCHUNK(o0, f28, f30, g3) -55: UNEVEN_VISCHUNK(o0, f30, f0, g3) -56: FINISH_VISCHUNK(o0, f32, f34, g3) -57: FINISH_VISCHUNK(o0, f34, f36, g3) -58: FINISH_VISCHUNK(o0, f36, f38, g3) -59: FINISH_VISCHUNK(o0, f38, f40, g3) -60: FINISH_VISCHUNK(o0, f40, f42, g3) -61: FINISH_VISCHUNK(o0, f42, f44, g3) -62: FINISH_VISCHUNK(o0, f44, f46, g3) -63: UNEVEN_VISCHUNK_LAST(o0, f46, f0, g3) - -93: EX_LD_FP(LOAD(ldd, %o1, %f2)) +40: FINISH_VISCHUNK(o0, f0, f2) +41: FINISH_VISCHUNK(o0, f2, f4) +42: FINISH_VISCHUNK(o0, f4, f6) +43: FINISH_VISCHUNK(o0, f6, f8) +44: FINISH_VISCHUNK(o0, f8, f10) +45: FINISH_VISCHUNK(o0, f10, f12) +46: FINISH_VISCHUNK(o0, f12, f14) +47: UNEVEN_VISCHUNK(o0, f14, f0) +48: FINISH_VISCHUNK(o0, f16, f18) +49: FINISH_VISCHUNK(o0, f18, f20) +50: FINISH_VISCHUNK(o0, f20, f22) +51: FINISH_VISCHUNK(o0, f22, f24) +52: FINISH_VISCHUNK(o0, f24, f26) +53: FINISH_VISCHUNK(o0, f26, f28) +54: FINISH_VISCHUNK(o0, f28, f30) +55: UNEVEN_VISCHUNK(o0, f30, f0) +56: FINISH_VISCHUNK(o0, f32, f34) +57: FINISH_VISCHUNK(o0, f34, f36) +58: FINISH_VISCHUNK(o0, f36, f38) +59: FINISH_VISCHUNK(o0, f38, f40) +60: FINISH_VISCHUNK(o0, f40, f42) +61: FINISH_VISCHUNK(o0, f42, f44) +62: FINISH_VISCHUNK(o0, f44, f46) +63: UNEVEN_VISCHUNK_LAST(o0, f46, f0) + +93: EX_LD_FP(LOAD(ldd, %o1, %f2), U1_g3_0_fp) add %o1, 8, %o1 subcc %g3, 8, %g3 faligndata %f0, %f2, %f8 - EX_ST_FP(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) bl,pn %xcc, 95f add %o0, 8, %o0 - EX_LD_FP(LOAD(ldd, %o1, %f0)) + EX_LD_FP(LOAD(ldd, %o1, %f0), U1_g3_0_fp) add %o1, 8, %o1 subcc %g3, 8, %g3 faligndata %f2, %f0, %f8 - EX_ST_FP(STORE(std, %f8, %o0)) + 
EX_ST_FP(STORE(std, %f8, %o0), U1_g3_8_fp) bge,pt %xcc, 93b add %o0, 8, %o0 95: brz,pt %o2, 2f mov %g1, %o1 -1: EX_LD_FP(LOAD(ldub, %o1, %o3)) +1: EX_LD_FP(LOAD(ldub, %o1, %o3), U1_o2_0_fp) add %o1, 1, %o1 subcc %o2, 1, %o2 - EX_ST_FP(STORE(stb, %o3, %o0)) + EX_ST_FP(STORE(stb, %o3, %o0), U1_o2_1_fp) bne,pt %xcc, 1b add %o0, 1, %o0 @@ -470,27 +583,27 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 72: andn %o2, 0xf, %GLOBAL_SPARE and %o2, 0xf, %o2 -1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) - EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) +1: EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U1_gs_0) + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U1_gs_0) subcc %GLOBAL_SPARE, 0x10, %GLOBAL_SPARE - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_gs_10) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + %o3)) + EX_ST(STORE(stx, %g1, %o1 + %o3), U1_gs_8) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 be,pt %XCC, 1f nop - EX_LD(LOAD(ldx, %o1, %o5)) + EX_LD(LOAD(ldx, %o1, %o5), U1_o2_0) sub %o2, 0x8, %o2 - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_ST(STORE(stx, %o5, %o1 + %o3), U1_o2_8) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop - EX_LD(LOAD(lduw, %o1, %o5)) + EX_LD(LOAD(lduw, %o1, %o5), U1_o2_0) sub %o2, 0x4, %o2 - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_ST(STORE(stw, %o5, %o1 + %o3), U1_o2_4) add %o1, 0x4, %o1 1: cmp %o2, 0 be,pt %XCC, 85f @@ -504,9 +617,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %g0, %g1, %g1 sub %o2, %g1, %o2 -1: EX_LD(LOAD(ldub, %o1, %o5)) +1: EX_LD(LOAD(ldub, %o1, %o5), U1_g1_0) subcc %g1, 1, %g1 - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_ST(STORE(stb, %o5, %o1 + %o3), U1_g1_1) bgu,pt %icc, 1b add %o1, 1, %o1 @@ -522,16 +635,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 8: mov 64, %o3 andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1, %g2)) + EX_LD(LOAD(ldx, %o1, %g2), U1_o2_0) sub %o3, %g1, %o3 andn %o2, 0x7, %GLOBAL_SPARE sllx %g2, %g1, %g2 -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U1_gs_0_o2_adj) subcc %GLOBAL_SPARE, 0x8, %GLOBAL_SPARE add %o1, 0x8, %o1 srlx %g3, %o3, %o5 or %o5, %g2, %o5 - EX_ST(STORE(stx, %o5, %o0)) + EX_ST(STORE(stx, %o5, %o0), U1_gs_8_o2_adj) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -549,9 +662,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ bne,pn %XCC, 90f sub %o0, %o1, %o3 -1: EX_LD(LOAD(lduw, %o1, %g1)) +1: EX_LD(LOAD(lduw, %o1, %g1), U1_o2_0) subcc %o2, 4, %o2 - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_ST(STORE(stw, %g1, %o1 + %o3), U1_o2_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -559,9 +672,9 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ mov EX_RETVAL(%o4), %o0 .align 32 -90: EX_LD(LOAD(ldub, %o1, %g1)) +90: EX_LD(LOAD(ldub, %o1, %g1), U1_o2_0) subcc %o2, 1, %o2 - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_ST(STORE(stb, %g1, %o1 + %o3), U1_o2_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl diff --git a/arch/sparc/lib/U3copy_from_user.S b/arch/sparc/lib/U3copy_from_user.S index 88ad73d..db73010 100644 --- a/arch/sparc/lib/U3copy_from_user.S +++ b/arch/sparc/lib/U3copy_from_user.S @@ -3,19 +3,19 @@ * Copyright (C) 1999, 2000, 2004 David S. 
Miller (davem@redhat.com) */ -#define EX_LD(x) \ +#define EX_LD(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_LD_FP(x) \ +#define EX_LD_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U3copy_to_user.S b/arch/sparc/lib/U3copy_to_user.S index 845139d..c4ee858 100644 --- a/arch/sparc/lib/U3copy_to_user.S +++ b/arch/sparc/lib/U3copy_to_user.S @@ -3,19 +3,19 @@ * Copyright (C) 1999, 2000, 2004 David S. Miller (davem@redhat.com) */ -#define EX_ST(x) \ +#define EX_ST(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, y; \ .text; \ .align 4; -#define EX_ST_FP(x) \ +#define EX_ST_FP(x,y) \ 98: x; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one_fp;\ + .word 98b, y##_fp; \ .text; \ .align 4; diff --git a/arch/sparc/lib/U3memcpy.S b/arch/sparc/lib/U3memcpy.S index 491ee69..54f9870 100644 --- a/arch/sparc/lib/U3memcpy.S +++ b/arch/sparc/lib/U3memcpy.S @@ -4,6 +4,7 @@ */ #ifdef __KERNEL__ +#include <linux/linkage.h> #include <asm/visasm.h> #include <asm/asi.h> #define GLOBAL_SPARE %g7 @@ -22,21 +23,17 @@ #endif #ifndef EX_LD -#define EX_LD(x) x +#define EX_LD(x,y) x #endif #ifndef EX_LD_FP -#define EX_LD_FP(x) x +#define EX_LD_FP(x,y) x #endif #ifndef EX_ST -#define EX_ST(x) x +#define EX_ST(x,y) x #endif #ifndef EX_ST_FP -#define EX_ST_FP(x) x -#endif - -#ifndef EX_RETVAL -#define EX_RETVAL(x) x +#define EX_ST_FP(x,y) x #endif #ifndef LOAD @@ -77,6 +74,87 @@ */ .text +#ifndef EX_RETVAL +#define EX_RETVAL(x) x +__restore_fp: + VISExitHalf + retl + nop +ENTRY(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) + add %g1, 1, %g1 + add %g2, %g1, %g2 + ba,pt %xcc, __restore_fp + add %o2, %g2, %o0 +ENDPROC(U3_retl_o2_plus_g2_plus_g1_plus_1_fp) +ENTRY(U3_retl_o2_plus_g2_fp) + ba,pt %xcc, __restore_fp + add %o2, %g2, %o0 +ENDPROC(U3_retl_o2_plus_g2_fp) +ENTRY(U3_retl_o2_plus_g2_plus_8_fp) + add %g2, 8, %g2 + ba,pt %xcc, __restore_fp + add %o2, %g2, %o0 +ENDPROC(U3_retl_o2_plus_g2_plus_8_fp) +ENTRY(U3_retl_o2) + retl + mov %o2, %o0 +ENDPROC(U3_retl_o2) +ENTRY(U3_retl_o2_plus_1) + retl + add %o2, 1, %o0 +ENDPROC(U3_retl_o2_plus_1) +ENTRY(U3_retl_o2_plus_4) + retl + add %o2, 4, %o0 +ENDPROC(U3_retl_o2_plus_4) +ENTRY(U3_retl_o2_plus_8) + retl + add %o2, 8, %o0 +ENDPROC(U3_retl_o2_plus_8) +ENTRY(U3_retl_o2_plus_g1_plus_1) + add %g1, 1, %g1 + retl + add %o2, %g1, %o0 +ENDPROC(U3_retl_o2_plus_g1_plus_1) +ENTRY(U3_retl_o2_fp) + ba,pt %xcc, __restore_fp + mov %o2, %o0 +ENDPROC(U3_retl_o2_fp) +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) + sll %o3, 6, %o3 + add %o3, 0x80, %o3 + ba,pt %xcc, __restore_fp + add %o2, %o3, %o0 +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x80_fp) +ENTRY(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) + sll %o3, 6, %o3 + add %o3, 0x40, %o3 + ba,pt %xcc, __restore_fp + add %o2, %o3, %o0 +ENDPROC(U3_retl_o2_plus_o3_sll_6_plus_0x40_fp) +ENTRY(U3_retl_o2_plus_GS_plus_0x10) + add GLOBAL_SPARE, 0x10, GLOBAL_SPARE + retl + add %o2, GLOBAL_SPARE, %o0 +ENDPROC(U3_retl_o2_plus_GS_plus_0x10) +ENTRY(U3_retl_o2_plus_GS_plus_0x08) + add GLOBAL_SPARE, 0x08, GLOBAL_SPARE + retl + add %o2, GLOBAL_SPARE, %o0 +ENDPROC(U3_retl_o2_plus_GS_plus_0x08) +ENTRY(U3_retl_o2_and_7_plus_GS) + and %o2, 7, %o2 + retl + add %o2, GLOBAL_SPARE, %o2 +ENDPROC(U3_retl_o2_and_7_plus_GS) +ENTRY(U3_retl_o2_and_7_plus_GS_plus_8) + add GLOBAL_SPARE, 8, GLOBAL_SPARE + and %o2, 7, %o2 + retl + add %o2, GLOBAL_SPARE, 
%o2 +ENDPROC(U3_retl_o2_and_7_plus_GS_plus_8) +#endif + .align 64 /* The cheetah's flexible spine, oversized liver, enlarged heart, @@ -126,8 +204,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ and %g2, 0x38, %g2 1: subcc %g1, 0x1, %g1 - EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3)) - EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE)) + EX_LD_FP(LOAD(ldub, %o1 + 0x00, %o3), U3_retl_o2_plus_g2_plus_g1_plus_1) + EX_ST_FP(STORE(stb, %o3, %o1 + GLOBAL_SPARE), U3_retl_o2_plus_g2_plus_g1_plus_1) bgu,pt %XCC, 1b add %o1, 0x1, %o1 @@ -138,20 +216,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ be,pt %icc, 3f alignaddr %o1, %g0, %o1 - EX_LD_FP(LOAD(ldd, %o1, %f4)) -1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6)) + EX_LD_FP(LOAD(ldd, %o1, %f4), U3_retl_o2_plus_g2) +1: EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f6), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f4, %f6, %f0 - EX_ST_FP(STORE(std, %f0, %o0)) + EX_ST_FP(STORE(std, %f0, %o0), U3_retl_o2_plus_g2_plus_8) be,pn %icc, 3f add %o0, 0x8, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x8, %f4), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f6, %f4, %f2 - EX_ST_FP(STORE(std, %f2, %o0)) + EX_ST_FP(STORE(std, %f2, %o0), U3_retl_o2_plus_g2_plus_8) bne,pt %icc, 1b add %o0, 0x8, %o0 @@ -161,25 +239,25 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ LOAD(prefetch, %o1 + 0x080, #one_read) LOAD(prefetch, %o1 + 0x0c0, #one_read) LOAD(prefetch, %o1 + 0x100, #one_read) - EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x000, %f0), U3_retl_o2) LOAD(prefetch, %o1 + 0x140, #one_read) - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2) LOAD(prefetch, %o1 + 0x180, #one_read) - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2) LOAD(prefetch, %o1 + 0x1c0, #one_read) faligndata %f0, %f2, %f16 - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2) faligndata %f2, %f4, %f18 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2) faligndata %f4, %f6, %f20 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2) faligndata %f6, %f8, %f22 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2) faligndata %f8, %f10, %f24 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2) faligndata %f10, %f12, %f26 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2) subcc GLOBAL_SPARE, 0x80, GLOBAL_SPARE add %o1, 0x40, %o1 @@ -190,26 +268,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 64 1: - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f12, %f14, %f28 - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f14, %f0, %f30 - EX_ST_FP(STORE_BLK(%f16, %o0)) - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80) + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f0, %f2, %f16 add %o0, 0x40, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f2, %f4, %f18 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f4, %f6, %f20 - 
EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40) subcc %o3, 0x01, %o3 faligndata %f6, %f8, %f22 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f8, %f10, %f24 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x80) LOAD(prefetch, %o1 + 0x1c0, #one_read) faligndata %f10, %f12, %f26 bg,pt %XCC, 1b @@ -217,29 +295,29 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ /* Finally we copy the last full 64-byte block. */ 2: - EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2)) + EX_LD_FP(LOAD(ldd, %o1 + 0x008, %f2), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f12, %f14, %f28 - EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4)) + EX_LD_FP(LOAD(ldd, %o1 + 0x010, %f4), U3_retl_o2_plus_o3_sll_6_plus_0x80) faligndata %f14, %f0, %f30 - EX_ST_FP(STORE_BLK(%f16, %o0)) - EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6)) + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x80) + EX_LD_FP(LOAD(ldd, %o1 + 0x018, %f6), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f0, %f2, %f16 - EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8)) + EX_LD_FP(LOAD(ldd, %o1 + 0x020, %f8), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f2, %f4, %f18 - EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10)) + EX_LD_FP(LOAD(ldd, %o1 + 0x028, %f10), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f4, %f6, %f20 - EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12)) + EX_LD_FP(LOAD(ldd, %o1 + 0x030, %f12), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f6, %f8, %f22 - EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14)) + EX_LD_FP(LOAD(ldd, %o1 + 0x038, %f14), U3_retl_o2_plus_o3_sll_6_plus_0x40) faligndata %f8, %f10, %f24 cmp %g1, 0 be,pt %XCC, 1f add %o0, 0x40, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x040, %f0), U3_retl_o2_plus_o3_sll_6_plus_0x40) 1: faligndata %f10, %f12, %f26 faligndata %f12, %f14, %f28 faligndata %f14, %f0, %f30 - EX_ST_FP(STORE_BLK(%f16, %o0)) + EX_ST_FP(STORE_BLK(%f16, %o0), U3_retl_o2_plus_o3_sll_6_plus_0x40) add %o0, 0x40, %o0 add %o1, 0x40, %o1 membar #Sync @@ -259,20 +337,20 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %g2, %o2 be,a,pt %XCC, 1f - EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x00, %f0), U3_retl_o2_plus_g2) -1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2)) +1: EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f2), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f0, %f2, %f8 - EX_ST_FP(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8) be,pn %XCC, 2f add %o0, 0x8, %o0 - EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0)) + EX_LD_FP(LOAD(ldd, %o1 + 0x08, %f0), U3_retl_o2_plus_g2) add %o1, 0x8, %o1 subcc %g2, 0x8, %g2 faligndata %f2, %f0, %f8 - EX_ST_FP(STORE(std, %f8, %o0)) + EX_ST_FP(STORE(std, %f8, %o0), U3_retl_o2_plus_g2_plus_8) bne,pn %XCC, 1b add %o0, 0x8, %o0 @@ -292,30 +370,33 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andcc %o2, 0x8, %g0 be,pt %icc, 1f nop - EX_LD(LOAD(ldx, %o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2) + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2) add %o1, 0x8, %o1 + sub %o2, 8, %o2 1: andcc %o2, 0x4, %g0 be,pt %icc, 1f nop - EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2) + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2) add %o1, 0x4, %o1 + sub %o2, 4, %o2 1: andcc %o2, 0x2, %g0 be,pt %icc, 1f nop - EX_LD(LOAD(lduh, %o1, %o5)) - EX_ST(STORE(sth, %o5, %o1 + %o3)) + 
EX_LD(LOAD(lduh, %o1, %o5), U3_retl_o2) + EX_ST(STORE(sth, %o5, %o1 + %o3), U3_retl_o2) add %o1, 0x2, %o1 + sub %o2, 2, %o2 1: andcc %o2, 0x1, %g0 be,pt %icc, 85f nop - EX_LD(LOAD(ldub, %o1, %o5)) + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2) ba,pt %xcc, 85f - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2) .align 64 70: /* 16 < len <= 64 */ @@ -326,26 +407,26 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0xf, GLOBAL_SPARE and %o2, 0xf, %o2 1: subcc GLOBAL_SPARE, 0x10, GLOBAL_SPARE - EX_LD(LOAD(ldx, %o1 + 0x00, %o5)) - EX_LD(LOAD(ldx, %o1 + 0x08, %g1)) - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_LD(LOAD(ldx, %o1 + 0x00, %o5), U3_retl_o2_plus_GS_plus_0x10) + EX_LD(LOAD(ldx, %o1 + 0x08, %g1), U3_retl_o2_plus_GS_plus_0x10) + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x10) add %o1, 0x8, %o1 - EX_ST(STORE(stx, %g1, %o1 + %o3)) + EX_ST(STORE(stx, %g1, %o1 + %o3), U3_retl_o2_plus_GS_plus_0x08) bgu,pt %XCC, 1b add %o1, 0x8, %o1 73: andcc %o2, 0x8, %g0 be,pt %XCC, 1f nop sub %o2, 0x8, %o2 - EX_LD(LOAD(ldx, %o1, %o5)) - EX_ST(STORE(stx, %o5, %o1 + %o3)) + EX_LD(LOAD(ldx, %o1, %o5), U3_retl_o2_plus_8) + EX_ST(STORE(stx, %o5, %o1 + %o3), U3_retl_o2_plus_8) add %o1, 0x8, %o1 1: andcc %o2, 0x4, %g0 be,pt %XCC, 1f nop sub %o2, 0x4, %o2 - EX_LD(LOAD(lduw, %o1, %o5)) - EX_ST(STORE(stw, %o5, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %o5), U3_retl_o2_plus_4) + EX_ST(STORE(stw, %o5, %o1 + %o3), U3_retl_o2_plus_4) add %o1, 0x4, %o1 1: cmp %o2, 0 be,pt %XCC, 85f @@ -361,8 +442,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ sub %o2, %g1, %o2 1: subcc %g1, 1, %g1 - EX_LD(LOAD(ldub, %o1, %o5)) - EX_ST(STORE(stb, %o5, %o1 + %o3)) + EX_LD(LOAD(ldub, %o1, %o5), U3_retl_o2_plus_g1_plus_1) + EX_ST(STORE(stb, %o5, %o1 + %o3), U3_retl_o2_plus_g1_plus_1) bgu,pt %icc, 1b add %o1, 1, %o1 @@ -378,16 +459,16 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 8: mov 64, %o3 andn %o1, 0x7, %o1 - EX_LD(LOAD(ldx, %o1, %g2)) + EX_LD(LOAD(ldx, %o1, %g2), U3_retl_o2) sub %o3, %g1, %o3 andn %o2, 0x7, GLOBAL_SPARE sllx %g2, %g1, %g2 -1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3)) +1: EX_LD(LOAD(ldx, %o1 + 0x8, %g3), U3_retl_o2_and_7_plus_GS) subcc GLOBAL_SPARE, 0x8, GLOBAL_SPARE add %o1, 0x8, %o1 srlx %g3, %o3, %o5 or %o5, %g2, %o5 - EX_ST(STORE(stx, %o5, %o0)) + EX_ST(STORE(stx, %o5, %o0), U3_retl_o2_and_7_plus_GS_plus_8) add %o0, 0x8, %o0 bgu,pt %icc, 1b sllx %g3, %g1, %g2 @@ -407,8 +488,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ 1: subcc %o2, 4, %o2 - EX_LD(LOAD(lduw, %o1, %g1)) - EX_ST(STORE(stw, %g1, %o1 + %o3)) + EX_LD(LOAD(lduw, %o1, %g1), U3_retl_o2_plus_4) + EX_ST(STORE(stw, %g1, %o1 + %o3), U3_retl_o2_plus_4) bgu,pt %XCC, 1b add %o1, 4, %o1 @@ -418,8 +499,8 @@ FUNC_NAME: /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX_LD(LOAD(ldub, %o1, %g1)) - EX_ST(STORE(stb, %g1, %o1 + %o3)) + EX_LD(LOAD(ldub, %o1, %g1), U3_retl_o2_plus_1) + EX_ST(STORE(stb, %g1, %o1 + %o3), U3_retl_o2_plus_1) bgu,pt %XCC, 90b add %o1, 1, %o1 retl diff --git a/arch/sparc/lib/copy_in_user.S b/arch/sparc/lib/copy_in_user.S index 482de09..0252b21 100644 --- a/arch/sparc/lib/copy_in_user.S +++ b/arch/sparc/lib/copy_in_user.S @@ -9,18 +9,33 @@ #define XCC xcc -#define EX(x,y) \ +#define EX(x,y,z) \ 98: x,y; \ .section __ex_table,"a";\ .align 4; \ - .word 98b, __retl_one; \ + .word 98b, z; \ .text; \ .align 4; +#define EX_O4(x,y) EX(x,y,__retl_o4_plus_8) +#define EX_O2_4(x,y) EX(x,y,__retl_o2_plus_4) +#define EX_O2_1(x,y) EX(x,y,__retl_o2_plus_1) + .register %g2,#scratch .register %g3,#scratch .text 
+__retl_o4_plus_8: + add %o4, %o2, %o4 + retl + add %o4, 8, %o0 +__retl_o2_plus_4: + retl + add %o2, 4, %o0 +__retl_o2_plus_1: + retl + add %o2, 1, %o0 + .align 32 /* Don't try to get too fancy here, just nice and @@ -45,8 +60,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ andn %o2, 0x7, %o4 and %o2, 0x7, %o2 1: subcc %o4, 0x8, %o4 - EX(ldxa [%o1] %asi, %o5) - EX(stxa %o5, [%o0] %asi) + EX_O4(ldxa [%o1] %asi, %o5) + EX_O4(stxa %o5, [%o0] %asi) add %o1, 0x8, %o1 bgu,pt %XCC, 1b add %o0, 0x8, %o0 @@ -54,8 +69,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ be,pt %XCC, 1f nop sub %o2, 0x4, %o2 - EX(lduwa [%o1] %asi, %o5) - EX(stwa %o5, [%o0] %asi) + EX_O2_4(lduwa [%o1] %asi, %o5) + EX_O2_4(stwa %o5, [%o0] %asi) add %o1, 0x4, %o1 add %o0, 0x4, %o0 1: cmp %o2, 0 @@ -71,8 +86,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ 82: subcc %o2, 4, %o2 - EX(lduwa [%o1] %asi, %g1) - EX(stwa %g1, [%o0] %asi) + EX_O2_4(lduwa [%o1] %asi, %g1) + EX_O2_4(stwa %g1, [%o0] %asi) add %o1, 4, %o1 bgu,pt %XCC, 82b add %o0, 4, %o0 @@ -83,8 +98,8 @@ ENTRY(___copy_in_user) /* %o0=dst, %o1=src, %o2=len */ .align 32 90: subcc %o2, 1, %o2 - EX(lduba [%o1] %asi, %g1) - EX(stba %g1, [%o0] %asi) + EX_O2_1(lduba [%o1] %asi, %g1) + EX_O2_1(stba %g1, [%o0] %asi) add %o1, 1, %o1 bgu,pt %XCC, 90b add %o0, 1, %o0 diff --git a/arch/sparc/lib/user_fixup.c b/arch/sparc/lib/user_fixup.c deleted file mode 100644 index ac96ae2..0000000 --- a/arch/sparc/lib/user_fixup.c +++ /dev/null @@ -1,71 +0,0 @@ -/* user_fixup.c: Fix up user copy faults. - * - * Copyright (C) 2004 David S. Miller <davem@redhat.com> - */ - -#include <linux/compiler.h> -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/module.h> - -#include <asm/uaccess.h> - -/* Calculating the exact fault address when using - * block loads and stores can be very complicated. - * - * Instead of trying to be clever and handling all - * of the cases, just fix things up simply here. 
- */ - -static unsigned long compute_size(unsigned long start, unsigned long size, unsigned long *offset) -{ - unsigned long fault_addr = current_thread_info()->fault_address; - unsigned long end = start + size; - - if (fault_addr < start || fault_addr >= end) { - *offset = 0; - } else { - *offset = fault_addr - start; - size = end - fault_addr; - } - return size; -} - -unsigned long copy_from_user_fixup(void *to, const void __user *from, unsigned long size) -{ - unsigned long offset; - - size = compute_size((unsigned long) from, size, &offset); - if (likely(size)) - memset(to + offset, 0, size); - - return size; -} -EXPORT_SYMBOL(copy_from_user_fixup); - -unsigned long copy_to_user_fixup(void __user *to, const void *from, unsigned long size) -{ - unsigned long offset; - - return compute_size((unsigned long) to, size, &offset); -} -EXPORT_SYMBOL(copy_to_user_fixup); - -unsigned long copy_in_user_fixup(void __user *to, void __user *from, unsigned long size) -{ - unsigned long fault_addr = current_thread_info()->fault_address; - unsigned long start = (unsigned long) to; - unsigned long end = start + size; - - if (fault_addr >= start && fault_addr < end) - return end - fault_addr; - - start = (unsigned long) from; - end = start + size; - if (fault_addr >= start && fault_addr < end) - return end - fault_addr; - - return size; -} -EXPORT_SYMBOL(copy_in_user_fixup); diff --git a/arch/sparc/mm/gup.c b/arch/sparc/mm/gup.c index 4e06750..cd0e32b 100644 --- a/arch/sparc/mm/gup.c +++ b/arch/sparc/mm/gup.c @@ -238,7 +238,8 @@ slow: pages += nr; ret = get_user_pages_unlocked(start, - (end - start) >> PAGE_SHIFT, write, 0, pages); + (end - start) >> PAGE_SHIFT, pages, + write ? FOLL_WRITE : 0); /* Have to be a bit careful with return values */ if (nr > 0) { diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index f2b7711..e20fbba 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -27,6 +27,20 @@ static inline int tag_compare(unsigned long tag, unsigned long vaddr) return (tag == (vaddr >> 22)); } +static void flush_tsb_kernel_range_scan(unsigned long start, unsigned long end) +{ + unsigned long idx; + + for (idx = 0; idx < KERNEL_TSB_NENTRIES; idx++) { + struct tsb *ent = &swapper_tsb[idx]; + unsigned long match = idx << 13; + + match |= (ent->tag << 22); + if (match >= start && match < end) + ent->tag = (1UL << TSB_TAG_INVALID_BIT); + } +} + /* TSB flushes need only occur on the processor initiating the address * space modification, not on each cpu the address space has run on. * Only the TLB flush needs that treatment. 
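[Editor's note on the tsb.c hunks above and below: the added flush_tsb_kernel_range_scan() changes the cost model for huge kernel flushes. Instead of hashing every page in [start, end) into the TSB (O(pages)), a large range is handled by one linear walk of the table, reconstructing each entry's virtual address from its index and tag and invalidating it if it falls in range (O(table size)). The hunk that follows wires in the crossover. Below is a minimal user-space C sketch of that heuristic, under stated assumptions: PAGE_SHIFT, TSB_NENTRIES and TAG_INVALID are scaled-down stand-ins for the kernel's KERNEL_TSB_NENTRIES and TSB_TAG_INVALID_BIT, and the sketch assumes the table index covers exactly vaddr bits 13..21 so the scan path can rebuild addresses, as the kernel layout does.]

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT   13                          /* sparc64 base page: 8K */
#define PAGE_SIZE    (1UL << PAGE_SHIFT)
#define TSB_NENTRIES (1UL << (22 - PAGE_SHIFT))  /* 512: index covers vaddr bits 13..21 */
#define TAG_INVALID  (1UL << 46)                 /* stand-in for TSB_TAG_INVALID_BIT */

struct tsb_ent { uint64_t tag; };                /* tag holds vaddr >> 22 when valid */
static struct tsb_ent tsb[TSB_NENTRIES];

/* Per-page path: hash each page, invalidate only on a tag match. */
static void flush_by_hash(uint64_t start, uint64_t end)
{
	for (uint64_t v = start; v < end; v += PAGE_SIZE) {
		struct tsb_ent *ent = &tsb[(v >> PAGE_SHIFT) & (TSB_NENTRIES - 1)];

		if (ent->tag == v >> 22)
			ent->tag = TAG_INVALID;
	}
}

/* Scan path: one pass over the table, rebuilding each entry's vaddr from
 * its index (bits 13..21) and its tag (bits 22 and up). */
static void flush_by_scan(uint64_t start, uint64_t end)
{
	for (uint64_t idx = 0; idx < TSB_NENTRIES; idx++) {
		uint64_t match = (idx << PAGE_SHIFT) | (tsb[idx].tag << 22);

		if (match >= start && match < end)
			tsb[idx].tag = TAG_INVALID;
	}
}

static void flush_range(uint64_t start, uint64_t end)
{
	/* Same crossover as the patch: once the range would cost more than
	 * two full table walks, a single scan is cheaper than hashing. */
	if ((end - start) >> PAGE_SHIFT >= 2 * TSB_NENTRIES)
		flush_by_scan(start, end);
	else
		flush_by_hash(start, end);
}

int main(void)
{
	flush_range(0, 64 * PAGE_SIZE);    /* small range: per-page hashing */
	flush_range(0, 4096 * PAGE_SIZE);  /* huge range: one table scan */
	printf("crossover at %lu pages\n", 2 * TSB_NENTRIES);
	return 0;
}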
@@ -36,6 +50,9 @@ void flush_tsb_kernel_range(unsigned long start, unsigned long end) { unsigned long v; + if ((end - start) >> PAGE_SHIFT >= 2 * KERNEL_TSB_NENTRIES) + return flush_tsb_kernel_range_scan(start, end); + for (v = start; v < end; v += PAGE_SIZE) { unsigned long hash = tsb_hash(v, PAGE_SHIFT, KERNEL_TSB_NENTRIES); diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S index b4f4733..5d2fd6c 100644 --- a/arch/sparc/mm/ultra.S +++ b/arch/sparc/mm/ultra.S @@ -30,7 +30,7 @@ .text .align 32 .globl __flush_tlb_mm -__flush_tlb_mm: /* 18 insns */ +__flush_tlb_mm: /* 19 insns */ /* %o0=(ctx & TAG_CONTEXT_BITS), %o1=SECONDARY_CONTEXT */ ldxa [%o1] ASI_DMMU, %g2 cmp %g2, %o0 @@ -81,7 +81,7 @@ __flush_tlb_page: /* 22 insns */ .align 32 .globl __flush_tlb_pending -__flush_tlb_pending: /* 26 insns */ +__flush_tlb_pending: /* 27 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ rdpr %pstate, %g7 sllx %o1, 3, %o1 @@ -113,12 +113,14 @@ __flush_tlb_pending: /* 26 insns */ .align 32 .globl __flush_tlb_kernel_range -__flush_tlb_kernel_range: /* 16 insns */ +__flush_tlb_kernel_range: /* 31 insns */ /* %o0=start, %o1=end */ cmp %o0, %o1 be,pn %xcc, 2f + sub %o1, %o0, %o3 + srlx %o3, 18, %o4 + brnz,pn %o4, __spitfire_flush_tlb_kernel_range_slow sethi %hi(PAGE_SIZE), %o4 - sub %o1, %o0, %o3 sub %o3, %o4, %o3 or %o0, 0x20, %o0 ! Nucleus 1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP @@ -131,6 +133,41 @@ __flush_tlb_kernel_range: /* 16 insns */ retl nop nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + +__spitfire_flush_tlb_kernel_range_slow: + mov 63 * 8, %o4 +1: ldxa [%o4] ASI_ITLB_DATA_ACCESS, %o3 + andcc %o3, 0x40, %g0 /* _PAGE_L_4U */ + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %o3 + stxa %g0, [%o3] ASI_IMMU + stxa %g0, [%o4] ASI_ITLB_DATA_ACCESS + membar #Sync +2: ldxa [%o4] ASI_DTLB_DATA_ACCESS, %o3 + andcc %o3, 0x40, %g0 + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %o3 + stxa %g0, [%o3] ASI_DMMU + stxa %g0, [%o4] ASI_DTLB_DATA_ACCESS + membar #Sync +2: sub %o4, 8, %o4 + brgez,pt %o4, 1b + nop + retl + nop __spitfire_flush_tlb_mm_slow: rdpr %pstate, %g1 @@ -285,6 +322,40 @@ __cheetah_flush_tlb_pending: /* 27 insns */ retl wrpr %g7, 0x0, %pstate +__cheetah_flush_tlb_kernel_range: /* 31 insns */ + /* %o0=start, %o1=end */ + cmp %o0, %o1 + be,pn %xcc, 2f + sub %o1, %o0, %o3 + srlx %o3, 18, %o4 + brnz,pn %o4, 3f + sethi %hi(PAGE_SIZE), %o4 + sub %o3, %o4, %o3 + or %o0, 0x20, %o0 ! 
Nucleus +1: stxa %g0, [%o0 + %o3] ASI_DMMU_DEMAP + stxa %g0, [%o0 + %o3] ASI_IMMU_DEMAP + membar #Sync + brnz,pt %o3, 1b + sub %o3, %o4, %o3 +2: sethi %hi(KERNBASE), %o3 + flush %o3 + retl + nop +3: mov 0x80, %o4 + stxa %g0, [%o4] ASI_DMMU_DEMAP + membar #Sync + stxa %g0, [%o4] ASI_IMMU_DEMAP + membar #Sync + retl + nop + nop + nop + nop + nop + nop + nop + nop + #ifdef DCACHE_ALIASING_POSSIBLE __cheetah_flush_dcache_page: /* 11 insns */ sethi %hi(PAGE_OFFSET), %g1 @@ -309,19 +380,28 @@ __hypervisor_tlb_tl0_error: ret restore -__hypervisor_flush_tlb_mm: /* 10 insns */ +__hypervisor_flush_tlb_mm: /* 19 insns */ mov %o0, %o2 /* ARG2: mmu context */ mov 0, %o0 /* ARG0: CPU lists unimplemented */ mov 0, %o1 /* ARG1: CPU lists unimplemented */ mov HV_MMU_ALL, %o3 /* ARG3: flags */ mov HV_FAST_MMU_DEMAP_CTX, %o5 ta HV_FAST_TRAP - brnz,pn %o0, __hypervisor_tlb_tl0_error + brnz,pn %o0, 1f mov HV_FAST_MMU_DEMAP_CTX, %o1 retl nop +1: sethi %hi(__hypervisor_tlb_tl0_error), %o5 + jmpl %o5 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop + nop + nop + nop + nop + nop + nop -__hypervisor_flush_tlb_page: /* 11 insns */ +__hypervisor_flush_tlb_page: /* 22 insns */ /* %o0 = context, %o1 = vaddr */ mov %o0, %g2 mov %o1, %o0 /* ARG0: vaddr + IMMU-bit */ @@ -330,12 +410,23 @@ __hypervisor_flush_tlb_page: /* 11 insns */ srlx %o0, PAGE_SHIFT, %o0 sllx %o0, PAGE_SHIFT, %o0 ta HV_MMU_UNMAP_ADDR_TRAP - brnz,pn %o0, __hypervisor_tlb_tl0_error + brnz,pn %o0, 1f mov HV_MMU_UNMAP_ADDR_TRAP, %o1 retl nop +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop + nop + nop + nop + nop + nop + nop + nop + nop -__hypervisor_flush_tlb_pending: /* 16 insns */ +__hypervisor_flush_tlb_pending: /* 27 insns */ /* %o0 = context, %o1 = nr, %o2 = vaddrs[] */ sllx %o1, 3, %g1 mov %o2, %g2 @@ -347,31 +438,57 @@ __hypervisor_flush_tlb_pending: /* 16 insns */ srlx %o0, PAGE_SHIFT, %o0 sllx %o0, PAGE_SHIFT, %o0 ta HV_MMU_UNMAP_ADDR_TRAP - brnz,pn %o0, __hypervisor_tlb_tl0_error + brnz,pn %o0, 1f mov HV_MMU_UNMAP_ADDR_TRAP, %o1 brnz,pt %g1, 1b nop retl nop +1: sethi %hi(__hypervisor_tlb_tl0_error), %o2 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop + nop + nop + nop + nop + nop + nop + nop + nop -__hypervisor_flush_tlb_kernel_range: /* 16 insns */ +__hypervisor_flush_tlb_kernel_range: /* 31 insns */ /* %o0=start, %o1=end */ cmp %o0, %o1 be,pn %xcc, 2f - sethi %hi(PAGE_SIZE), %g3 - mov %o0, %g1 - sub %o1, %g1, %g2 + sub %o1, %o0, %g2 + srlx %g2, 18, %g3 + brnz,pn %g3, 4f + mov %o0, %g1 + sethi %hi(PAGE_SIZE), %g3 sub %g2, %g3, %g2 1: add %g1, %g2, %o0 /* ARG0: virtual address */ mov 0, %o1 /* ARG1: mmu context */ mov HV_MMU_ALL, %o2 /* ARG2: flags */ ta HV_MMU_UNMAP_ADDR_TRAP - brnz,pn %o0, __hypervisor_tlb_tl0_error + brnz,pn %o0, 3f mov HV_MMU_UNMAP_ADDR_TRAP, %o1 brnz,pt %g2, 1b sub %g2, %g3, %g2 2: retl nop +3: sethi %hi(__hypervisor_tlb_tl0_error), %o2 + jmpl %o2 + %lo(__hypervisor_tlb_tl0_error), %g0 + nop +4: mov 0, %o0 /* ARG0: CPU lists unimplemented */ + mov 0, %o1 /* ARG1: CPU lists unimplemented */ + mov 0, %o2 /* ARG2: mmu context == nucleus */ + mov HV_MMU_ALL, %o3 /* ARG3: flags */ + mov HV_FAST_MMU_DEMAP_CTX, %o5 + ta HV_FAST_TRAP + brnz,pn %o0, 3b + mov HV_FAST_MMU_DEMAP_CTX, %o1 + retl + nop #ifdef DCACHE_ALIASING_POSSIBLE /* XXX Niagara and friends have an 8K cache, so no aliasing is @@ -394,43 +511,6 @@ tlb_patch_one: retl nop - .globl cheetah_patch_cachetlbops -cheetah_patch_cachetlbops: - save %sp, -128, %sp - - sethi %hi(__flush_tlb_mm), %o0 - or %o0, 
%lo(__flush_tlb_mm), %o0 - sethi %hi(__cheetah_flush_tlb_mm), %o1 - or %o1, %lo(__cheetah_flush_tlb_mm), %o1 - call tlb_patch_one - mov 19, %o2 - - sethi %hi(__flush_tlb_page), %o0 - or %o0, %lo(__flush_tlb_page), %o0 - sethi %hi(__cheetah_flush_tlb_page), %o1 - or %o1, %lo(__cheetah_flush_tlb_page), %o1 - call tlb_patch_one - mov 22, %o2 - - sethi %hi(__flush_tlb_pending), %o0 - or %o0, %lo(__flush_tlb_pending), %o0 - sethi %hi(__cheetah_flush_tlb_pending), %o1 - or %o1, %lo(__cheetah_flush_tlb_pending), %o1 - call tlb_patch_one - mov 27, %o2 - -#ifdef DCACHE_ALIASING_POSSIBLE - sethi %hi(__flush_dcache_page), %o0 - or %o0, %lo(__flush_dcache_page), %o0 - sethi %hi(__cheetah_flush_dcache_page), %o1 - or %o1, %lo(__cheetah_flush_dcache_page), %o1 - call tlb_patch_one - mov 11, %o2 -#endif /* DCACHE_ALIASING_POSSIBLE */ - - ret - restore - #ifdef CONFIG_SMP /* These are all called by the slaves of a cross call, at * trap level 1, with interrupts fully disabled. @@ -447,7 +527,7 @@ cheetah_patch_cachetlbops: */ .align 32 .globl xcall_flush_tlb_mm -xcall_flush_tlb_mm: /* 21 insns */ +xcall_flush_tlb_mm: /* 24 insns */ mov PRIMARY_CONTEXT, %g2 ldxa [%g2] ASI_DMMU, %g3 srlx %g3, CTX_PGSZ1_NUC_SHIFT, %g4 @@ -469,9 +549,12 @@ xcall_flush_tlb_mm: /* 21 insns */ nop nop nop + nop + nop + nop .globl xcall_flush_tlb_page -xcall_flush_tlb_page: /* 17 insns */ +xcall_flush_tlb_page: /* 20 insns */ /* %g5=context, %g1=vaddr */ mov PRIMARY_CONTEXT, %g4 ldxa [%g4] ASI_DMMU, %g2 @@ -490,15 +573,20 @@ xcall_flush_tlb_page: /* 17 insns */ retry nop nop + nop + nop + nop .globl xcall_flush_tlb_kernel_range -xcall_flush_tlb_kernel_range: /* 25 insns */ +xcall_flush_tlb_kernel_range: /* 44 insns */ sethi %hi(PAGE_SIZE - 1), %g2 or %g2, %lo(PAGE_SIZE - 1), %g2 andn %g1, %g2, %g1 andn %g7, %g2, %g7 sub %g7, %g1, %g3 - add %g2, 1, %g2 + srlx %g3, 18, %g2 + brnz,pn %g2, 2f + add %g2, 1, %g2 sub %g3, %g2, %g3 or %g1, 0x20, %g1 ! Nucleus 1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP @@ -507,8 +595,25 @@ xcall_flush_tlb_kernel_range: /* 25 insns */ brnz,pt %g3, 1b sub %g3, %g2, %g3 retry - nop - nop +2: mov 63 * 8, %g1 +1: ldxa [%g1] ASI_ITLB_DATA_ACCESS, %g2 + andcc %g2, 0x40, %g0 /* _PAGE_L_4U */ + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %g2 + stxa %g0, [%g2] ASI_IMMU + stxa %g0, [%g1] ASI_ITLB_DATA_ACCESS + membar #Sync +2: ldxa [%g1] ASI_DTLB_DATA_ACCESS, %g2 + andcc %g2, 0x40, %g0 + bne,pn %xcc, 2f + mov TLB_TAG_ACCESS, %g2 + stxa %g0, [%g2] ASI_DMMU + stxa %g0, [%g1] ASI_DTLB_DATA_ACCESS + membar #Sync +2: sub %g1, 8, %g1 + brgez,pt %g1, 1b + nop + retry nop nop nop @@ -637,6 +742,52 @@ xcall_fetch_glob_pmu_n4: retry +__cheetah_xcall_flush_tlb_kernel_range: /* 44 insns */ + sethi %hi(PAGE_SIZE - 1), %g2 + or %g2, %lo(PAGE_SIZE - 1), %g2 + andn %g1, %g2, %g1 + andn %g7, %g2, %g7 + sub %g7, %g1, %g3 + srlx %g3, 18, %g2 + brnz,pn %g2, 2f + add %g2, 1, %g2 + sub %g3, %g2, %g3 + or %g1, 0x20, %g1 ! 
Nucleus +1: stxa %g0, [%g1 + %g3] ASI_DMMU_DEMAP + stxa %g0, [%g1 + %g3] ASI_IMMU_DEMAP + membar #Sync + brnz,pt %g3, 1b + sub %g3, %g2, %g3 + retry +2: mov 0x80, %g2 + stxa %g0, [%g2] ASI_DMMU_DEMAP + membar #Sync + stxa %g0, [%g2] ASI_IMMU_DEMAP + membar #Sync + retry + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + nop + #ifdef DCACHE_ALIASING_POSSIBLE .align 32 .globl xcall_flush_dcache_page_cheetah @@ -700,7 +851,7 @@ __hypervisor_tlb_xcall_error: ba,a,pt %xcc, rtrap .globl __hypervisor_xcall_flush_tlb_mm -__hypervisor_xcall_flush_tlb_mm: /* 21 insns */ +__hypervisor_xcall_flush_tlb_mm: /* 24 insns */ /* %g5=ctx, g1,g2,g3,g4,g7=scratch, %g6=unusable */ mov %o0, %g2 mov %o1, %g3 @@ -714,7 +865,7 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ mov HV_FAST_MMU_DEMAP_CTX, %o5 ta HV_FAST_TRAP mov HV_FAST_MMU_DEMAP_CTX, %g6 - brnz,pn %o0, __hypervisor_tlb_xcall_error + brnz,pn %o0, 1f mov %o0, %g5 mov %g2, %o0 mov %g3, %o1 @@ -723,9 +874,12 @@ __hypervisor_xcall_flush_tlb_mm: /* 21 insns */ mov %g7, %o5 membar #Sync retry +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 + nop .globl __hypervisor_xcall_flush_tlb_page -__hypervisor_xcall_flush_tlb_page: /* 17 insns */ +__hypervisor_xcall_flush_tlb_page: /* 20 insns */ /* %g5=ctx, %g1=vaddr */ mov %o0, %g2 mov %o1, %g3 @@ -737,42 +891,64 @@ __hypervisor_xcall_flush_tlb_page: /* 17 insns */ sllx %o0, PAGE_SHIFT, %o0 ta HV_MMU_UNMAP_ADDR_TRAP mov HV_MMU_UNMAP_ADDR_TRAP, %g6 - brnz,a,pn %o0, __hypervisor_tlb_xcall_error + brnz,a,pn %o0, 1f mov %o0, %g5 mov %g2, %o0 mov %g3, %o1 mov %g4, %o2 membar #Sync retry +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 + nop .globl __hypervisor_xcall_flush_tlb_kernel_range -__hypervisor_xcall_flush_tlb_kernel_range: /* 25 insns */ +__hypervisor_xcall_flush_tlb_kernel_range: /* 44 insns */ /* %g1=start, %g7=end, g2,g3,g4,g5,g6=scratch */ sethi %hi(PAGE_SIZE - 1), %g2 or %g2, %lo(PAGE_SIZE - 1), %g2 andn %g1, %g2, %g1 andn %g7, %g2, %g7 sub %g7, %g1, %g3 + srlx %g3, 18, %g7 add %g2, 1, %g2 sub %g3, %g2, %g3 mov %o0, %g2 mov %o1, %g4 - mov %o2, %g7 + brnz,pn %g7, 2f + mov %o2, %g7 1: add %g1, %g3, %o0 /* ARG0: virtual address */ mov 0, %o1 /* ARG1: mmu context */ mov HV_MMU_ALL, %o2 /* ARG2: flags */ ta HV_MMU_UNMAP_ADDR_TRAP mov HV_MMU_UNMAP_ADDR_TRAP, %g6 - brnz,pn %o0, __hypervisor_tlb_xcall_error + brnz,pn %o0, 1f mov %o0, %g5 sethi %hi(PAGE_SIZE), %o2 brnz,pt %g3, 1b sub %g3, %o2, %g3 - mov %g2, %o0 +5: mov %g2, %o0 mov %g4, %o1 mov %g7, %o2 membar #Sync retry +1: sethi %hi(__hypervisor_tlb_xcall_error), %g4 + jmpl %g4 + %lo(__hypervisor_tlb_xcall_error), %g0 + nop +2: mov %o3, %g1 + mov %o5, %g3 + mov 0, %o0 /* ARG0: CPU lists unimplemented */ + mov 0, %o1 /* ARG1: CPU lists unimplemented */ + mov 0, %o2 /* ARG2: mmu context == nucleus */ + mov HV_MMU_ALL, %o3 /* ARG3: flags */ + mov HV_FAST_MMU_DEMAP_CTX, %o5 + ta HV_FAST_TRAP + mov %g1, %o3 + brz,pt %o0, 5b + mov %g3, %o5 + mov HV_FAST_MMU_DEMAP_CTX, %g6 + ba,pt %xcc, 1b + clr %g5 /* These just get rescheduled to PIL vectors. 
*/ .globl xcall_call_function @@ -809,6 +985,58 @@ xcall_kgdb_capture: #endif /* CONFIG_SMP */ + .globl cheetah_patch_cachetlbops +cheetah_patch_cachetlbops: + save %sp, -128, %sp + + sethi %hi(__flush_tlb_mm), %o0 + or %o0, %lo(__flush_tlb_mm), %o0 + sethi %hi(__cheetah_flush_tlb_mm), %o1 + or %o1, %lo(__cheetah_flush_tlb_mm), %o1 + call tlb_patch_one + mov 19, %o2 + + sethi %hi(__flush_tlb_page), %o0 + or %o0, %lo(__flush_tlb_page), %o0 + sethi %hi(__cheetah_flush_tlb_page), %o1 + or %o1, %lo(__cheetah_flush_tlb_page), %o1 + call tlb_patch_one + mov 22, %o2 + + sethi %hi(__flush_tlb_pending), %o0 + or %o0, %lo(__flush_tlb_pending), %o0 + sethi %hi(__cheetah_flush_tlb_pending), %o1 + or %o1, %lo(__cheetah_flush_tlb_pending), %o1 + call tlb_patch_one + mov 27, %o2 + + sethi %hi(__flush_tlb_kernel_range), %o0 + or %o0, %lo(__flush_tlb_kernel_range), %o0 + sethi %hi(__cheetah_flush_tlb_kernel_range), %o1 + or %o1, %lo(__cheetah_flush_tlb_kernel_range), %o1 + call tlb_patch_one + mov 31, %o2 + +#ifdef DCACHE_ALIASING_POSSIBLE + sethi %hi(__flush_dcache_page), %o0 + or %o0, %lo(__flush_dcache_page), %o0 + sethi %hi(__cheetah_flush_dcache_page), %o1 + or %o1, %lo(__cheetah_flush_dcache_page), %o1 + call tlb_patch_one + mov 11, %o2 +#endif /* DCACHE_ALIASING_POSSIBLE */ + +#ifdef CONFIG_SMP + sethi %hi(xcall_flush_tlb_kernel_range), %o0 + or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 + sethi %hi(__cheetah_xcall_flush_tlb_kernel_range), %o1 + or %o1, %lo(__cheetah_xcall_flush_tlb_kernel_range), %o1 + call tlb_patch_one + mov 44, %o2 +#endif /* CONFIG_SMP */ + + ret + restore .globl hypervisor_patch_cachetlbops hypervisor_patch_cachetlbops: @@ -819,28 +1047,28 @@ hypervisor_patch_cachetlbops: sethi %hi(__hypervisor_flush_tlb_mm), %o1 or %o1, %lo(__hypervisor_flush_tlb_mm), %o1 call tlb_patch_one - mov 10, %o2 + mov 19, %o2 sethi %hi(__flush_tlb_page), %o0 or %o0, %lo(__flush_tlb_page), %o0 sethi %hi(__hypervisor_flush_tlb_page), %o1 or %o1, %lo(__hypervisor_flush_tlb_page), %o1 call tlb_patch_one - mov 11, %o2 + mov 22, %o2 sethi %hi(__flush_tlb_pending), %o0 or %o0, %lo(__flush_tlb_pending), %o0 sethi %hi(__hypervisor_flush_tlb_pending), %o1 or %o1, %lo(__hypervisor_flush_tlb_pending), %o1 call tlb_patch_one - mov 16, %o2 + mov 27, %o2 sethi %hi(__flush_tlb_kernel_range), %o0 or %o0, %lo(__flush_tlb_kernel_range), %o0 sethi %hi(__hypervisor_flush_tlb_kernel_range), %o1 or %o1, %lo(__hypervisor_flush_tlb_kernel_range), %o1 call tlb_patch_one - mov 16, %o2 + mov 31, %o2 #ifdef DCACHE_ALIASING_POSSIBLE sethi %hi(__flush_dcache_page), %o0 @@ -857,21 +1085,21 @@ hypervisor_patch_cachetlbops: sethi %hi(__hypervisor_xcall_flush_tlb_mm), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_mm), %o1 call tlb_patch_one - mov 21, %o2 + mov 24, %o2 sethi %hi(xcall_flush_tlb_page), %o0 or %o0, %lo(xcall_flush_tlb_page), %o0 sethi %hi(__hypervisor_xcall_flush_tlb_page), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_page), %o1 call tlb_patch_one - mov 17, %o2 + mov 20, %o2 sethi %hi(xcall_flush_tlb_kernel_range), %o0 or %o0, %lo(xcall_flush_tlb_kernel_range), %o0 sethi %hi(__hypervisor_xcall_flush_tlb_kernel_range), %o1 or %o1, %lo(__hypervisor_xcall_flush_tlb_kernel_range), %o1 call tlb_patch_one - mov 25, %o2 + mov 44, %o2 #endif /* CONFIG_SMP */ ret diff --git a/arch/x86/entry/Makefile b/arch/x86/entry/Makefile index 77f28ce..9976fce 100644 --- a/arch/x86/entry/Makefile +++ b/arch/x86/entry/Makefile @@ -5,8 +5,8 @@ OBJECT_FILES_NON_STANDARD_entry_$(BITS).o := y OBJECT_FILES_NON_STANDARD_entry_64_compat.o := y 
-CFLAGS_syscall_64.o += -Wno-override-init -CFLAGS_syscall_32.o += -Wno-override-init +CFLAGS_syscall_64.o += $(call cc-option,-Wno-override-init,) +CFLAGS_syscall_32.o += $(call cc-option,-Wno-override-init,) obj-y := entry_$(BITS).o thunk_$(BITS).o syscall_$(BITS).o obj-y += common.o diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index ff6ef7b..2b36185 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -389,5 +389,3 @@ 380 i386 pkey_mprotect sys_pkey_mprotect 381 i386 pkey_alloc sys_pkey_alloc 382 i386 pkey_free sys_pkey_free -#383 i386 pkey_get sys_pkey_get -#384 i386 pkey_set sys_pkey_set diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 2f024d0..e93ef0b 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -338,8 +338,6 @@ 329 common pkey_mprotect sys_pkey_mprotect 330 common pkey_alloc sys_pkey_alloc 331 common pkey_free sys_pkey_free -#332 common pkey_get sys_pkey_get -#333 common pkey_set sys_pkey_set # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a3a9eb8..a74a2db 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -3607,10 +3607,14 @@ __init int intel_pmu_init(void) /* * Quirk: v2 perfmon does not report fixed-purpose events, so - * assume at least 3 events: + * assume at least 3 events, when not running in a hypervisor: */ - if (version > 1) - x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); + if (version > 1) { + int assume = 3 * !boot_cpu_has(X86_FEATURE_HYPERVISOR); + + x86_pmu.num_counters_fixed = + max((int)edx.split.num_counters_fixed, assume); + } if (boot_cpu_has(X86_FEATURE_PDCM)) { u64 capabilities; @@ -3898,6 +3902,7 @@ __init int intel_pmu_init(void) break; case INTEL_FAM6_XEON_PHI_KNL: + case INTEL_FAM6_XEON_PHI_KNM: memcpy(hw_cache_event_ids, slm_hw_cache_event_ids, sizeof(hw_cache_event_ids)); memcpy(hw_cache_extra_regs, @@ -3912,7 +3917,7 @@ __init int intel_pmu_init(void) x86_pmu.flags |= PMU_FL_HAS_RSP_1; x86_pmu.flags |= PMU_FL_NO_HT_SHARING; - pr_cont("Knights Landing events, "); + pr_cont("Knights Landing/Mill events, "); break; case INTEL_FAM6_SKYLAKE_MOBILE: diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 3ca87b5..4f5ac72 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -48,7 +48,8 @@ * Scope: Core * MSR_CORE_C6_RESIDENCY: CORE C6 Residency Counter * perf code: 0x02 - * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW + * SKL,KNL * Scope: Core * MSR_CORE_C7_RESIDENCY: CORE C7 Residency Counter * perf code: 0x03 @@ -56,15 +57,16 @@ * Scope: Core * MSR_PKG_C2_RESIDENCY: Package C2 Residency Counter. * perf code: 0x00 - * Available model: SNB,IVB,HSW,BDW,SKL + * Available model: SNB,IVB,HSW,BDW,SKL,KNL * Scope: Package (physical package) * MSR_PKG_C3_RESIDENCY: Package C3 Residency Counter. * perf code: 0x01 - * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Available model: NHM,WSM,SNB,IVB,HSW,BDW,SKL,KNL * Scope: Package (physical package) * MSR_PKG_C6_RESIDENCY: Package C6 Residency Counter. 
* perf code: 0x02 - * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW,SKL + * Available model: SLM,AMT,NHM,WSM,SNB,IVB,HSW,BDW + * SKL,KNL * Scope: Package (physical package) * MSR_PKG_C7_RESIDENCY: Package C7 Residency Counter. * perf code: 0x03 @@ -118,6 +120,7 @@ struct cstate_model { /* Quirk flags */ #define SLM_PKG_C6_USE_C7_MSR (1UL << 0) +#define KNL_CORE_C6_MSR (1UL << 1) struct perf_cstate_msr { u64 msr; @@ -488,6 +491,18 @@ static const struct cstate_model slm_cstates __initconst = { .quirks = SLM_PKG_C6_USE_C7_MSR, }; + +static const struct cstate_model knl_cstates __initconst = { + .core_events = BIT(PERF_CSTATE_CORE_C6_RES), + + .pkg_events = BIT(PERF_CSTATE_PKG_C2_RES) | + BIT(PERF_CSTATE_PKG_C3_RES) | + BIT(PERF_CSTATE_PKG_C6_RES), + .quirks = KNL_CORE_C6_MSR, +}; + + + #define X86_CSTATES_MODEL(model, states) \ { X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long) &(states) } @@ -523,6 +538,8 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates), X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), + + X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates), { }, }; MODULE_DEVICE_TABLE(x86cpu, intel_cstates_match); @@ -558,6 +575,11 @@ static int __init cstate_probe(const struct cstate_model *cm) if (cm->quirks & SLM_PKG_C6_USE_C7_MSR) pkg_msr[PERF_CSTATE_PKG_C6_RES].msr = MSR_PKG_C7_RESIDENCY; + /* KNL has different MSR for CORE C6 */ + if (cm->quirks & KNL_CORE_C6_MSR) + pkg_msr[PERF_CSTATE_CORE_C6_RES].msr = MSR_KNL_CORE_C6_RESIDENCY; + + has_cstate_core = cstate_probe_msr(cm->core_events, PERF_CSTATE_CORE_EVENT_MAX, core_msr, core_events_attrs); diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index fc6cf21..81b321a 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -458,8 +458,8 @@ void intel_pmu_lbr_del(struct perf_event *event) if (!x86_pmu.lbr_nr) return; - if (branch_user_callstack(cpuc->br_sel) && event->ctx && - event->ctx->task_ctx_data) { + if (branch_user_callstack(cpuc->br_sel) && + event->ctx->task_ctx_data) { task_ctx = event->ctx->task_ctx_data; task_ctx->lbr_callstack_users--; } diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index b0f0e83..0a535ce 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -763,6 +763,7 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, hsw_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init), diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index d9844cc..efca268 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1349,6 +1349,7 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_X, bdx_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_BROADWELL_XEON_D, bdx_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNL, knl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_XEON_PHI_KNM, knl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, skx_uncore_init), diff --git 
a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 2599222..cddd5d0 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -193,6 +193,8 @@ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ +#define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ +#define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 9ae5ab8..34a46dc 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -64,5 +64,6 @@ /* Xeon Phi */ #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ +#define INTEL_FAM6_XEON_PHI_KNM 0x85 /* Knights Mill */ #endif /* _ASM_X86_INTEL_FAMILY_H */ diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index de25aad..d34bd37 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -351,4 +351,10 @@ extern void arch_phys_wc_del(int handle); #define arch_phys_wc_add arch_phys_wc_add #endif +#ifdef CONFIG_X86_PAT +extern int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size); +extern void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size); +#define arch_io_reserve_memtype_wc arch_io_reserve_memtype_wc +#endif + #endif /* _ASM_X86_IO_H */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 56f4c66..78f3760 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -88,7 +88,6 @@ #define MSR_IA32_RTIT_CTL 0x00000570 #define MSR_IA32_RTIT_STATUS 0x00000571 -#define MSR_IA32_RTIT_STATUS 0x00000571 #define MSR_IA32_RTIT_ADDR0_A 0x00000580 #define MSR_IA32_RTIT_ADDR0_B 0x00000581 #define MSR_IA32_RTIT_ADDR1_A 0x00000582 diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 3d33a71..a34e0d4 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -103,8 +103,10 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) ({ \ long tmp; \ struct rw_semaphore* ret; \ + register void *__sp asm(_ASM_SP); \ + \ asm volatile("# beginning down_write\n\t" \ - LOCK_PREFIX " xadd %1,(%3)\n\t" \ + LOCK_PREFIX " xadd %1,(%4)\n\t" \ /* adds 0xffff0001, returns the old value */ \ " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \ /* was the active mask 0 before? 
*/\ @@ -112,7 +114,7 @@ static inline bool __down_read_trylock(struct rw_semaphore *sem) " call " slow_path "\n" \ "1:\n" \ "# ending down_write" \ - : "+m" (sem->count), "=d" (tmp), "=a" (ret) \ + : "+m" (sem->count), "=d" (tmp), "=a" (ret), "+r" (__sp) \ : "a" (sem), "1" (RWSEM_ACTIVE_WRITE_BIAS) \ : "memory", "cc"); \ ret; \ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 2aaca53..ad6f5eb0 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -52,6 +52,15 @@ struct task_struct; #include <asm/cpufeature.h> #include <linux/atomic.h> +struct thread_info { + unsigned long flags; /* low level flags */ +}; + +#define INIT_THREAD_INFO(tsk) \ +{ \ + .flags = 0, \ +} + #define init_stack (init_thread_union.stack) #else /* !__ASSEMBLY__ */ diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 8a5abaa..931ced8 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -454,6 +454,7 @@ static void __init acpi_sci_ioapic_setup(u8 bus_irq, u16 polarity, u16 trigger, polarity = acpi_sci_flags & ACPI_MADT_POLARITY_MASK; mp_override_legacy_irq(bus_irq, polarity, trigger, gsi); + acpi_penalize_sci_irq(bus_irq, trigger, polarity); /* * stash over-ride to indicate we've been here diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 620ab06..017bda1 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -429,7 +429,7 @@ int __init save_microcode_in_initrd_amd(void) * We need the physical address of the container for both bitness since * boot_params.hdr.ramdisk_image is a physical address. */ - cont = __pa(container); + cont = __pa_nodebug(container); cont_va = container; #endif diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 8cb57df..1db8dc4 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -32,6 +32,8 @@ void init_scattered_cpuid_features(struct cpuinfo_x86 *c) static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_INTEL_PT, CR_EBX,25, 0x00000007, 0 }, + { X86_FEATURE_AVX512_4VNNIW, CR_EDX, 2, 0x00000007, 0 }, + { X86_FEATURE_AVX512_4FMAPS, CR_EDX, 3, 0x00000007, 0 }, { X86_FEATURE_APERFMPERF, CR_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CR_ECX, 3, 0x00000006, 0 }, { X86_FEATURE_HW_PSTATE, CR_EDX, 7, 0x80000007, 0 }, diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 8116057..5130985 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -27,6 +27,7 @@ #include <asm/div64.h> #include <asm/x86_init.h> #include <asm/hypervisor.h> +#include <asm/timer.h> #include <asm/apic.h> #define CPUID_VMWARE_INFO_LEAF 0x40000000 @@ -94,6 +95,10 @@ static void __init vmware_platform_setup(void) } else { pr_warn("Failed to get TSC freq from the hypervisor\n"); } + +#ifdef CONFIG_X86_IO_APIC + no_timer_check = 1; +#endif } /* diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index b85fe5f..90e8dde 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -350,7 +350,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map, * continue building up new bios map based on this * information */ - if (current_type != last_type) { + if (current_type != last_type || current_type == E820_PRAM) { if (last_type != 0) { new_bios[new_bios_entry].size = change_point[chgidx]->addr - last_addr; diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 8116057..5130985 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -27,6 +27,7 @@
 #include <asm/div64.h>
 #include <asm/x86_init.h>
 #include <asm/hypervisor.h>
+#include <asm/timer.h>
 #include <asm/apic.h>
 
 #define CPUID_VMWARE_INFO_LEAF	0x40000000
@@ -94,6 +95,10 @@ static void __init vmware_platform_setup(void)
 	} else {
 		pr_warn("Failed to get TSC freq from the hypervisor\n");
 	}
+
+#ifdef CONFIG_X86_IO_APIC
+	no_timer_check = 1;
+#endif
 }
 
 /*
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index b85fe5f..90e8dde 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -350,7 +350,7 @@ int __init sanitize_e820_map(struct e820entry *biosmap, int max_nr_map,
 		 * continue building up new bios map based on this
 		 * information
 		 */
-		if (current_type != last_type) {
+		if (current_type != last_type || current_type == E820_PRAM) {
 			if (last_type != 0) {
 				new_bios[new_bios_entry].size =
 					change_point[chgidx]->addr - last_addr;
diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c
index 17ad31f..c7c11cc 100644
--- a/arch/x86/kernel/fpu/xstate.c
+++ b/arch/x86/kernel/fpu/xstate.c
@@ -74,6 +74,8 @@ void fpu__xstate_clear_all_cpu_caps(void)
 	setup_clear_cpu_cap(X86_FEATURE_MPX);
 	setup_clear_cpu_cap(X86_FEATURE_XGETBV1);
 	setup_clear_cpu_cap(X86_FEATURE_PKU);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW);
+	setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS);
 }
 
 /*
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index 28cee01..d9d8d16 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -50,6 +50,7 @@
 #include <linux/kallsyms.h>
 #include <linux/ftrace.h>
 #include <linux/frame.h>
+#include <linux/kasan.h>
 
 #include <asm/text-patching.h>
 #include <asm/cacheflush.h>
@@ -1057,9 +1058,10 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
 	 * tailcall optimization. So, to be absolutely safe
 	 * we also save and restore enough stack bytes to cover
 	 * the argument area.
+	 * Use __memcpy() to avoid KASAN stack out-of-bounds reports as we copy
+	 * raw stack chunk with redzones:
 	 */
-	memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr,
-	       MIN_STACK_SIZE(addr));
+	__memcpy(kcb->jprobes_stack, (kprobe_opcode_t *)addr, MIN_STACK_SIZE(addr));
 	regs->flags &= ~X86_EFLAGS_IF;
 	trace_hardirqs_off();
 	regs->ip = (unsigned long)(jp->entry);
@@ -1080,6 +1082,9 @@ void jprobe_return(void)
 {
 	struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
 
+	/* Unpoison stack redzones in the frames we are going to jump over. */
+	kasan_unpoison_stack_above_sp_to(kcb->jprobe_saved_sp);
+
 	asm volatile (
 #ifdef CONFIG_X86_64
 			"       xchg   %%rbx,%%rsp	\n"
@@ -1118,7 +1123,7 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
 		/* It's OK to start function graph tracing again */
 		unpause_graph_tracing();
 		*regs = kcb->jprobe_saved_regs;
-		memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp));
+		__memcpy(saved_sp, kcb->jprobes_stack, MIN_STACK_SIZE(saved_sp));
 		preempt_enable_no_resched();
 		return 1;
 	}
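The jprobe_return() path patched above is the one every jprobe handler runs through. For context, a minimal 4.9-era jprobe module, modeled on the samples/kprobes example (it assumes the contemporary _do_fork() prototype, including the tls argument):

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/kprobes.h>

/* The handler signature must mirror the probed function exactly. */
static long jdo_fork(unsigned long clone_flags, unsigned long stack_start,
		     unsigned long stack_size, int __user *parent_tidptr,
		     int __user *child_tidptr, unsigned long tls)
{
	pr_info("jprobe: clone_flags = 0x%lx\n", clone_flags);

	/* Always end with jprobe_return(); it unwinds back to the probed
	 * function (and, with the fix above, unpoisons the KASAN redzones
	 * in the frames being jumped over). */
	jprobe_return();
	return 0;	/* never reached */
}

static struct jprobe my_jprobe = {
	.entry	= jdo_fork,
	.kp	= { .symbol_name = "_do_fork" },
};

static int __init jprobe_init(void)
{
	return register_jprobe(&my_jprobe);
}

static void __exit jprobe_exit(void)
{
	unregister_jprobe(&my_jprobe);
}

module_init(jprobe_init);
module_exit(jprobe_exit);
MODULE_LICENSE("GPL");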
diff --git a/arch/x86/kernel/mcount_64.S b/arch/x86/kernel/mcount_64.S
index efe73aa..7b0d3da 100644
--- a/arch/x86/kernel/mcount_64.S
+++ b/arch/x86/kernel/mcount_64.S
@@ -18,8 +18,10 @@
 
 #ifdef CC_USING_FENTRY
 # define function_hook	__fentry__
+EXPORT_SYMBOL(__fentry__)
 #else
 # define function_hook	mcount
+EXPORT_SYMBOL(mcount)
 #endif
 
 /* All cases save the original rbp (8 bytes) */
@@ -295,7 +297,6 @@ trace:
 	jmp fgraph_trace
 END(function_hook)
 #endif /* CONFIG_DYNAMIC_FTRACE */
-EXPORT_SYMBOL(function_hook)
 #endif /* CONFIG_FUNCTION_TRACER */
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 51402a7..0bee04d 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -625,8 +625,6 @@ static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
 			amd_disable_seq_and_redirect_scrub);
 
-#endif
-
 #if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
 #include <linux/jump_label.h>
 #include <asm/string_64.h>
@@ -657,3 +655,4 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
 #endif
+#endif
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index bbfbca5..9c337b0 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1221,11 +1221,16 @@ void __init setup_arch(char **cmdline_p)
 	 */
 	get_smp_config();
 
+	/*
+	 * Systems w/o ACPI and mptables might not have it mapped the local
+	 * APIC yet, but prefill_possible_map() might need to access it.
+	 */
+	init_apic_mappings();
+
 	prefill_possible_map();
 
 	init_cpu_to_node();
 
-	init_apic_mappings();
-
 	io_apic_init_mappings();
 
 	kvm_guest_init();
diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c
index 40df337..ec1f756 100644
--- a/arch/x86/kernel/signal_compat.c
+++ b/arch/x86/kernel/signal_compat.c
@@ -105,9 +105,6 @@ void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact)
 	/* Don't let flags to be set from userspace */
 	act->sa.sa_flags &= ~(SA_IA32_ABI | SA_X32_ABI);
 
-	if (user_64bit_mode(current_pt_regs()))
-		return;
-
 	if (in_ia32_syscall())
 		act->sa.sa_flags |= SA_IA32_ABI;
 	if (in_x32_syscall())
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 68f8cc2..c00cb64 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -261,8 +261,10 @@ static inline void __smp_reschedule_interrupt(void)
 
 __visible void smp_reschedule_interrupt(struct pt_regs *regs)
 {
+	irq_enter();
 	ack_APIC_irq();
 	__smp_reschedule_interrupt();
+	irq_exit();
 	/*
 	 * KVM uses this interrupt to force a cpu out of guest mode
 	 */
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 5943bb7..d29c852 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -1409,15 +1409,17 @@ __init void prefill_possible_map(void)
 
 	/* No boot processor was found in mptable or ACPI MADT */
 	if (!num_processors) {
-		int apicid = boot_cpu_physical_apicid;
-		int cpu = hard_smp_processor_id();
+		if (boot_cpu_has(X86_FEATURE_APIC)) {
+			int apicid = boot_cpu_physical_apicid;
+			int cpu = hard_smp_processor_id();
 
-		pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu);
+			pr_warn("Boot CPU (id %d) not listed by BIOS\n", cpu);
 
-		/* Make sure boot cpu is enumerated */
-		if (apic->cpu_present_to_apicid(0) == BAD_APICID &&
-		    apic->apic_id_valid(apicid))
-			generic_processor_info(apicid, boot_cpu_apic_version);
+			/* Make sure boot cpu is enumerated */
+			if (apic->cpu_present_to_apicid(0) == BAD_APICID &&
+			    apic->apic_id_valid(apicid))
+				generic_processor_info(apicid, boot_cpu_apic_version);
+		}
 
 		if (!num_processors)
 			num_processors = 1;
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index c9a0738..a23ce84 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -57,7 +57,8 @@ static int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
 	unsigned char opcode[15];
 	unsigned long addr = convert_ip_to_linear(child, regs);
 
-	copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
+	copied = access_process_vm(child, addr, opcode, sizeof(opcode),
+			FOLL_FORCE);
 	for (i = 0; i < copied; i++) {
 		switch (opcode[i]) {
 		/* popf and iret */
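The step.c hunk above is one instance of the tree-wide conversion visible throughout this merge: access_process_vm() and the get_user_pages() family now take a single gup_flags word instead of separate write/force ints. The mapping from the old convention is mechanical; a helper like this captures it (an illustrative sketch, not from the patch):

/* Old callers passed (write, force) ints; new callers pass one
 * gup_flags word built from FOLL_* bits in <linux/mm.h>. */
static inline unsigned int gup_flags_from_legacy(int write, int force)
{
	unsigned int gup_flags = 0;

	if (write)
		gup_flags |= FOLL_WRITE;
	if (force)
		gup_flags |= FOLL_FORCE;	/* override VMA protections */
	return gup_flags;
}

Ptrace paths pass FOLL_FORCE because a debugger must be able to read and write target mappings regardless of their current protections.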
diff --git a/arch/x86/kernel/unwind_guess.c b/arch/x86/kernel/unwind_guess.c
index 9298993..2d721e5 100644
--- a/arch/x86/kernel/unwind_guess.c
+++ b/arch/x86/kernel/unwind_guess.c
@@ -47,7 +47,14 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
 	get_stack_info(first_frame, state->task, &state->stack_info,
 		       &state->stack_mask);
 
-	if (!__kernel_text_address(*first_frame))
+	/*
+	 * The caller can provide the address of the first frame directly
+	 * (first_frame) or indirectly (regs->sp) to indicate which stack frame
+	 * to start unwinding at.
+	 */
+	if (!unwind_done(state) &&
+	    (!on_stack(&state->stack_info, first_frame, sizeof(long)) ||
+	    !__kernel_text_address(*first_frame)))
 		unwind_next_frame(state);
 }
 EXPORT_SYMBOL_GPL(__unwind_start);
diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c
index c7220ba..1a22de7 100644
--- a/arch/x86/kvm/ioapic.c
+++ b/arch/x86/kvm/ioapic.c
@@ -594,7 +594,7 @@ static void kvm_ioapic_reset(struct kvm_ioapic *ioapic)
 	ioapic->irr = 0;
 	ioapic->irr_delivered = 0;
 	ioapic->id = 0;
-	memset(ioapic->irq_eoi, 0x00, IOAPIC_NUM_PINS);
+	memset(ioapic->irq_eoi, 0x00, sizeof(ioapic->irq_eoi));
 	rtc_irq_eoi_tracking_reset(ioapic);
 }
 
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index d570026..7f9fa2d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -5733,13 +5733,13 @@ static int kvmclock_cpu_online(unsigned int cpu)
 
 static void kvm_timer_init(void)
 {
-	int cpu;
-
 	max_tsc_khz = tsc_khz;
 
 	if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) {
 #ifdef CONFIG_CPU_FREQ
 		struct cpufreq_policy policy;
+		int cpu;
+
 		memset(&policy, 0, sizeof(policy));
 		cpu = get_cpu();
 		cpufreq_get_policy(&policy, cpu);
diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c
index b8b6a60..0d4fb3e 100644
--- a/arch/x86/mm/gup.c
+++ b/arch/x86/mm/gup.c
@@ -435,7 +435,7 @@ slow_irqon:
 
 		ret = get_user_pages_unlocked(start,
 					      (end - start) >> PAGE_SHIFT,
-					      write, 0, pages);
+					      pages, write ? FOLL_WRITE : 0);
 
 		/* Have to be a bit careful with return values */
 		if (nr > 0) {
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index ddd2661..887e571 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -104,10 +104,10 @@ void __init kernel_randomize_memory(void)
 	 * consistent with the vaddr_start/vaddr_end variables.
 	 */
 	BUILD_BUG_ON(vaddr_start >= vaddr_end);
-	BUILD_BUG_ON(config_enabled(CONFIG_X86_ESPFIX64) &&
+	BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) &&
 		     vaddr_end >= EFI_VA_START);
-	BUILD_BUG_ON((config_enabled(CONFIG_X86_ESPFIX64) ||
-		      config_enabled(CONFIG_EFI)) &&
+	BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) ||
+		      IS_ENABLED(CONFIG_EFI)) &&
 		     vaddr_end >= __START_KERNEL_map);
 	BUILD_BUG_ON(vaddr_end > __START_KERNEL_map);
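config_enabled() was an internal helper; IS_ENABLED() is the public interface and also catches =m options. The preprocessor trick behind it can be re-derived standalone (a simplified sketch; the kernel's macro names differ slightly, and the real IS_ENABLED() additionally ORs in the _MODULE variant):

#define __ARG_PLACEHOLDER_1 0,
#define __take_second_arg(__ignored, val, ...) val

/* Expands to 1 if 'x' is a macro defined to 1, to 0 otherwise. */
#define IS_DEFINED_TO_1(x)	___is_defined(x)
#define ___is_defined(val)	____is_defined(__ARG_PLACEHOLDER_##val)
#define ____is_defined(arg1_or_junk) __take_second_arg(arg1_or_junk 1, 0)

/* CONFIG_FOO=y: CONFIG_FOO expands to 1, the placeholder pastes to
 * "0," and the second argument (1) is selected.  CONFIG_FOO unset:
 * the pasted token stays junk and the trailing 0 is selected. */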
diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c
index 8047687..e4f8009 100644
--- a/arch/x86/mm/mpx.c
+++ b/arch/x86/mm/mpx.c
@@ -544,10 +544,9 @@ static int mpx_resolve_fault(long __user *addr, int write)
 {
 	long gup_ret;
 	int nr_pages = 1;
-	int force = 0;
 
-	gup_ret = get_user_pages((unsigned long)addr, nr_pages, write,
-			force, NULL, NULL);
+	gup_ret = get_user_pages((unsigned long)addr, nr_pages,
+			write ? FOLL_WRITE : 0, NULL, NULL);
 	/*
 	 * get_user_pages() returns number of pages gotten.
 	 * 0 means we failed to fault in and get anything,
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 170cc4f..83e701f 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -730,6 +730,20 @@ void io_free_memtype(resource_size_t start, resource_size_t end)
 	free_memtype(start, end);
 }
 
+int arch_io_reserve_memtype_wc(resource_size_t start, resource_size_t size)
+{
+	enum page_cache_mode type = _PAGE_CACHE_MODE_WC;
+
+	return io_reserve_memtype(start, start + size, &type);
+}
+EXPORT_SYMBOL(arch_io_reserve_memtype_wc);
+
+void arch_io_free_memtype_wc(resource_size_t start, resource_size_t size)
+{
+	io_free_memtype(start, start + size);
+}
+EXPORT_SYMBOL(arch_io_free_memtype_wc);
+
 pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn,
 				unsigned long size, pgprot_t vma_prot)
 {
diff --git a/arch/x86/platform/uv/bios_uv.c b/arch/x86/platform/uv/bios_uv.c
index b4d5e95..4a6a5a2 100644
--- a/arch/x86/platform/uv/bios_uv.c
+++ b/arch/x86/platform/uv/bios_uv.c
@@ -40,7 +40,15 @@ s64 uv_bios_call(enum uv_bios_cmd which, u64 a1, u64 a2, u64 a3, u64 a4, u64 a5)
 		 */
 		return BIOS_STATUS_UNIMPLEMENTED;
 
-	ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5);
+	/*
+	 * If EFI_OLD_MEMMAP is set, we need to fall back to using our old EFI
+	 * callback method, which uses efi_call() directly, with the kernel page tables:
+	 */
+	if (unlikely(test_bit(EFI_OLD_MEMMAP, &efi.flags)))
+		ret = efi_call((void *)__va(tab->function), (u64)which, a1, a2, a3, a4, a5);
+	else
+		ret = efi_call_virt_pointer(tab, function, (u64)which, a1, a2, a3, a4, a5);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(uv_bios_call);
diff --git a/arch/x86/um/ptrace_32.c b/arch/x86/um/ptrace_32.c
index 5766ead..60a5a5a 100644
--- a/arch/x86/um/ptrace_32.c
+++ b/arch/x86/um/ptrace_32.c
@@ -36,7 +36,8 @@ int is_syscall(unsigned long addr)
 		 * slow, but that doesn't matter, since it will be called only
 		 * in case of singlestepping, if copy_from_user failed.
 		 */
-		n = access_process_vm(current, addr, &instr, sizeof(instr), 0);
+		n = access_process_vm(current, addr, &instr, sizeof(instr),
+				FOLL_FORCE);
 		if (n != sizeof(instr)) {
 			printk(KERN_ERR "is_syscall : failed to read "
 			       "instruction from 0x%lx\n", addr);
diff --git a/arch/x86/um/ptrace_64.c b/arch/x86/um/ptrace_64.c
index 0b5c184..e30202b 100644
--- a/arch/x86/um/ptrace_64.c
+++ b/arch/x86/um/ptrace_64.c
@@ -212,7 +212,8 @@ int is_syscall(unsigned long addr)
 		 * slow, but that doesn't matter, since it will be called only
 		 * in case of singlestepping, if copy_from_user failed.
 		 */
-		n = access_process_vm(current, addr, &instr, sizeof(instr), 0);
+		n = access_process_vm(current, addr, &instr, sizeof(instr),
+				FOLL_FORCE);
 		if (n != sizeof(instr)) {
 			printk("is_syscall : failed to read instruction from "
 			       "0x%lx\n", addr);
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index c0fdd57..bdd8556 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1837,6 +1837,7 @@ static void __init init_hvm_pv_info(void)
 
 	xen_domain_type = XEN_HVM_DOMAIN;
 }
+#endif
 
 static int xen_cpu_up_prepare(unsigned int cpu)
 {
@@ -1887,6 +1888,7 @@ static int xen_cpu_up_online(unsigned int cpu)
 	return 0;
 }
 
+#ifdef CONFIG_XEN_PVHVM
 #ifdef CONFIG_KEXEC_CORE
 static void xen_hvm_shutdown(void)
 {
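The arch_io_reserve_memtype_wc()/arch_io_free_memtype_wc() pair added in arch/x86/mm/pat.c above gives drivers a way to claim a write-combining memtype for an I/O range before remapping it. A sketch of the intended driver-side pairing (modeled on how GPU drivers adopted these helpers; the function and variable names here are illustrative):

#include <linux/io.h>

static void __iomem *vram_map;

static int vram_map_wc(resource_size_t base, resource_size_t size)
{
	int ret;

	/* Reserve the WC memtype first so PAT won't hand out
	 * conflicting cache attributes for the same range. */
	ret = arch_io_reserve_memtype_wc(base, size);
	if (ret)
		return ret;

	vram_map = ioremap_wc(base, size);
	if (!vram_map) {
		arch_io_free_memtype_wc(base, size);
		return -ENOMEM;
	}
	return 0;
}

static void vram_unmap_wc(resource_size_t base, resource_size_t size)
{
	iounmap(vram_map);
	arch_io_free_memtype_wc(base, size);
}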