author | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-24 18:37:03 -0800
---|---|---
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-02-24 18:37:03 -0800
commit | 9e314890292c0dd357eadef6a043704fa0b4c157 (patch) |
tree | d70b074818b4dc45b180ea860ba66ce573129688 |
parent | f8e6859ea9d06ae1565b21278c4e10fbce5f1eab (diff) |
parent | a4d4426635804379d618dd28e29f574a2bc11184 (diff) |
download | op-kernel-dev-9e314890292c0dd357eadef6a043704fa0b4c157.zip op-kernel-dev-9e314890292c0dd357eadef6a043704fa0b4c157.tar.gz |
Merge tag 'openrisc-for-linus' of git://github.com/openrisc/linux
Pull OpenRISC updates from Stafford Horne:
"Highlights include:
 - optimized memset and memcpy routines, giving roughly a 20% boot-time saving
 - support for CPU idling
 - support for the l.swa and l.lwa atomic operations (in the architecture
   spec since 2014)
 - atomics used to implement bitops, cmpxchg and futex
 - the atomics are in preparation for SMP support"
* tag 'openrisc-for-linus' of git://github.com/openrisc/linux: (25 commits)
openrisc: head: Init r0 to 0 on start
openrisc: Export ioremap symbols used by modules
arch/openrisc/lib/memcpy.c: use correct OR1200 option
openrisc: head: Remove unused strings
openrisc: head: Move init strings to rodata section
openrisc: entry: Fix delay slot detection
openrisc: entry: Whitespace and comment cleanups
scripts/checkstack.pl: Add openrisc support
MAINTAINERS: Add the openrisc official repository
openrisc: Add .gitignore
openrisc: Add optimized memcpy routine
openrisc: Add optimized memset
openrisc: Initial support for the idle state
openrisc: Fix the bitmask for the unit present register
openrisc: remove unnecessary stddef.h include
openrisc: add futex_atomic_* implementations
openrisc: add optimized atomic operations
openrisc: add cmpxchg and xchg implementations
openrisc: add atomic bitops
openrisc: add l.lwa/l.swa emulation
...
26 files changed, 1064 insertions, 187 deletions
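The common pattern behind the new atomics, bitops, cmpxchg and futex code in the diff below is an l.lwa/l.swa load-link/store-conditional retry loop. As a simplified sketch (assuming the usual kernel atomic_t type, and omitting the sub/and/or/xor plus _return/fetch variants that the real ATOMIC_OP macros in arch/openrisc/include/asm/atomic.h also generate), atomic_add expands to roughly:

	/*
	 * Sketch of the retry loop used by the new OpenRISC atomics; this
	 * mirrors ATOMIC_OP(add) from the asm/atomic.h added below.
	 */
	static inline void atomic_add(int i, atomic_t *v)
	{
		int tmp;

		__asm__ __volatile__(
			"1:	l.lwa	%0,0(%1)	\n"	/* load word atomic: link v->counter    */
			"	l.add	%0,%0,%2	\n"	/* tmp = tmp + i                        */
			"	l.swa	0(%1),%0	\n"	/* store word atomic: conditional store */
			"	l.bnf	1b		\n"	/* flag clear => reservation lost, retry */
			"	l.nop			\n"	/* delay slot                           */
			: "=&r"(tmp)
			: "r"(&v->counter), "r"(i)
			: "cc", "memory");
	}

On cores predating the 2014 spec revision, l.lwa/l.swa trap as illegal instructions and do_illegal_instruction() emulates them via lwa_flag/lwa_addr; the CLEAR_LWA_FLAG calls in the exception entries and the reset in __switch_to() break the emulated reservation the same way a hardware reservation would be lost.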
diff --git a/MAINTAINERS b/MAINTAINERS index 4b03c47..8f05fac 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9315,6 +9315,7 @@ OPENRISC ARCHITECTURE M: Jonas Bonn <jonas@southpole.se> M: Stefan Kristiansson <stefan.kristiansson@saunalahti.fi> M: Stafford Horne <shorne@gmail.com> +T: git git://github.com/openrisc/linux.git L: openrisc@lists.librecores.org W: http://openrisc.io S: Maintained diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig index 8d22015..1e95920 100644 --- a/arch/openrisc/Kconfig +++ b/arch/openrisc/Kconfig @@ -12,6 +12,7 @@ config OPENRISC select HAVE_MEMBLOCK select GPIOLIB select HAVE_ARCH_TRACEHOOK + select SPARSE_IRQ select GENERIC_IRQ_CHIP select GENERIC_IRQ_PROBE select GENERIC_IRQ_SHOW diff --git a/arch/openrisc/TODO.openrisc b/arch/openrisc/TODO.openrisc index 0eb04c8..c43d4e1 100644 --- a/arch/openrisc/TODO.openrisc +++ b/arch/openrisc/TODO.openrisc @@ -10,4 +10,3 @@ that are due for investigation shortly, i.e. our TODO list: or1k and this change is slowly trickling through the stack. For the time being, or32 is equivalent to or1k. --- Implement optimized version of memcpy and memset diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index ef8d1cc..fb24175 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -1,7 +1,6 @@ header-y += ucontext.h -generic-y += atomic.h generic-y += auxvec.h generic-y += barrier.h generic-y += bitsperlong.h @@ -10,8 +9,6 @@ generic-y += bugs.h generic-y += cacheflush.h generic-y += checksum.h generic-y += clkdev.h -generic-y += cmpxchg-local.h -generic-y += cmpxchg.h generic-y += current.h generic-y += device.h generic-y += div64.h @@ -22,12 +19,12 @@ generic-y += exec.h generic-y += fb.h generic-y += fcntl.h generic-y += ftrace.h -generic-y += futex.h generic-y += hardirq.h generic-y += hw_irq.h generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h +generic-y += irq.h generic-y += irq_regs.h generic-y += irq_work.h generic-y += kdebug.h diff --git a/arch/openrisc/include/asm/atomic.h b/arch/openrisc/include/asm/atomic.h new file mode 100644 index 0000000..146e166 --- /dev/null +++ b/arch/openrisc/include/asm/atomic.h @@ -0,0 +1,126 @@ +/* + * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef __ASM_OPENRISC_ATOMIC_H +#define __ASM_OPENRISC_ATOMIC_H + +#include <linux/types.h> + +/* Atomically perform op with v->counter and i */ +#define ATOMIC_OP(op) \ +static inline void atomic_##op(int i, atomic_t *v) \ +{ \ + int tmp; \ + \ + __asm__ __volatile__( \ + "1: l.lwa %0,0(%1) \n" \ + " l." #op " %0,%0,%2 \n" \ + " l.swa 0(%1),%0 \n" \ + " l.bnf 1b \n" \ + " l.nop \n" \ + : "=&r"(tmp) \ + : "r"(&v->counter), "r"(i) \ + : "cc", "memory"); \ +} + +/* Atomically perform op with v->counter and i, return the result */ +#define ATOMIC_OP_RETURN(op) \ +static inline int atomic_##op##_return(int i, atomic_t *v) \ +{ \ + int tmp; \ + \ + __asm__ __volatile__( \ + "1: l.lwa %0,0(%1) \n" \ + " l." 
#op " %0,%0,%2 \n" \ + " l.swa 0(%1),%0 \n" \ + " l.bnf 1b \n" \ + " l.nop \n" \ + : "=&r"(tmp) \ + : "r"(&v->counter), "r"(i) \ + : "cc", "memory"); \ + \ + return tmp; \ +} + +/* Atomically perform op with v->counter and i, return orig v->counter */ +#define ATOMIC_FETCH_OP(op) \ +static inline int atomic_fetch_##op(int i, atomic_t *v) \ +{ \ + int tmp, old; \ + \ + __asm__ __volatile__( \ + "1: l.lwa %0,0(%2) \n" \ + " l." #op " %1,%0,%3 \n" \ + " l.swa 0(%2),%1 \n" \ + " l.bnf 1b \n" \ + " l.nop \n" \ + : "=&r"(old), "=&r"(tmp) \ + : "r"(&v->counter), "r"(i) \ + : "cc", "memory"); \ + \ + return old; \ +} + +ATOMIC_OP_RETURN(add) +ATOMIC_OP_RETURN(sub) + +ATOMIC_FETCH_OP(add) +ATOMIC_FETCH_OP(sub) +ATOMIC_FETCH_OP(and) +ATOMIC_FETCH_OP(or) +ATOMIC_FETCH_OP(xor) + +ATOMIC_OP(and) +ATOMIC_OP(or) +ATOMIC_OP(xor) + +#undef ATOMIC_FETCH_OP +#undef ATOMIC_OP_RETURN +#undef ATOMIC_OP + +#define atomic_add_return atomic_add_return +#define atomic_sub_return atomic_sub_return +#define atomic_fetch_add atomic_fetch_add +#define atomic_fetch_sub atomic_fetch_sub +#define atomic_fetch_and atomic_fetch_and +#define atomic_fetch_or atomic_fetch_or +#define atomic_fetch_xor atomic_fetch_xor +#define atomic_and atomic_and +#define atomic_or atomic_or +#define atomic_xor atomic_xor + +/* + * Atomically add a to v->counter as long as v is not already u. + * Returns the original value at v->counter. + * + * This is often used through atomic_inc_not_zero() + */ +static inline int __atomic_add_unless(atomic_t *v, int a, int u) +{ + int old, tmp; + + __asm__ __volatile__( + "1: l.lwa %0, 0(%2) \n" + " l.sfeq %0, %4 \n" + " l.bf 2f \n" + " l.add %1, %0, %3 \n" + " l.swa 0(%2), %1 \n" + " l.bnf 1b \n" + " l.nop \n" + "2: \n" + : "=&r"(old), "=&r" (tmp) + : "r"(&v->counter), "r"(a), "r"(u) + : "cc", "memory"); + + return old; +} +#define __atomic_add_unless __atomic_add_unless + +#include <asm-generic/atomic.h> + +#endif /* __ASM_OPENRISC_ATOMIC_H */ diff --git a/arch/openrisc/include/asm/bitops.h b/arch/openrisc/include/asm/bitops.h index 3003cda..689f568 100644 --- a/arch/openrisc/include/asm/bitops.h +++ b/arch/openrisc/include/asm/bitops.h @@ -45,7 +45,7 @@ #include <asm-generic/bitops/hweight.h> #include <asm-generic/bitops/lock.h> -#include <asm-generic/bitops/atomic.h> +#include <asm/bitops/atomic.h> #include <asm-generic/bitops/non-atomic.h> #include <asm-generic/bitops/le.h> #include <asm-generic/bitops/ext2-atomic.h> diff --git a/arch/openrisc/include/asm/bitops/atomic.h b/arch/openrisc/include/asm/bitops/atomic.h new file mode 100644 index 0000000..35fb85f --- /dev/null +++ b/arch/openrisc/include/asm/bitops/atomic.h @@ -0,0 +1,123 @@ +/* + * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. 
+ */ + +#ifndef __ASM_OPENRISC_BITOPS_ATOMIC_H +#define __ASM_OPENRISC_BITOPS_ATOMIC_H + +static inline void set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long tmp; + + __asm__ __volatile__( + "1: l.lwa %0,0(%1) \n" + " l.or %0,%0,%2 \n" + " l.swa 0(%1),%0 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(tmp) + : "r"(p), "r"(mask) + : "cc", "memory"); +} + +static inline void clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long tmp; + + __asm__ __volatile__( + "1: l.lwa %0,0(%1) \n" + " l.and %0,%0,%2 \n" + " l.swa 0(%1),%0 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(tmp) + : "r"(p), "r"(~mask) + : "cc", "memory"); +} + +static inline void change_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long tmp; + + __asm__ __volatile__( + "1: l.lwa %0,0(%1) \n" + " l.xor %0,%0,%2 \n" + " l.swa 0(%1),%0 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(tmp) + : "r"(p), "r"(mask) + : "cc", "memory"); +} + +static inline int test_and_set_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long old; + unsigned long tmp; + + __asm__ __volatile__( + "1: l.lwa %0,0(%2) \n" + " l.or %1,%0,%3 \n" + " l.swa 0(%2),%1 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(old), "=&r"(tmp) + : "r"(p), "r"(mask) + : "cc", "memory"); + + return (old & mask) != 0; +} + +static inline int test_and_clear_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long old; + unsigned long tmp; + + __asm__ __volatile__( + "1: l.lwa %0,0(%2) \n" + " l.and %1,%0,%3 \n" + " l.swa 0(%2),%1 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(old), "=&r"(tmp) + : "r"(p), "r"(~mask) + : "cc", "memory"); + + return (old & mask) != 0; +} + +static inline int test_and_change_bit(int nr, volatile unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); + unsigned long old; + unsigned long tmp; + + __asm__ __volatile__( + "1: l.lwa %0,0(%2) \n" + " l.xor %1,%0,%3 \n" + " l.swa 0(%2),%1 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(old), "=&r"(tmp) + : "r"(p), "r"(mask) + : "cc", "memory"); + + return (old & mask) != 0; +} + +#endif /* __ASM_OPENRISC_BITOPS_ATOMIC_H */ diff --git a/arch/openrisc/include/asm/cmpxchg.h b/arch/openrisc/include/asm/cmpxchg.h new file mode 100644 index 0000000..5fcb9ac --- /dev/null +++ b/arch/openrisc/include/asm/cmpxchg.h @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi> + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef __ASM_OPENRISC_CMPXCHG_H +#define __ASM_OPENRISC_CMPXCHG_H + +#include <linux/types.h> + +/* + * This function doesn't exist, so you'll get a linker error + * if something tries to do an invalid cmpxchg(). 
+ */ +extern void __cmpxchg_called_with_bad_pointer(void); + +#define __HAVE_ARCH_CMPXCHG 1 + +static inline unsigned long +__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) +{ + if (size != 4) { + __cmpxchg_called_with_bad_pointer(); + return old; + } + + __asm__ __volatile__( + "1: l.lwa %0, 0(%1) \n" + " l.sfeq %0, %2 \n" + " l.bnf 2f \n" + " l.nop \n" + " l.swa 0(%1), %3 \n" + " l.bnf 1b \n" + " l.nop \n" + "2: \n" + : "=&r"(old) + : "r"(ptr), "r"(old), "r"(new) + : "cc", "memory"); + + return old; +} + +#define cmpxchg(ptr, o, n) \ + ({ \ + (__typeof__(*(ptr))) __cmpxchg((ptr), \ + (unsigned long)(o), \ + (unsigned long)(n), \ + sizeof(*(ptr))); \ + }) + +/* + * This function doesn't exist, so you'll get a linker error if + * something tries to do an invalidly-sized xchg(). + */ +extern void __xchg_called_with_bad_pointer(void); + +static inline unsigned long __xchg(unsigned long val, volatile void *ptr, + int size) +{ + if (size != 4) { + __xchg_called_with_bad_pointer(); + return val; + } + + __asm__ __volatile__( + "1: l.lwa %0, 0(%1) \n" + " l.swa 0(%1), %2 \n" + " l.bnf 1b \n" + " l.nop \n" + : "=&r"(val) + : "r"(ptr), "r"(val) + : "cc", "memory"); + + return val; +} + +#define xchg(ptr, with) \ + ((typeof(*(ptr)))__xchg((unsigned long)(with), (ptr), sizeof(*(ptr)))) + +#endif /* __ASM_OPENRISC_CMPXCHG_H */ diff --git a/arch/openrisc/include/asm/cpuinfo.h b/arch/openrisc/include/asm/cpuinfo.h index 917318b..ec10679 100644 --- a/arch/openrisc/include/asm/cpuinfo.h +++ b/arch/openrisc/include/asm/cpuinfo.h @@ -24,9 +24,11 @@ struct cpuinfo { u32 icache_size; u32 icache_block_size; + u32 icache_ways; u32 dcache_size; u32 dcache_block_size; + u32 dcache_ways; }; extern struct cpuinfo cpuinfo; diff --git a/arch/openrisc/include/asm/futex.h b/arch/openrisc/include/asm/futex.h new file mode 100644 index 0000000..7780873 --- /dev/null +++ b/arch/openrisc/include/asm/futex.h @@ -0,0 +1,135 @@ +#ifndef __ASM_OPENRISC_FUTEX_H +#define __ASM_OPENRISC_FUTEX_H + +#ifdef __KERNEL__ + +#include <linux/futex.h> +#include <linux/uaccess.h> +#include <asm/errno.h> + +#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg) \ +({ \ + __asm__ __volatile__ ( \ + "1: l.lwa %0, %2 \n" \ + insn "\n" \ + "2: l.swa %2, %1 \n" \ + " l.bnf 1b \n" \ + " l.ori %1, r0, 0 \n" \ + "3: \n" \ + ".section .fixup,\"ax\" \n" \ + "4: l.j 3b \n" \ + " l.addi %1, r0, %3 \n" \ + ".previous \n" \ + ".section __ex_table,\"a\" \n" \ + ".word 1b,4b,2b,4b \n" \ + ".previous \n" \ + : "=&r" (oldval), "=&r" (ret), "+m" (*uaddr) \ + : "i" (-EFAULT), "r" (oparg) \ + : "cc", "memory" \ + ); \ +}) + +static inline int +futex_atomic_op_inuser(int encoded_op, u32 __user *uaddr) +{ + int op = (encoded_op >> 28) & 7; + int cmp = (encoded_op >> 24) & 15; + int oparg = (encoded_op << 8) >> 20; + int cmparg = (encoded_op << 20) >> 20; + int oldval = 0, ret; + + if (encoded_op & (FUTEX_OP_OPARG_SHIFT << 28)) + oparg = 1 << oparg; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + pagefault_disable(); + + switch (op) { + case FUTEX_OP_SET: + __futex_atomic_op("l.or %1,%4,%4", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ADD: + __futex_atomic_op("l.add %1,%0,%4", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_OR: + __futex_atomic_op("l.or %1,%0,%4", ret, oldval, uaddr, oparg); + break; + case FUTEX_OP_ANDN: + __futex_atomic_op("l.and %1,%0,%4", ret, oldval, uaddr, ~oparg); + break; + case FUTEX_OP_XOR: + __futex_atomic_op("l.xor %1,%0,%4", ret, oldval, uaddr, oparg); + 
break; + default: + ret = -ENOSYS; + } + + pagefault_enable(); + + if (!ret) { + switch (cmp) { + case FUTEX_OP_CMP_EQ: + ret = (oldval == cmparg); + break; + case FUTEX_OP_CMP_NE: + ret = (oldval != cmparg); + break; + case FUTEX_OP_CMP_LT: + ret = (oldval < cmparg); + break; + case FUTEX_OP_CMP_GE: + ret = (oldval >= cmparg); + break; + case FUTEX_OP_CMP_LE: + ret = (oldval <= cmparg); + break; + case FUTEX_OP_CMP_GT: + ret = (oldval > cmparg); + break; + default: + ret = -ENOSYS; + } + } + return ret; +} + +static inline int +futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, + u32 oldval, u32 newval) +{ + int ret = 0; + u32 prev; + + if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32))) + return -EFAULT; + + __asm__ __volatile__ ( \ + "1: l.lwa %1, %2 \n" \ + " l.sfeq %1, %3 \n" \ + " l.bnf 3f \n" \ + " l.nop \n" \ + "2: l.swa %2, %4 \n" \ + " l.bnf 1b \n" \ + " l.nop \n" \ + "3: \n" \ + ".section .fixup,\"ax\" \n" \ + "4: l.j 3b \n" \ + " l.addi %0, r0, %5 \n" \ + ".previous \n" \ + ".section __ex_table,\"a\" \n" \ + ".word 1b,4b,2b,4b \n" \ + ".previous \n" \ + : "+r" (ret), "=&r" (prev), "+m" (*uaddr) \ + : "r" (oldval), "r" (newval), "i" (-EFAULT) \ + : "cc", "memory" \ + ); + + *uval = prev; + return ret; +} + +#endif /* __KERNEL__ */ + +#endif /* __ASM_OPENRISC_FUTEX_H */ diff --git a/arch/openrisc/include/asm/spr_defs.h b/arch/openrisc/include/asm/spr_defs.h index 5dbc668..367dac7 100644 --- a/arch/openrisc/include/asm/spr_defs.h +++ b/arch/openrisc/include/asm/spr_defs.h @@ -152,8 +152,8 @@ #define SPR_UPR_MP 0x00000020 /* MAC present */ #define SPR_UPR_DUP 0x00000040 /* Debug unit present */ #define SPR_UPR_PCUP 0x00000080 /* Performance counters unit present */ -#define SPR_UPR_PMP 0x00000100 /* Power management present */ -#define SPR_UPR_PICP 0x00000200 /* PIC present */ +#define SPR_UPR_PICP 0x00000100 /* PIC present */ +#define SPR_UPR_PMP 0x00000200 /* Power management present */ #define SPR_UPR_TTP 0x00000400 /* Tick timer present */ #define SPR_UPR_RES 0x00fe0000 /* Reserved */ #define SPR_UPR_CUP 0xff000000 /* Context units present */ diff --git a/arch/openrisc/include/asm/string.h b/arch/openrisc/include/asm/string.h new file mode 100644 index 0000000..64939cc --- /dev/null +++ b/arch/openrisc/include/asm/string.h @@ -0,0 +1,10 @@ +#ifndef __ASM_OPENRISC_STRING_H +#define __ASM_OPENRISC_STRING_H + +#define __HAVE_ARCH_MEMSET +extern void *memset(void *s, int c, __kernel_size_t n); + +#define __HAVE_ARCH_MEMCPY +extern void *memcpy(void *dest, __const void *src, __kernel_size_t n); + +#endif /* __ASM_OPENRISC_STRING_H */ diff --git a/arch/openrisc/kernel/.gitignore b/arch/openrisc/kernel/.gitignore new file mode 100644 index 0000000..c5f676c --- /dev/null +++ b/arch/openrisc/kernel/.gitignore @@ -0,0 +1 @@ +vmlinux.lds diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S index aac0bde..bc65008 100644 --- a/arch/openrisc/kernel/entry.S +++ b/arch/openrisc/kernel/entry.S @@ -173,6 +173,11 @@ handler: ;\ l.j _ret_from_exception ;\ l.nop +/* clobbers 'reg' */ +#define CLEAR_LWA_FLAG(reg) \ + l.movhi reg,hi(lwa_flag) ;\ + l.ori reg,reg,lo(lwa_flag) ;\ + l.sw 0(reg),r0 /* * NOTE: one should never assume that SPR_EPC, SPR_ESR, SPR_EEAR * contain the same values as when exception we're handling @@ -193,6 +198,7 @@ EXCEPTION_ENTRY(_tng_kernel_start) /* ---[ 0x200: BUS exception ]------------------------------------------- */ EXCEPTION_ENTRY(_bus_fault_handler) + CLEAR_LWA_FLAG(r3) /* r4: EA of fault (set by EXCEPTION_HANDLE) */ l.jal do_bus_fault 
l.addi r3,r1,0 /* pt_regs */ @@ -202,11 +208,13 @@ EXCEPTION_ENTRY(_bus_fault_handler) /* ---[ 0x300: Data Page Fault exception ]------------------------------- */ EXCEPTION_ENTRY(_dtlb_miss_page_fault_handler) + CLEAR_LWA_FLAG(r3) l.and r5,r5,r0 l.j 1f l.nop EXCEPTION_ENTRY(_data_page_fault_handler) + CLEAR_LWA_FLAG(r3) /* set up parameters for do_page_fault */ l.ori r5,r0,0x300 // exception vector 1: @@ -220,7 +228,7 @@ EXCEPTION_ENTRY(_data_page_fault_handler) * DTLB miss handler in the CONFIG_GUARD_PROTECTED_CORE part */ #ifdef CONFIG_OPENRISC_NO_SPR_SR_DSX - l.lwz r6,PT_PC(r3) // address of an offending insn + l.lwz r6,PT_PC(r3) // address of an offending insn l.lwz r6,0(r6) // instruction that caused pf l.srli r6,r6,26 // check opcode for jump insn @@ -236,57 +244,57 @@ EXCEPTION_ENTRY(_data_page_fault_handler) l.bf 8f l.sfeqi r6,0x12 // l.jalr l.bf 8f - - l.nop + l.nop l.j 9f - l.nop -8: + l.nop - l.lwz r6,PT_PC(r3) // address of an offending insn +8: // offending insn is in delay slot + l.lwz r6,PT_PC(r3) // address of an offending insn l.addi r6,r6,4 l.lwz r6,0(r6) // instruction that caused pf l.srli r6,r6,26 // get opcode -9: +9: // offending instruction opcode loaded in r6 #else - l.mfspr r6,r0,SPR_SR // SR -// l.lwz r6,PT_SR(r3) // ESR - l.andi r6,r6,SPR_SR_DSX // check for delay slot exception - l.sfeqi r6,0x1 // exception happened in delay slot - l.bnf 7f - l.lwz r6,PT_PC(r3) // address of an offending insn + l.lwz r6,PT_SR(r3) // SR + l.andi r6,r6,SPR_SR_DSX // check for delay slot exception + l.sfne r6,r0 // exception happened in delay slot + l.bnf 7f + l.lwz r6,PT_PC(r3) // address of an offending insn - l.addi r6,r6,4 // offending insn is in delay slot + l.addi r6,r6,4 // offending insn is in delay slot 7: l.lwz r6,0(r6) // instruction that caused pf l.srli r6,r6,26 // check opcode for write access #endif - l.sfgeui r6,0x33 // check opcode for write access + l.sfgeui r6,0x33 // check opcode for write access l.bnf 1f l.sfleui r6,0x37 l.bnf 1f l.ori r6,r0,0x1 // write access l.j 2f - l.nop + l.nop 1: l.ori r6,r0,0x0 // !write access 2: /* call fault.c handler in or32/mm/fault.c */ l.jal do_page_fault - l.nop + l.nop l.j _ret_from_exception - l.nop + l.nop /* ---[ 0x400: Insn Page Fault exception ]------------------------------- */ EXCEPTION_ENTRY(_itlb_miss_page_fault_handler) + CLEAR_LWA_FLAG(r3) l.and r5,r5,r0 l.j 1f l.nop EXCEPTION_ENTRY(_insn_page_fault_handler) + CLEAR_LWA_FLAG(r3) /* set up parameters for do_page_fault */ l.ori r5,r0,0x400 // exception vector 1: @@ -296,14 +304,15 @@ EXCEPTION_ENTRY(_insn_page_fault_handler) /* call fault.c handler in or32/mm/fault.c */ l.jal do_page_fault - l.nop + l.nop l.j _ret_from_exception - l.nop + l.nop /* ---[ 0x500: Timer exception ]----------------------------------------- */ EXCEPTION_ENTRY(_timer_handler) + CLEAR_LWA_FLAG(r3) l.jal timer_interrupt l.addi r3,r1,0 /* pt_regs */ @@ -313,6 +322,7 @@ EXCEPTION_ENTRY(_timer_handler) /* ---[ 0x600: Aligment exception ]-------------------------------------- */ EXCEPTION_ENTRY(_alignment_handler) + CLEAR_LWA_FLAG(r3) /* r4: EA of fault (set by EXCEPTION_HANDLE) */ l.jal do_unaligned_access l.addi r3,r1,0 /* pt_regs */ @@ -509,6 +519,7 @@ EXCEPTION_ENTRY(_external_irq_handler) // l.sw PT_SR(r1),r4 1: #endif + CLEAR_LWA_FLAG(r3) l.addi r3,r1,0 l.movhi r8,hi(do_IRQ) l.ori r8,r8,lo(do_IRQ) @@ -556,8 +567,12 @@ ENTRY(_sys_call_handler) * they should be clobbered, otherwise */ l.sw PT_GPR3(r1),r3 - /* r4 already saved */ - /* r4 holds the EEAR address of the fault, load the original r4 
*/ + /* + * r4 already saved + * r4 holds the EEAR address of the fault, use it as screatch reg and + * then load the original r4 + */ + CLEAR_LWA_FLAG(r4) l.lwz r4,PT_GPR4(r1) l.sw PT_GPR5(r1),r5 l.sw PT_GPR6(r1),r6 @@ -776,6 +791,7 @@ UNHANDLED_EXCEPTION(_vector_0xd00,0xd00) /* ---[ 0xe00: Trap exception ]------------------------------------------ */ EXCEPTION_ENTRY(_trap_handler) + CLEAR_LWA_FLAG(r3) /* r4: EA of fault (set by EXCEPTION_HANDLE) */ l.jal do_trap l.addi r3,r1,0 /* pt_regs */ diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S index f147933..d01b82e 100644 --- a/arch/openrisc/kernel/head.S +++ b/arch/openrisc/kernel/head.S @@ -24,6 +24,7 @@ #include <asm/page.h> #include <asm/mmu.h> #include <asm/pgtable.h> +#include <asm/thread_info.h> #include <asm/cache.h> #include <asm/spr_defs.h> #include <asm/asm-offsets.h> @@ -34,7 +35,7 @@ l.add rd,rd,rs #define CLEAR_GPR(gpr) \ - l.or gpr,r0,r0 + l.movhi gpr,0x0 #define LOAD_SYMBOL_2_GPR(gpr,symbol) \ l.movhi gpr,hi(symbol) ;\ @@ -442,6 +443,9 @@ _dispatch_do_ipage_fault: __HEAD .global _start _start: + /* Init r0 to zero as per spec */ + CLEAR_GPR(r0) + /* save kernel parameters */ l.or r25,r0,r3 /* pointer to fdt */ @@ -486,7 +490,8 @@ _start: /* * set up initial ksp and current */ - LOAD_SYMBOL_2_GPR(r1,init_thread_union+0x2000) // setup kernel stack + /* setup kernel stack */ + LOAD_SYMBOL_2_GPR(r1,init_thread_union + THREAD_SIZE) LOAD_SYMBOL_2_GPR(r10,init_thread_union) // setup current tophys (r31,r10) l.sw TI_KSP(r31), r1 @@ -520,22 +525,8 @@ enable_dc: l.nop flush_tlb: - /* - * I N V A L I D A T E T L B e n t r i e s - */ - LOAD_SYMBOL_2_GPR(r5,SPR_DTLBMR_BASE(0)) - LOAD_SYMBOL_2_GPR(r6,SPR_ITLBMR_BASE(0)) - l.addi r7,r0,128 /* Maximum number of sets */ -1: - l.mtspr r5,r0,0x0 - l.mtspr r6,r0,0x0 - - l.addi r5,r5,1 - l.addi r6,r6,1 - l.sfeq r7,r0 - l.bnf 1b - l.addi r7,r7,-1 - + l.jal _flush_tlb + l.nop /* The MMU needs to be enabled before or32_early_setup is called */ @@ -627,6 +618,26 @@ jump_start_kernel: l.jr r30 l.nop +_flush_tlb: + /* + * I N V A L I D A T E T L B e n t r i e s + */ + LOAD_SYMBOL_2_GPR(r5,SPR_DTLBMR_BASE(0)) + LOAD_SYMBOL_2_GPR(r6,SPR_ITLBMR_BASE(0)) + l.addi r7,r0,128 /* Maximum number of sets */ +1: + l.mtspr r5,r0,0x0 + l.mtspr r6,r0,0x0 + + l.addi r5,r5,1 + l.addi r6,r6,1 + l.sfeq r7,r0 + l.bnf 1b + l.addi r7,r7,-1 + + l.jr r9 + l.nop + /* ========================================[ cache ]=== */ /* aligment here so we don't change memory offsets with @@ -971,8 +982,6 @@ ENTRY(dtlb_miss_handler) EXCEPTION_STORE_GPR2 EXCEPTION_STORE_GPR3 EXCEPTION_STORE_GPR4 - EXCEPTION_STORE_GPR5 - EXCEPTION_STORE_GPR6 /* * get EA of the miss */ @@ -980,91 +989,70 @@ ENTRY(dtlb_miss_handler) /* * pmd = (pmd_t *)(current_pgd + pgd_index(daddr)); */ - GET_CURRENT_PGD(r3,r5) // r3 is current_pgd, r5 is temp + GET_CURRENT_PGD(r3,r4) // r3 is current_pgd, r4 is temp l.srli r4,r2,0x18 // >> PAGE_SHIFT + (PAGE_SHIFT - 2) l.slli r4,r4,0x2 // to get address << 2 - l.add r5,r4,r3 // r4 is pgd_index(daddr) + l.add r3,r4,r3 // r4 is pgd_index(daddr) /* * if (pmd_none(*pmd)) * goto pmd_none: */ - tophys (r4,r5) + tophys (r4,r3) l.lwz r3,0x0(r4) // get *pmd value l.sfne r3,r0 l.bnf d_pmd_none - l.andi r3,r3,~PAGE_MASK //0x1fff // ~PAGE_MASK - /* - * if (pmd_bad(*pmd)) - * pmd_clear(pmd) - * goto pmd_bad: - */ -// l.sfeq r3,r0 // check *pmd value -// l.bf d_pmd_good - l.addi r3,r0,0xffffe000 // PAGE_MASK -// l.j d_pmd_bad -// l.sw 0x0(r4),r0 // clear pmd + l.addi r3,r0,0xffffe000 // PAGE_MASK + d_pmd_good: /* 
* pte = *pte_offset(pmd, daddr); */ l.lwz r4,0x0(r4) // get **pmd value l.and r4,r4,r3 // & PAGE_MASK - l.srli r5,r2,0xd // >> PAGE_SHIFT, r2 == EEAR - l.andi r3,r5,0x7ff // (1UL << PAGE_SHIFT - 2) - 1 + l.srli r2,r2,0xd // >> PAGE_SHIFT, r2 == EEAR + l.andi r3,r2,0x7ff // (1UL << PAGE_SHIFT - 2) - 1 l.slli r3,r3,0x2 // to get address << 2 l.add r3,r3,r4 - l.lwz r2,0x0(r3) // this is pte at last + l.lwz r3,0x0(r3) // this is pte at last /* * if (!pte_present(pte)) */ - l.andi r4,r2,0x1 + l.andi r4,r3,0x1 l.sfne r4,r0 // is pte present l.bnf d_pte_not_present - l.addi r3,r0,0xffffe3fa // PAGE_MASK | DTLB_UP_CONVERT_MASK + l.addi r4,r0,0xffffe3fa // PAGE_MASK | DTLB_UP_CONVERT_MASK /* * fill DTLB TR register */ - l.and r4,r2,r3 // apply the mask + l.and r4,r3,r4 // apply the mask // Determine number of DMMU sets - l.mfspr r6, r0, SPR_DMMUCFGR - l.andi r6, r6, SPR_DMMUCFGR_NTS - l.srli r6, r6, SPR_DMMUCFGR_NTS_OFF + l.mfspr r2, r0, SPR_DMMUCFGR + l.andi r2, r2, SPR_DMMUCFGR_NTS + l.srli r2, r2, SPR_DMMUCFGR_NTS_OFF l.ori r3, r0, 0x1 - l.sll r3, r3, r6 // r3 = number DMMU sets DMMUCFGR - l.addi r6, r3, -1 // r6 = nsets mask - l.and r5, r5, r6 // calc offset: & (NUM_TLB_ENTRIES-1) + l.sll r3, r3, r2 // r3 = number DMMU sets DMMUCFGR + l.addi r2, r3, -1 // r2 = nsets mask + l.mfspr r3, r0, SPR_EEAR_BASE + l.srli r3, r3, 0xd // >> PAGE_SHIFT + l.and r2, r3, r2 // calc offset: & (NUM_TLB_ENTRIES-1) //NUM_TLB_ENTRIES - l.mtspr r5,r4,SPR_DTLBTR_BASE(0) + l.mtspr r2,r4,SPR_DTLBTR_BASE(0) /* * fill DTLB MR register */ - l.mfspr r2,r0,SPR_EEAR_BASE - l.addi r3,r0,0xffffe000 // PAGE_MASK - l.and r4,r2,r3 // apply PAGE_MASK to EA (__PHX__ do we really need this?) - l.ori r4,r4,0x1 // set hardware valid bit: DTBL_MR entry - l.mtspr r5,r4,SPR_DTLBMR_BASE(0) + l.slli r3, r3, 0xd /* << PAGE_SHIFT => EA & PAGE_MASK */ + l.ori r4,r3,0x1 // set hardware valid bit: DTBL_MR entry + l.mtspr r2,r4,SPR_DTLBMR_BASE(0) EXCEPTION_LOAD_GPR2 EXCEPTION_LOAD_GPR3 EXCEPTION_LOAD_GPR4 - EXCEPTION_LOAD_GPR5 - EXCEPTION_LOAD_GPR6 - l.rfe -d_pmd_bad: - l.nop 1 - EXCEPTION_LOAD_GPR2 - EXCEPTION_LOAD_GPR3 - EXCEPTION_LOAD_GPR4 - EXCEPTION_LOAD_GPR5 - EXCEPTION_LOAD_GPR6 l.rfe d_pmd_none: d_pte_not_present: EXCEPTION_LOAD_GPR2 EXCEPTION_LOAD_GPR3 EXCEPTION_LOAD_GPR4 - EXCEPTION_LOAD_GPR5 - EXCEPTION_LOAD_GPR6 EXCEPTION_HANDLE(_dtlb_miss_page_fault_handler) /* ==============================================[ ITLB miss handler ]=== */ @@ -1072,8 +1060,6 @@ ENTRY(itlb_miss_handler) EXCEPTION_STORE_GPR2 EXCEPTION_STORE_GPR3 EXCEPTION_STORE_GPR4 - EXCEPTION_STORE_GPR5 - EXCEPTION_STORE_GPR6 /* * get EA of the miss */ @@ -1083,30 +1069,19 @@ ENTRY(itlb_miss_handler) * pmd = (pmd_t *)(current_pgd + pgd_index(daddr)); * */ - GET_CURRENT_PGD(r3,r5) // r3 is current_pgd, r5 is temp + GET_CURRENT_PGD(r3,r4) // r3 is current_pgd, r5 is temp l.srli r4,r2,0x18 // >> PAGE_SHIFT + (PAGE_SHIFT - 2) l.slli r4,r4,0x2 // to get address << 2 - l.add r5,r4,r3 // r4 is pgd_index(daddr) + l.add r3,r4,r3 // r4 is pgd_index(daddr) /* * if (pmd_none(*pmd)) * goto pmd_none: */ - tophys (r4,r5) + tophys (r4,r3) l.lwz r3,0x0(r4) // get *pmd value l.sfne r3,r0 l.bnf i_pmd_none - l.andi r3,r3,0x1fff // ~PAGE_MASK - /* - * if (pmd_bad(*pmd)) - * pmd_clear(pmd) - * goto pmd_bad: - */ - -// l.sfeq r3,r0 // check *pmd value -// l.bf i_pmd_good - l.addi r3,r0,0xffffe000 // PAGE_MASK -// l.j i_pmd_bad -// l.sw 0x0(r4),r0 // clear pmd + l.addi r3,r0,0xffffe000 // PAGE_MASK i_pmd_good: /* @@ -1115,35 +1090,36 @@ i_pmd_good: */ l.lwz r4,0x0(r4) // get **pmd value l.and 
r4,r4,r3 // & PAGE_MASK - l.srli r5,r2,0xd // >> PAGE_SHIFT, r2 == EEAR - l.andi r3,r5,0x7ff // (1UL << PAGE_SHIFT - 2) - 1 + l.srli r2,r2,0xd // >> PAGE_SHIFT, r2 == EEAR + l.andi r3,r2,0x7ff // (1UL << PAGE_SHIFT - 2) - 1 l.slli r3,r3,0x2 // to get address << 2 l.add r3,r3,r4 - l.lwz r2,0x0(r3) // this is pte at last + l.lwz r3,0x0(r3) // this is pte at last /* * if (!pte_present(pte)) * */ - l.andi r4,r2,0x1 + l.andi r4,r3,0x1 l.sfne r4,r0 // is pte present l.bnf i_pte_not_present - l.addi r3,r0,0xffffe03a // PAGE_MASK | ITLB_UP_CONVERT_MASK + l.addi r4,r0,0xffffe03a // PAGE_MASK | ITLB_UP_CONVERT_MASK /* * fill ITLB TR register */ - l.and r4,r2,r3 // apply the mask - l.andi r3,r2,0x7c0 // _PAGE_EXEC | _PAGE_SRE | _PAGE_SWE | _PAGE_URE | _PAGE_UWE -// l.andi r3,r2,0x400 // _PAGE_EXEC + l.and r4,r3,r4 // apply the mask + l.andi r3,r3,0x7c0 // _PAGE_EXEC | _PAGE_SRE | _PAGE_SWE | _PAGE_URE | _PAGE_UWE l.sfeq r3,r0 l.bf itlb_tr_fill //_workaround // Determine number of IMMU sets - l.mfspr r6, r0, SPR_IMMUCFGR - l.andi r6, r6, SPR_IMMUCFGR_NTS - l.srli r6, r6, SPR_IMMUCFGR_NTS_OFF + l.mfspr r2, r0, SPR_IMMUCFGR + l.andi r2, r2, SPR_IMMUCFGR_NTS + l.srli r2, r2, SPR_IMMUCFGR_NTS_OFF l.ori r3, r0, 0x1 - l.sll r3, r3, r6 // r3 = number IMMU sets IMMUCFGR - l.addi r6, r3, -1 // r6 = nsets mask - l.and r5, r5, r6 // calc offset: & (NUM_TLB_ENTRIES-1) + l.sll r3, r3, r2 // r3 = number IMMU sets IMMUCFGR + l.addi r2, r3, -1 // r2 = nsets mask + l.mfspr r3, r0, SPR_EEAR_BASE + l.srli r3, r3, 0xd // >> PAGE_SHIFT + l.and r2, r3, r2 // calc offset: & (NUM_TLB_ENTRIES-1) /* * __PHX__ :: fixme @@ -1155,38 +1131,24 @@ i_pmd_good: itlb_tr_fill_workaround: l.ori r4,r4,0xc0 // | (SPR_ITLBTR_UXE | ITLBTR_SXE) itlb_tr_fill: - l.mtspr r5,r4,SPR_ITLBTR_BASE(0) + l.mtspr r2,r4,SPR_ITLBTR_BASE(0) /* * fill DTLB MR register */ - l.mfspr r2,r0,SPR_EEAR_BASE - l.addi r3,r0,0xffffe000 // PAGE_MASK - l.and r4,r2,r3 // apply PAGE_MASK to EA (__PHX__ do we really need this?) - l.ori r4,r4,0x1 // set hardware valid bit: DTBL_MR entry - l.mtspr r5,r4,SPR_ITLBMR_BASE(0) + l.slli r3, r3, 0xd /* << PAGE_SHIFT => EA & PAGE_MASK */ + l.ori r4,r3,0x1 // set hardware valid bit: ITBL_MR entry + l.mtspr r2,r4,SPR_ITLBMR_BASE(0) EXCEPTION_LOAD_GPR2 EXCEPTION_LOAD_GPR3 EXCEPTION_LOAD_GPR4 - EXCEPTION_LOAD_GPR5 - EXCEPTION_LOAD_GPR6 l.rfe -i_pmd_bad: - l.nop 1 - EXCEPTION_LOAD_GPR2 - EXCEPTION_LOAD_GPR3 - EXCEPTION_LOAD_GPR4 - EXCEPTION_LOAD_GPR5 - EXCEPTION_LOAD_GPR6 - l.rfe i_pmd_none: i_pte_not_present: EXCEPTION_LOAD_GPR2 EXCEPTION_LOAD_GPR3 EXCEPTION_LOAD_GPR4 - EXCEPTION_LOAD_GPR5 - EXCEPTION_LOAD_GPR6 EXCEPTION_HANDLE(_itlb_miss_page_fault_handler) /* ==============================================[ boot tlb handlers ]=== */ @@ -1571,12 +1533,7 @@ ENTRY(_early_uart_init) l.jr r9 l.nop -_string_copying_linux: - .string "\n\n\n\n\n\rCopying Linux... 
\0" - -_string_ok_booting: - .string "Ok, booting the kernel.\n\r\0" - + .section .rodata _string_unhandled_exception: .string "\n\rRunarunaround: Unhandled exception 0x\0" @@ -1586,11 +1543,6 @@ _string_epc_prefix: _string_nl: .string "\n\r\0" - .global _string_esr_irq_bug -_string_esr_irq_bug: - .string "\n\rESR external interrupt bug, for details look into entry.S\n\r\0" - - /* ========================================[ page aligned structures ]=== */ diff --git a/arch/openrisc/kernel/or32_ksyms.c b/arch/openrisc/kernel/or32_ksyms.c index 86e31cf..5c4695d 100644 --- a/arch/openrisc/kernel/or32_ksyms.c +++ b/arch/openrisc/kernel/or32_ksyms.c @@ -44,3 +44,4 @@ DECLARE_EXPORT(__ashldi3); DECLARE_EXPORT(__lshrdi3); EXPORT_SYMBOL(__copy_tofrom_user); +EXPORT_SYMBOL(memset); diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index d7990df..6e9d1cb 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -75,6 +75,17 @@ void machine_power_off(void) __asm__("l.nop 1"); } +/* + * Send the doze signal to the cpu if available. + * Make sure, that all interrupts are enabled + */ +void arch_cpu_idle(void) +{ + local_irq_enable(); + if (mfspr(SPR_UPR) & SPR_UPR_PMP) + mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME); +} + void (*pm_power_off) (void) = machine_power_off; /* @@ -226,6 +237,7 @@ int dump_fpu(struct pt_regs *regs, elf_fpregset_t * fpu) extern struct thread_info *_switch(struct thread_info *old_ti, struct thread_info *new_ti); +extern int lwa_flag; struct task_struct *__switch_to(struct task_struct *old, struct task_struct *new) @@ -243,6 +255,8 @@ struct task_struct *__switch_to(struct task_struct *old, new_ti = new->stack; old_ti = old->stack; + lwa_flag = 0; + current_thread_info_set[smp_processor_id()] = new_ti; last = (_switch(old_ti, new_ti))->task; diff --git a/arch/openrisc/kernel/ptrace.c b/arch/openrisc/kernel/ptrace.c index 4f59fa4..2282888 100644 --- a/arch/openrisc/kernel/ptrace.c +++ b/arch/openrisc/kernel/ptrace.c @@ -16,7 +16,6 @@ * 2 of the License, or (at your option) any later version. 
*/ -#include <stddef.h> #include <linux/kernel.h> #include <linux/sched.h> #include <linux/string.h> diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index cb797a3..dbf5ee9 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -117,13 +117,15 @@ static void print_cpuinfo(void) if (upr & SPR_UPR_DCP) printk(KERN_INFO "-- dcache: %4d bytes total, %2d bytes/line, %d way(s)\n", - cpuinfo.dcache_size, cpuinfo.dcache_block_size, 1); + cpuinfo.dcache_size, cpuinfo.dcache_block_size, + cpuinfo.dcache_ways); else printk(KERN_INFO "-- dcache disabled\n"); if (upr & SPR_UPR_ICP) printk(KERN_INFO "-- icache: %4d bytes total, %2d bytes/line, %d way(s)\n", - cpuinfo.icache_size, cpuinfo.icache_block_size, 1); + cpuinfo.icache_size, cpuinfo.icache_block_size, + cpuinfo.icache_ways); else printk(KERN_INFO "-- icache disabled\n"); @@ -155,25 +157,25 @@ void __init setup_cpuinfo(void) { struct device_node *cpu; unsigned long iccfgr, dccfgr; - unsigned long cache_set_size, cache_ways; + unsigned long cache_set_size; cpu = of_find_compatible_node(NULL, NULL, "opencores,or1200-rtlsvn481"); if (!cpu) panic("No compatible CPU found in device tree...\n"); iccfgr = mfspr(SPR_ICCFGR); - cache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW); + cpuinfo.icache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW); cache_set_size = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3); cpuinfo.icache_block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7); cpuinfo.icache_size = - cache_set_size * cache_ways * cpuinfo.icache_block_size; + cache_set_size * cpuinfo.icache_ways * cpuinfo.icache_block_size; dccfgr = mfspr(SPR_DCCFGR); - cache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW); + cpuinfo.dcache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW); cache_set_size = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3); cpuinfo.dcache_block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7); cpuinfo.dcache_size = - cache_set_size * cache_ways * cpuinfo.dcache_block_size; + cache_set_size * cpuinfo.dcache_ways * cpuinfo.dcache_block_size; if (of_property_read_u32(cpu, "clock-frequency", &cpuinfo.clock_frequency)) { @@ -308,30 +310,33 @@ static int show_cpuinfo(struct seq_file *m, void *v) revision = vr & SPR_VR_REV; seq_printf(m, - "cpu\t\t: OpenRISC-%x\n" - "revision\t: %d\n" - "frequency\t: %ld\n" - "dcache size\t: %d bytes\n" - "dcache block size\t: %d bytes\n" - "icache size\t: %d bytes\n" - "icache block size\t: %d bytes\n" - "immu\t\t: %d entries, %lu ways\n" - "dmmu\t\t: %d entries, %lu ways\n" - "bogomips\t: %lu.%02lu\n", - version, - revision, - loops_per_jiffy * HZ, - cpuinfo.dcache_size, - cpuinfo.dcache_block_size, - cpuinfo.icache_size, - cpuinfo.icache_block_size, - 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2), - 1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW), - 1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2), - 1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW), - (loops_per_jiffy * HZ) / 500000, - ((loops_per_jiffy * HZ) / 5000) % 100); - + "cpu\t\t: OpenRISC-%x\n" + "revision\t: %d\n" + "frequency\t: %ld\n" + "dcache size\t: %d bytes\n" + "dcache block size\t: %d bytes\n" + "dcache ways\t: %d\n" + "icache size\t: %d bytes\n" + "icache block size\t: %d bytes\n" + "icache ways\t: %d\n" + "immu\t\t: %d entries, %lu ways\n" + "dmmu\t\t: %d entries, %lu ways\n" + "bogomips\t: %lu.%02lu\n", + version, + revision, + loops_per_jiffy * HZ, + cpuinfo.dcache_size, + cpuinfo.dcache_block_size, + cpuinfo.dcache_ways, + cpuinfo.icache_size, + cpuinfo.icache_block_size, + cpuinfo.icache_ways, + 1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2), 
+ 1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW), + 1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2), + 1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW), + (loops_per_jiffy * HZ) / 500000, + ((loops_per_jiffy * HZ) / 5000) % 100); return 0; } diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index d29c41b..7e81ad2 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -40,6 +40,8 @@ extern char _etext, _stext; int kstack_depth_to_print = 0x180; +int lwa_flag; +unsigned long __user *lwa_addr; static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) { @@ -334,10 +336,191 @@ asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address) } } +static inline int in_delay_slot(struct pt_regs *regs) +{ +#ifdef CONFIG_OPENRISC_NO_SPR_SR_DSX + /* No delay slot flag, do the old way */ + unsigned int op, insn; + + insn = *((unsigned int *)regs->pc); + op = insn >> 26; + switch (op) { + case 0x00: /* l.j */ + case 0x01: /* l.jal */ + case 0x03: /* l.bnf */ + case 0x04: /* l.bf */ + case 0x11: /* l.jr */ + case 0x12: /* l.jalr */ + return 1; + default: + return 0; + } +#else + return regs->sr & SPR_SR_DSX; +#endif +} + +static inline void adjust_pc(struct pt_regs *regs, unsigned long address) +{ + int displacement; + unsigned int rb, op, jmp; + + if (unlikely(in_delay_slot(regs))) { + /* In delay slot, instruction at pc is a branch, simulate it */ + jmp = *((unsigned int *)regs->pc); + + displacement = sign_extend32(((jmp) & 0x3ffffff) << 2, 27); + rb = (jmp & 0x0000ffff) >> 11; + op = jmp >> 26; + + switch (op) { + case 0x00: /* l.j */ + regs->pc += displacement; + return; + case 0x01: /* l.jal */ + regs->pc += displacement; + regs->gpr[9] = regs->pc + 8; + return; + case 0x03: /* l.bnf */ + if (regs->sr & SPR_SR_F) + regs->pc += 8; + else + regs->pc += displacement; + return; + case 0x04: /* l.bf */ + if (regs->sr & SPR_SR_F) + regs->pc += displacement; + else + regs->pc += 8; + return; + case 0x11: /* l.jr */ + regs->pc = regs->gpr[rb]; + return; + case 0x12: /* l.jalr */ + regs->pc = regs->gpr[rb]; + regs->gpr[9] = regs->pc + 8; + return; + default: + break; + } + } else { + regs->pc += 4; + } +} + +static inline void simulate_lwa(struct pt_regs *regs, unsigned long address, + unsigned int insn) +{ + unsigned int ra, rd; + unsigned long value; + unsigned long orig_pc; + long imm; + + const struct exception_table_entry *entry; + + orig_pc = regs->pc; + adjust_pc(regs, address); + + ra = (insn >> 16) & 0x1f; + rd = (insn >> 21) & 0x1f; + imm = (short)insn; + lwa_addr = (unsigned long __user *)(regs->gpr[ra] + imm); + + if ((unsigned long)lwa_addr & 0x3) { + do_unaligned_access(regs, address); + return; + } + + if (get_user(value, lwa_addr)) { + if (user_mode(regs)) { + force_sig(SIGSEGV, current); + return; + } + + if ((entry = search_exception_tables(orig_pc))) { + regs->pc = entry->fixup; + return; + } + + /* kernel access in kernel space, load it directly */ + value = *((unsigned long *)lwa_addr); + } + + lwa_flag = 1; + regs->gpr[rd] = value; +} + +static inline void simulate_swa(struct pt_regs *regs, unsigned long address, + unsigned int insn) +{ + unsigned long __user *vaddr; + unsigned long orig_pc; + unsigned int ra, rb; + long imm; + + const struct exception_table_entry *entry; + + orig_pc = regs->pc; + adjust_pc(regs, address); + + ra = (insn >> 16) & 0x1f; + rb = (insn >> 11) & 0x1f; + imm = (short)(((insn & 0x2200000) >> 10) | (insn & 0x7ff)); + vaddr = (unsigned long __user *)(regs->gpr[ra] + imm); + + if (!lwa_flag || 
vaddr != lwa_addr) { + regs->sr &= ~SPR_SR_F; + return; + } + + if ((unsigned long)vaddr & 0x3) { + do_unaligned_access(regs, address); + return; + } + + if (put_user(regs->gpr[rb], vaddr)) { + if (user_mode(regs)) { + force_sig(SIGSEGV, current); + return; + } + + if ((entry = search_exception_tables(orig_pc))) { + regs->pc = entry->fixup; + return; + } + + /* kernel access in kernel space, store it directly */ + *((unsigned long *)vaddr) = regs->gpr[rb]; + } + + lwa_flag = 0; + regs->sr |= SPR_SR_F; +} + +#define INSN_LWA 0x1b +#define INSN_SWA 0x33 + asmlinkage void do_illegal_instruction(struct pt_regs *regs, unsigned long address) { siginfo_t info; + unsigned int op; + unsigned int insn = *((unsigned int *)address); + + op = insn >> 26; + + switch (op) { + case INSN_LWA: + simulate_lwa(regs, address, insn); + return; + + case INSN_SWA: + simulate_swa(regs, address, insn); + return; + + default: + break; + } if (user_mode(regs)) { /* Send a SIGILL */ diff --git a/arch/openrisc/lib/Makefile b/arch/openrisc/lib/Makefile index 966f65d..17d9d37 100644 --- a/arch/openrisc/lib/Makefile +++ b/arch/openrisc/lib/Makefile @@ -2,4 +2,4 @@ # Makefile for or32 specific library files.. # -obj-y = string.o delay.o +obj-y := delay.o string.o memset.o memcpy.o diff --git a/arch/openrisc/lib/memcpy.c b/arch/openrisc/lib/memcpy.c new file mode 100644 index 0000000..669887a --- /dev/null +++ b/arch/openrisc/lib/memcpy.c @@ -0,0 +1,124 @@ +/* + * arch/openrisc/lib/memcpy.c + * + * Optimized memory copy routines for openrisc. These are mostly copied + * from ohter sources but slightly entended based on ideas discuassed in + * #openrisc. + * + * The word unroll implementation is an extension to the arm byte + * unrolled implementation, but using word copies (if things are + * properly aligned) + * + * The great arm loop unroll algorithm can be found at: + * arch/arm/boot/compressed/string.c + */ + +#include <linux/export.h> + +#include <linux/string.h> + +#ifdef CONFIG_OR1K_1200 +/* + * Do memcpy with word copies and loop unrolling. 
This gives the + * best performance on the OR1200 and MOR1KX archirectures + */ +void *memcpy(void *dest, __const void *src, __kernel_size_t n) +{ + int i = 0; + unsigned char *d, *s; + uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src; + + /* If both source and dest are word aligned copy words */ + if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) { + /* Copy 32 bytes per loop */ + for (i = n >> 5; i > 0; i--) { + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + } + + if (n & 1 << 4) { + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + } + + if (n & 1 << 3) { + *dest_w++ = *src_w++; + *dest_w++ = *src_w++; + } + + if (n & 1 << 2) + *dest_w++ = *src_w++; + + d = (unsigned char *)dest_w; + s = (unsigned char *)src_w; + + } else { + d = (unsigned char *)dest_w; + s = (unsigned char *)src_w; + + for (i = n >> 3; i > 0; i--) { + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + } + + if (n & 1 << 2) { + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + *d++ = *s++; + } + } + + if (n & 1 << 1) { + *d++ = *s++; + *d++ = *s++; + } + + if (n & 1) + *d++ = *s++; + + return dest; +} +#else +/* + * Use word copies but no loop unrolling as we cannot assume there + * will be benefits on the archirecture + */ +void *memcpy(void *dest, __const void *src, __kernel_size_t n) +{ + unsigned char *d = (unsigned char *)dest, *s = (unsigned char *)src; + uint32_t *dest_w = (uint32_t *)dest, *src_w = (uint32_t *)src; + + /* If both source and dest are word aligned copy words */ + if (!((unsigned int)dest_w & 3) && !((unsigned int)src_w & 3)) { + for (; n >= 4; n -= 4) + *dest_w++ = *src_w++; + } + + d = (unsigned char *)dest_w; + s = (unsigned char *)src_w; + + /* For remaining or if not aligned, copy bytes */ + for (; n >= 1; n -= 1) + *d++ = *s++; + + return dest; + +} +#endif + +EXPORT_SYMBOL(memcpy); diff --git a/arch/openrisc/lib/memset.S b/arch/openrisc/lib/memset.S new file mode 100644 index 0000000..92cc2ea --- /dev/null +++ b/arch/openrisc/lib/memset.S @@ -0,0 +1,98 @@ +/* + * OpenRISC memset.S + * + * Hand-optimized assembler version of memset for OpenRISC. + * Algorithm inspired by several other arch-specific memset routines + * in the kernel tree + * + * Copyright (C) 2015 Olof Kindgren <olof.kindgren@gmail.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. 
+ */ + + .global memset + .type memset, @function +memset: + /* arguments: + * r3 = *s + * r4 = c + * r5 = n + * r13, r15, r17, r19 used as temp regs + */ + + /* Exit if n == 0 */ + l.sfeqi r5, 0 + l.bf 4f + + /* Truncate c to char */ + l.andi r13, r4, 0xff + + /* Skip word extension if c is 0 */ + l.sfeqi r13, 0 + l.bf 1f + /* Check for at least two whole words (8 bytes) */ + l.sfleui r5, 7 + + /* Extend char c to 32-bit word cccc in r13 */ + l.slli r15, r13, 16 // r13 = 000c, r15 = 0c00 + l.or r13, r13, r15 // r13 = 0c0c, r15 = 0c00 + l.slli r15, r13, 8 // r13 = 0c0c, r15 = c0c0 + l.or r13, r13, r15 // r13 = cccc, r15 = c0c0 + +1: l.addi r19, r3, 0 // Set r19 = src + /* Jump to byte copy loop if less than two words */ + l.bf 3f + l.or r17, r5, r0 // Set r17 = n + + /* Mask out two LSBs to check alignment */ + l.andi r15, r3, 0x3 + + /* lsb == 00, jump to word copy loop */ + l.sfeqi r15, 0 + l.bf 2f + l.addi r19, r3, 0 // Set r19 = src + + /* lsb == 01,10 or 11 */ + l.sb 0(r3), r13 // *src = c + l.addi r17, r17, -1 // Decrease n + + l.sfeqi r15, 3 + l.bf 2f + l.addi r19, r3, 1 // src += 1 + + /* lsb == 01 or 10 */ + l.sb 1(r3), r13 // *(src+1) = c + l.addi r17, r17, -1 // Decrease n + + l.sfeqi r15, 2 + l.bf 2f + l.addi r19, r3, 2 // src += 2 + + /* lsb == 01 */ + l.sb 2(r3), r13 // *(src+2) = c + l.addi r17, r17, -1 // Decrease n + l.addi r19, r3, 3 // src += 3 + + /* Word copy loop */ +2: l.sw 0(r19), r13 // *src = cccc + l.addi r17, r17, -4 // Decrease n + l.sfgeui r17, 4 + l.bf 2b + l.addi r19, r19, 4 // Increase src + + /* When n > 0, copy the remaining bytes, otherwise jump to exit */ + l.sfeqi r17, 0 + l.bf 4f + + /* Byte copy loop */ +3: l.addi r17, r17, -1 // Decrease n + l.sb 0(r19), r13 // *src = cccc + l.sfnei r17, 0 + l.bf 3b + l.addi r19, r19, 1 // Increase src + +4: l.jr r9 + l.ori r11, r3, 0 diff --git a/arch/openrisc/mm/ioremap.c b/arch/openrisc/mm/ioremap.c index 8705a46..2175e4b 100644 --- a/arch/openrisc/mm/ioremap.c +++ b/arch/openrisc/mm/ioremap.c @@ -80,6 +80,7 @@ __ioremap(phys_addr_t addr, unsigned long size, pgprot_t prot) return (void __iomem *)(offset + (char *)v); } +EXPORT_SYMBOL(__ioremap); void iounmap(void *addr) { @@ -106,6 +107,7 @@ void iounmap(void *addr) return vfree((void *)(PAGE_MASK & (unsigned long)addr)); } +EXPORT_SYMBOL(iounmap); /** * OK, this one's a bit tricky... 
ioremap can get called before memory is diff --git a/include/asm-generic/atomic.h b/include/asm-generic/atomic.h index 9ed8b98..3f38eb0 100644 --- a/include/asm-generic/atomic.h +++ b/include/asm-generic/atomic.h @@ -223,6 +223,7 @@ static inline void atomic_dec(atomic_t *v) #define atomic_xchg(ptr, v) (xchg(&(ptr)->counter, (v))) #define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new))) +#ifndef __atomic_add_unless static inline int __atomic_add_unless(atomic_t *v, int a, int u) { int c, old; @@ -231,5 +232,6 @@ static inline int __atomic_add_unless(atomic_t *v, int a, int u) c = old; return c; } +#endif #endif /* __ASM_GENERIC_ATOMIC_H */ diff --git a/scripts/checkstack.pl b/scripts/checkstack.pl index 694a075..3033be7 100755 --- a/scripts/checkstack.pl +++ b/scripts/checkstack.pl @@ -81,6 +81,9 @@ my (@stack, $re, $dre, $x, $xs, $funcre); } elsif ($arch eq 'nios2') { #25a8: defffb04 addi sp,sp,-20 $re = qr/.*addi.*sp,sp,-(([0-9]{2}|[3-9])[0-9]{2})/o; + } elsif ($arch eq 'openrisc') { + # c000043c: 9c 21 fe f0 l.addi r1,r1,-272 + $re = qr/.*l\.addi.*r1,r1,-(([0-9]{2}|[3-9])[0-9]{2})/o; } elsif ($arch eq 'parisc' || $arch eq 'parisc64') { $re = qr/.*ldo ($x{1,8})\(sp\),sp/o; } elsif ($arch eq 'ppc') { |