summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/arm/kernel/entry-armv.S213
-rw-r--r--arch/arm/kernel/traps.c58
-rw-r--r--arch/arm/mm/Kconfig14
-rw-r--r--include/asm-arm/unistd.h3
4 files changed, 277 insertions, 11 deletions
diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S
index 2a5c3fe..080df90 100644
--- a/arch/arm/kernel/entry-armv.S
+++ b/arch/arm/kernel/entry-armv.S
@@ -269,6 +269,12 @@ __pabt_svc:
add r5, sp, #S_PC
ldmia r7, {r2 - r4} @ Get USR pc, cpsr
+#if __LINUX_ARM_ARCH__ < 6
+ @ make sure our user space atomic helper is aborted
+ cmp r2, #VIRT_OFFSET
+ bichs r3, r3, #PSR_Z_BIT
+#endif
+
@
@ We are now ready to fill in the remaining blanks on the stack:
@
@@ -499,8 +505,12 @@ ENTRY(__switch_to)
mra r4, r5, acc0
stmia ip, {r4, r5}
#endif
+#ifdef CONFIG_HAS_TLS_REG
+ mcr p15, 0, r3, c13, c0, 3 @ set TLS register
+#else
mov r4, #0xffff0fff
- str r3, [r4, #-3] @ Set TLS ptr
+ str r3, [r4, #-15] @ TLS val at 0xffff0ff0
+#endif
mcr p15, 0, r6, c3, c0, 0 @ Set domain register
#ifdef CONFIG_VFP
@ Always disable VFP so we can lazily save/restore the old
@@ -519,6 +529,207 @@ ENTRY(__switch_to)
ldmib r2, {r4 - sl, fp, sp, pc} @ Load all regs saved previously
__INIT
+
+/*
+ * User helpers.
+ *
+ * These are segment of kernel provided user code reachable from user space
+ * at a fixed address in kernel memory. This is used to provide user space
+ * with some operations which require kernel help because of unimplemented
+ * native feature and/or instructions in many ARM CPUs. The idea is for
+ * this code to be executed directly in user mode for best efficiency but
+ * which is too intimate with the kernel counter part to be left to user
+ * libraries. In fact this code might even differ from one CPU to another
+ * depending on the available instruction set and restrictions like on
+ * SMP systems. In other words, the kernel reserves the right to change
+ * this code as needed without warning. Only the entry points and their
+ * results are guaranteed to be stable.
+ *
+ * Each segment is 32-byte aligned and will be moved to the top of the high
+ * vector page. New segments (if ever needed) must be added in front of
+ * existing ones. This mechanism should be used only for things that are
+ * really small and justified, and not be abused freely.
+ *
+ * User space is expected to implement those things inline when optimizing
+ * for a processor that has the necessary native support, but only if such
+ * resulting binaries are already to be incompatible with earlier ARM
+ * processors due to the use of unsupported instructions other than what
+ * is provided here. In other words don't make binaries unable to run on
+ * earlier processors just for the sake of not using these kernel helpers
+ * if your compiled code is not going to use the new instructions for other
+ * purpose.
+ */
+
+ .align 5
+ .globl __kuser_helper_start
+__kuser_helper_start:
+
+/*
+ * Reference prototype:
+ *
+ * int __kernel_cmpxchg(int oldval, int newval, int *ptr)
+ *
+ * Input:
+ *
+ * r0 = oldval
+ * r1 = newval
+ * r2 = ptr
+ * lr = return address
+ *
+ * Output:
+ *
+ * r0 = returned value (zero or non-zero)
+ * C flag = set if r0 == 0, clear if r0 != 0
+ *
+ * Clobbered:
+ *
+ * r3, ip, flags
+ *
+ * Definition and user space usage example:
+ *
+ * typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr);
+ * #define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0)
+ *
+ * Atomically store newval in *ptr if *ptr is equal to oldval for user space.
+ * Return zero if *ptr was changed or non-zero if no exchange happened.
+ * The C flag is also set if *ptr was changed to allow for assembly
+ * optimization in the calling code.
+ *
+ * For example, a user space atomic_add implementation could look like this:
+ *
+ * #define atomic_add(ptr, val) \
+ * ({ register unsigned int *__ptr asm("r2") = (ptr); \
+ * register unsigned int __result asm("r1"); \
+ * asm volatile ( \
+ * "1: @ atomic_add\n\t" \
+ * "ldr r0, [r2]\n\t" \
+ * "mov r3, #0xffff0fff\n\t" \
+ * "add lr, pc, #4\n\t" \
+ * "add r1, r0, %2\n\t" \
+ * "add pc, r3, #(0xffff0fc0 - 0xffff0fff)\n\t" \
+ * "bcc 1b" \
+ * : "=&r" (__result) \
+ * : "r" (__ptr), "rIL" (val) \
+ * : "r0","r3","ip","lr","cc","memory" ); \
+ * __result; })
+ */
+
+__kuser_cmpxchg: @ 0xffff0fc0
+
+#if __LINUX_ARM_ARCH__ < 6
+
+#ifdef CONFIG_SMP /* sanity check */
+#error "CONFIG_SMP on a machine supporting pre-ARMv6 processors?"
+#endif
+
+ /*
+ * Theory of operation:
+ *
+ * We set the Z flag before loading oldval. If ever an exception
+ * occurs we can not be sure the loaded value will still be the same
+ * when the exception returns, therefore the user exception handler
+ * will clear the Z flag whenever the interrupted user code was
+ * actually from the kernel address space (see the usr_entry macro).
+ *
+ * The post-increment on the str is used to prevent a race with an
+ * exception happening just after the str instruction which would
+ * clear the Z flag although the exchange was done.
+ */
+ teq ip, ip @ set Z flag
+ ldr ip, [r2] @ load current val
+ add r3, r2, #1 @ prepare store ptr
+ teqeq ip, r0 @ compare with oldval if still allowed
+ streq r1, [r3, #-1]! @ store newval if still allowed
+ subs r0, r2, r3 @ if r2 == r3 the str occured
+ mov pc, lr
+
+#else
+
+ ldrex r3, [r2]
+ subs r3, r3, r0
+ strexeq r3, r1, [r2]
+ rsbs r0, r3, #0
+ mov pc, lr
+
+#endif
+
+ .align 5
+
+/*
+ * Reference prototype:
+ *
+ * int __kernel_get_tls(void)
+ *
+ * Input:
+ *
+ * lr = return address
+ *
+ * Output:
+ *
+ * r0 = TLS value
+ *
+ * Clobbered:
+ *
+ * the Z flag might be lost
+ *
+ * Definition and user space usage example:
+ *
+ * typedef int (__kernel_get_tls_t)(void);
+ * #define __kernel_get_tls (*(__kernel_get_tls_t *)0xffff0fe0)
+ *
+ * Get the TLS value as previously set via the __ARM_NR_set_tls syscall.
+ *
+ * This could be used as follows:
+ *
+ * #define __kernel_get_tls() \
+ * ({ register unsigned int __val asm("r0"); \
+ * asm( "mov r0, #0xffff0fff; mov lr, pc; sub pc, r0, #31" \
+ * : "=r" (__val) : : "lr","cc" ); \
+ * __val; })
+ */
+
+__kuser_get_tls: @ 0xffff0fe0
+
+#ifndef CONFIG_HAS_TLS_REG
+
+#ifdef CONFIG_SMP /* sanity check */
+#error "CONFIG_SMP without CONFIG_HAS_TLS_REG is wrong"
+#endif
+
+ ldr r0, [pc, #(16 - 8)] @ TLS stored at 0xffff0ff0
+ mov pc, lr
+
+#else
+
+ mrc p15, 0, r0, c13, c0, 3 @ read TLS register
+ mov pc, lr
+
+#endif
+
+ .rep 5
+ .word 0 @ pad up to __kuser_helper_version
+ .endr
+
+/*
+ * Reference declaration:
+ *
+ * extern unsigned int __kernel_helper_version;
+ *
+ * Definition and user space usage example:
+ *
+ * #define __kernel_helper_version (*(unsigned int *)0xffff0ffc)
+ *
+ * User space may read this to determine the curent number of helpers
+ * available.
+ */
+
+__kuser_helper_version: @ 0xffff0ffc
+ .word ((__kuser_helper_end - __kuser_helper_start) >> 5)
+
+ .globl __kuser_helper_end
+__kuser_helper_end:
+
+
/*
* Vector stubs.
*
diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c
index 0078aeb..3a001fe 100644
--- a/arch/arm/kernel/traps.c
+++ b/arch/arm/kernel/traps.c
@@ -450,13 +450,17 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
case NR(set_tls):
thread->tp_value = regs->ARM_r0;
+#ifdef CONFIG_HAS_TLS_REG
+ asm ("mcr p15, 0, %0, c13, c0, 3" : : "r" (regs->ARM_r0) );
+#else
/*
- * Our user accessible TLS ptr is located at 0xffff0ffc.
- * On SMP read access to this address must raise a fault
- * and be emulated from the data abort handler.
- * m
+ * User space must never try to access this directly.
+ * Expect your app to break eventually if you do so.
+ * The user helper at 0xffff0fe0 must be used instead.
+ * (see entry-armv.S for details)
*/
- *((unsigned long *)0xffff0ffc) = thread->tp_value;
+ *((unsigned int *)0xffff0ff0) = regs->ARM_r0;
+#endif
return 0;
default:
@@ -493,6 +497,41 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
return 0;
}
+#if defined(CONFIG_CPU_32v6) && !defined(CONFIG_HAS_TLS_REG)
+
+/*
+ * We might be running on an ARMv6+ processor which should have the TLS
+ * register, but for some reason we can't use it and have to emulate it.
+ */
+
+static int get_tp_trap(struct pt_regs *regs, unsigned int instr)
+{
+ int reg = (instr >> 12) & 15;
+ if (reg == 15)
+ return 1;
+ regs->uregs[reg] = current_thread_info()->tp_value;
+ regs->ARM_pc += 4;
+ return 0;
+}
+
+static struct undef_hook arm_mrc_hook = {
+ .instr_mask = 0x0fff0fff,
+ .instr_val = 0x0e1d0f70,
+ .cpsr_mask = PSR_T_BIT,
+ .cpsr_val = 0,
+ .fn = get_tp_trap,
+};
+
+static int __init arm_mrc_hook_init(void)
+{
+ register_undef_hook(&arm_mrc_hook);
+ return 0;
+}
+
+late_initcall(arm_mrc_hook_init);
+
+#endif
+
void __bad_xchg(volatile void *ptr, int size)
{
printk("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n",
@@ -580,14 +619,17 @@ void __init trap_init(void)
{
extern char __stubs_start[], __stubs_end[];
extern char __vectors_start[], __vectors_end[];
+ extern char __kuser_helper_start[], __kuser_helper_end[];
+ int kuser_sz = __kuser_helper_end - __kuser_helper_start;
/*
- * Copy the vectors and stubs (in entry-armv.S) into the
- * vector page, mapped at 0xffff0000, and ensure these are
- * visible to the instruction stream.
+ * Copy the vectors, stubs and kuser helpers (in entry-armv.S)
+ * into the vector page, mapped at 0xffff0000, and ensure these
+ * are visible to the instruction stream.
*/
memcpy((void *)0xffff0000, __vectors_start, __vectors_end - __vectors_start);
memcpy((void *)0xffff0200, __stubs_start, __stubs_end - __stubs_start);
+ memcpy((void *)0xffff1000 - kuser_sz, __kuser_helper_start, kuser_sz);
flush_icache_range(0xffff0000, 0xffff0000 + PAGE_SIZE);
modify_domain(DOMAIN_USER, DOMAIN_CLIENT);
}
diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig
index 5b670c9..007766a 100644
--- a/arch/arm/mm/Kconfig
+++ b/arch/arm/mm/Kconfig
@@ -409,3 +409,17 @@ config CPU_BPREDICT_DISABLE
depends on CPU_ARM1020
help
Say Y here to disable branch prediction. If unsure, say N.
+
+config HAS_TLS_REG
+ bool
+ depends on CPU_32v6 && !CPU_32v5 && !CPU_32v4 && !CPU_32v3
+ help
+ This selects support for the CP15 thread register.
+ It is defined to be available on ARMv6 or later. However
+ if the kernel is configured to support multiple CPUs including
+ a pre-ARMv6 processors, or if a given ARMv6 processor doesn't
+ implement the thread register for some reason, then access to
+ this register from user space must be trapped and emulated.
+ If user space is relying on the __kuser_get_tls code then
+ there should not be any impact.
+
diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h
index a19ec09..ace2748 100644
--- a/include/asm-arm/unistd.h
+++ b/include/asm-arm/unistd.h
@@ -359,8 +359,7 @@
#define __ARM_NR_cacheflush (__ARM_NR_BASE+2)
#define __ARM_NR_usr26 (__ARM_NR_BASE+3)
#define __ARM_NR_usr32 (__ARM_NR_BASE+4)
-
-#define __ARM_NR_set_tls (__ARM_NR_BASE+0x800)
+#define __ARM_NR_set_tls (__ARM_NR_BASE+5)
#define __sys2(x) #x
#define __sys1(x) __sys2(x)
OpenPOWER on IntegriCloud