From 4ae73f2d53255c388d50bf83c1681112a6f9cba1 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 26 May 2012 10:14:39 -0700 Subject: x86: use generic strncpy_from_user routine The generic strncpy_from_user() is not really optimal, since it is designed to work on both little-endian and big-endian. And on little-endian you can simplify much of the logic to find the first zero byte, since little-endian arithmetic doesn't have to worry about the carry bit propagating into earlier bytes (only later bytes, which we don't care about). But I have patches to make the generic routines use the architecture- specific infrastructure, so that we can regain the little-endian optimizations. But before we do that, switch over to the generic routines to make the patches each do just one well-defined thing. Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 1 + arch/x86/include/asm/uaccess.h | 1 + arch/x86/lib/usercopy.c | 97 ------------------------------------------ 3 files changed, 2 insertions(+), 97 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 81c3e8b..3220d44 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -93,6 +93,7 @@ config X86 select GENERIC_CLOCKEVENTS_BROADCAST if X86_64 || (X86_32 && X86_LOCAL_APIC) select GENERIC_TIME_VSYSCALL if X86_64 select KTIME_SCALAR if X86_32 + select GENERIC_STRNCPY_FROM_USER config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS || UPROBES) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 851fe0d..1354fac 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -32,6 +32,7 @@ #define segment_eq(a, b) ((a).seg == (b).seg) +#define user_addr_max() (current_thread_info()->addr_limit.seg) #define __addr_ok(addr) \ ((unsigned long __force)(addr) < \ (current_thread_info()->addr_limit.seg)) diff --git a/arch/x86/lib/usercopy.c b/arch/x86/lib/usercopy.c index 2e4e4b0..f61ee67 100644 --- a/arch/x86/lib/usercopy.c +++ b/arch/x86/lib/usercopy.c @@ -43,100 +43,3 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) return len; } EXPORT_SYMBOL_GPL(copy_from_user_nmi); - -/* - * Do a strncpy, return length of string without final '\0'. - * 'count' is the user-supplied count (return 'count' if we - * hit it), 'max' is the address space maximum (and we return - * -EFAULT if we hit it). - */ -static inline long do_strncpy_from_user(char *dst, const char __user *src, long count, unsigned long max) -{ - long res = 0; - - /* - * Truncate 'max' to the user-specified limit, so that - * we only have one limit we need to check in the loop - */ - if (max > count) - max = count; - - while (max >= sizeof(unsigned long)) { - unsigned long c, mask; - - /* Fall back to byte-at-a-time if we get a page fault */ - if (unlikely(__get_user(c,(unsigned long __user *)(src+res)))) - break; - mask = has_zero(c); - if (mask) { - mask = (mask - 1) & ~mask; - mask >>= 7; - *(unsigned long *)(dst+res) = c & mask; - return res + count_masked_bytes(mask); - } - *(unsigned long *)(dst+res) = c; - res += sizeof(unsigned long); - max -= sizeof(unsigned long); - } - - while (max) { - char c; - - if (unlikely(__get_user(c,src+res))) - return -EFAULT; - dst[res] = c; - if (!c) - return res; - res++; - max--; - } - - /* - * Uhhuh. We hit 'max'. But was that the user-specified maximum - * too? If so, that's ok - we got as much as the user asked for. - */ - if (res >= count) - return res; - - /* - * Nope: we hit the address space limit, and we still had more - * characters the caller would have wanted. That's an EFAULT. - */ - return -EFAULT; -} - -/** - * strncpy_from_user: - Copy a NUL terminated string from userspace. - * @dst: Destination address, in kernel space. This buffer must be at - * least @count bytes long. - * @src: Source address, in user space. - * @count: Maximum number of bytes to copy, including the trailing NUL. - * - * Copies a NUL-terminated string from userspace to kernel space. - * - * On success, returns the length of the string (not including the trailing - * NUL). - * - * If access to userspace fails, returns -EFAULT (some data may have been - * copied). - * - * If @count is smaller than the length of the string, copies @count bytes - * and returns @count. - */ -long -strncpy_from_user(char *dst, const char __user *src, long count) -{ - unsigned long max_addr, src_addr; - - if (unlikely(count <= 0)) - return 0; - - max_addr = current_thread_info()->addr_limit.seg; - src_addr = (unsigned long)src; - if (likely(src_addr < max_addr)) { - unsigned long max = max_addr - src_addr; - return do_strncpy_from_user(dst, src, count, max); - } - return -EFAULT; -} -EXPORT_SYMBOL(strncpy_from_user); -- cgit v1.1 From 36126f8f2ed8168eb13aa0662b9b9585cba100a9 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 26 May 2012 10:43:17 -0700 Subject: word-at-a-time: make the interfaces truly generic This changes the interfaces in to be a bit more complicated, but a lot more generic. In particular, it allows us to really do the operations efficiently on both little-endian and big-endian machines, pretty much regardless of machine details. For example, if you can rely on a fast population count instruction on your architecture, this will allow you to make your optimized file with that. NOTE! The "generic" version in include/asm-generic/word-at-a-time.h is not truly generic, it actually only works on big-endian. Why? Because on little-endian the generic algorithms are wasteful, since you can inevitably do better. The x86 implementation is an example of that. (The only truly non-generic part of the asm-generic implementation is the "find_zero()" function, and you could make a little-endian version of it. And if the Kbuild infrastructure allowed us to pick a particular header file, that would be lovely) The functions are as follows: - WORD_AT_A_TIME_CONSTANTS: specific constants that the algorithm uses. - has_zero(): take a word, and determine if it has a zero byte in it. It gets the word, the pointer to the constant pool, and a pointer to an intermediate "data" field it can set. This is the "quick-and-dirty" zero tester: it's what is run inside the hot loops. - "prep_zero_mask()": take the word, the data that has_zero() produced, and the constant pool, and generate an *exact* mask of which byte had the first zero. This is run directly *outside* the loop, and allows the "has_zero()" function to answer the "is there a zero byte" question without necessarily getting exactly *which* byte is the first one to contain a zero. If you do multiple byte lookups concurrently (eg "hash_name()", which looks for both NUL and '/' bytes), after you've done the prep_zero_mask() phase, the result of those can be or'ed together to get the "either or" case. - The result from "prep_zero_mask()" can then be fed into "find_zero()" (to find the byte offset of the first byte that was zero) or into "zero_bytemask()" (to find the bytemask of the bytes preceding the zero byte). The existence of zero_bytemask() is optional, and is not necessary for the normal string routines. But dentry name hashing needs it, so if you enable DENTRY_WORD_AT_A_TIME you need to expose it. This changes the generic strncpy_from_user() function and the dentry hashing functions to use these modified word-at-a-time interfaces. This gets us back to the optimized state of the x86 strncpy that we lost in the previous commit when moving over to the generic version. Signed-off-by: Linus Torvalds --- arch/x86/include/asm/word-at-a-time.h | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/word-at-a-time.h b/arch/x86/include/asm/word-at-a-time.h index ae03fac..5b238981 100644 --- a/arch/x86/include/asm/word-at-a-time.h +++ b/arch/x86/include/asm/word-at-a-time.h @@ -10,6 +10,11 @@ * bit count instruction, that might be better than the multiply * and shift, for example. */ +struct word_at_a_time { + const unsigned long one_bits, high_bits; +}; + +#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) } #ifdef CONFIG_64BIT @@ -37,10 +42,31 @@ static inline long count_masked_bytes(long mask) #endif -/* Return the high bit set in the first byte that is a zero */ -static inline unsigned long has_zero(unsigned long a) +/* Return nonzero if it has a zero */ +static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c) +{ + unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits; + *bits = mask; + return mask; +} + +static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c) +{ + return bits; +} + +static inline unsigned long create_zero_mask(unsigned long bits) +{ + bits = (bits - 1) & ~bits; + return bits >> 7; +} + +/* The mask we created is directly usable as a bytemask */ +#define zero_bytemask(mask) (mask) + +static inline unsigned long find_zero(unsigned long mask) { - return ((a - REPEAT_BYTE(0x01)) & ~a) & REPEAT_BYTE(0x80); + return count_masked_bytes(mask); } /* -- cgit v1.1 From 5723aa993d83803157c22327e90cd59e3dcbe879 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 26 May 2012 11:09:53 -0700 Subject: x86: use the new generic strnlen_user() function This throws away the old x86-specific functions in favor of the generic optimized version. Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 1 + arch/x86/include/asm/uaccess.h | 3 +++ arch/x86/include/asm/uaccess_32.h | 17 -------------- arch/x86/include/asm/uaccess_64.h | 3 --- arch/x86/lib/usercopy_32.c | 41 --------------------------------- arch/x86/lib/usercopy_64.c | 48 --------------------------------------- 6 files changed, 4 insertions(+), 109 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 3220d44..d700811 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -94,6 +94,7 @@ config X86 select GENERIC_TIME_VSYSCALL if X86_64 select KTIME_SCALAR if X86_32 select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER config INSTRUCTION_DECODER def_bool (KPROBES || PERF_EVENTS || UPROBES) diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h index 1354fac..04cd688 100644 --- a/arch/x86/include/asm/uaccess.h +++ b/arch/x86/include/asm/uaccess.h @@ -566,6 +566,9 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n); extern __must_check long strncpy_from_user(char *dst, const char __user *src, long count); +extern __must_check long strlen_user(const char __user *str); +extern __must_check long strnlen_user(const char __user *str, long n); + /* * movsl can be slow when source and dest are not both 8-byte aligned */ diff --git a/arch/x86/include/asm/uaccess_32.h b/arch/x86/include/asm/uaccess_32.h index 8084bc73..576e39b 100644 --- a/arch/x86/include/asm/uaccess_32.h +++ b/arch/x86/include/asm/uaccess_32.h @@ -213,23 +213,6 @@ static inline unsigned long __must_check copy_from_user(void *to, return n; } -/** - * strlen_user: - Get the size of a string in user space. - * @str: The string to measure. - * - * Context: User context only. This function may sleep. - * - * Get the size of a NUL-terminated string in user space. - * - * Returns the size of the string INCLUDING the terminating NUL. - * On exception, returns 0. - * - * If there is a limit on the length of a valid string, you may wish to - * consider using strnlen_user() instead. - */ -#define strlen_user(str) strnlen_user(str, LONG_MAX) - -long strnlen_user(const char __user *str, long n); unsigned long __must_check clear_user(void __user *mem, unsigned long len); unsigned long __must_check __clear_user(void __user *mem, unsigned long len); diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index fcd4b6f..8e796fb 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -208,9 +208,6 @@ int __copy_in_user(void __user *dst, const void __user *src, unsigned size) } } -__must_check long strnlen_user(const char __user *str, long n); -__must_check long __strnlen_user(const char __user *str, long n); -__must_check long strlen_user(const char __user *str); __must_check unsigned long clear_user(void __user *mem, unsigned long len); __must_check unsigned long __clear_user(void __user *mem, unsigned long len); diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 883b216..1781b2f 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -95,47 +95,6 @@ __clear_user(void __user *to, unsigned long n) } EXPORT_SYMBOL(__clear_user); -/** - * strnlen_user: - Get the size of a string in user space. - * @s: The string to measure. - * @n: The maximum valid length - * - * Get the size of a NUL-terminated string in user space. - * - * Returns the size of the string INCLUDING the terminating NUL. - * On exception, returns 0. - * If the string is too long, returns a value greater than @n. - */ -long strnlen_user(const char __user *s, long n) -{ - unsigned long mask = -__addr_ok(s); - unsigned long res, tmp; - - might_fault(); - - __asm__ __volatile__( - " testl %0, %0\n" - " jz 3f\n" - " andl %0,%%ecx\n" - "0: repne; scasb\n" - " setne %%al\n" - " subl %%ecx,%0\n" - " addl %0,%%eax\n" - "1:\n" - ".section .fixup,\"ax\"\n" - "2: xorl %%eax,%%eax\n" - " jmp 1b\n" - "3: movb $1,%%al\n" - " jmp 1b\n" - ".previous\n" - _ASM_EXTABLE(0b,2b) - :"=&r" (n), "=&D" (s), "=&a" (res), "=&c" (tmp) - :"0" (n), "1" (s), "2" (0), "3" (mask) - :"cc"); - return res & mask; -} -EXPORT_SYMBOL(strnlen_user); - #ifdef CONFIG_X86_INTEL_USERCOPY static unsigned long __copy_user_intel(void __user *to, const void *from, unsigned long size) diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 0d0326f..e5b130b 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -52,54 +52,6 @@ unsigned long clear_user(void __user *to, unsigned long n) } EXPORT_SYMBOL(clear_user); -/* - * Return the size of a string (including the ending 0) - * - * Return 0 on exception, a value greater than N if too long - */ - -long __strnlen_user(const char __user *s, long n) -{ - long res = 0; - char c; - - while (1) { - if (res>n) - return n+1; - if (__get_user(c, s)) - return 0; - if (!c) - return res+1; - res++; - s++; - } -} -EXPORT_SYMBOL(__strnlen_user); - -long strnlen_user(const char __user *s, long n) -{ - if (!access_ok(VERIFY_READ, s, 1)) - return 0; - return __strnlen_user(s, n); -} -EXPORT_SYMBOL(strnlen_user); - -long strlen_user(const char __user *s) -{ - long res = 0; - char c; - - for (;;) { - if (get_user(c, s)) - return 0; - if (!c) - return res+1; - res++; - s++; - } -} -EXPORT_SYMBOL(strlen_user); - unsigned long copy_in_user(void __user *to, const void __user *from, unsigned len) { if (access_ok(VERIFY_WRITE, to, len) && access_ok(VERIFY_READ, from, len)) { -- cgit v1.1