diff options
40 files changed, 2648 insertions, 232 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index d9a94da..df884a5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -294,11 +294,6 @@ config X86_32_LAZY_GS def_bool y depends on X86_32 && !CC_STACKPROTECTOR -config ARCH_HWEIGHT_CFLAGS - string - default "-fcall-saved-ecx -fcall-saved-edx" if X86_32 - default "-fcall-saved-rdi -fcall-saved-rsi -fcall-saved-rdx -fcall-saved-rcx -fcall-saved-r8 -fcall-saved-r9 -fcall-saved-r10 -fcall-saved-r11" if X86_64 - config ARCH_SUPPORTS_UPROBES def_bool y diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h index 878e4b9..0d41d68 100644 --- a/arch/x86/boot/bitops.h +++ b/arch/x86/boot/bitops.h @@ -16,14 +16,16 @@ #define BOOT_BITOPS_H #define _LINUX_BITOPS_H /* Inhibit inclusion of <linux/bitops.h> */ -static inline int constant_test_bit(int nr, const void *addr) +#include <linux/types.h> + +static inline bool constant_test_bit(int nr, const void *addr) { const u32 *p = (const u32 *)addr; return ((1UL << (nr & 31)) & (p[nr >> 5])) != 0; } -static inline int variable_test_bit(int nr, const void *addr) +static inline bool variable_test_bit(int nr, const void *addr) { - u8 v; + bool v; const u32 *p = (const u32 *)addr; asm("btl %2,%1; setc %0" : "=qm" (v) : "m" (*p), "Ir" (nr)); diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 9011a88..7c1495f 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -24,6 +24,7 @@ #include <linux/types.h> #include <linux/edd.h> #include <asm/setup.h> +#include <asm/asm.h> #include "bitops.h" #include "ctype.h" #include "cpuflags.h" @@ -176,18 +177,18 @@ static inline void wrgs32(u32 v, addr_t addr) } /* Note: these only return true/false, not a signed return value! 
*/ -static inline int memcmp_fs(const void *s1, addr_t s2, size_t len) +static inline bool memcmp_fs(const void *s1, addr_t s2, size_t len) { - u8 diff; - asm volatile("fs; repe; cmpsb; setnz %0" - : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + bool diff; + asm volatile("fs; repe; cmpsb" CC_SET(nz) + : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } -static inline int memcmp_gs(const void *s1, addr_t s2, size_t len) +static inline bool memcmp_gs(const void *s1, addr_t s2, size_t len) { - u8 diff; - asm volatile("gs; repe; cmpsb; setnz %0" - : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); + bool diff; + asm volatile("gs; repe; cmpsb" CC_SET(nz) + : CC_OUT(nz) (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; } diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 318b846..cc3bd58 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -17,7 +17,7 @@ int memcmp(const void *s1, const void *s2, size_t len) { - u8 diff; + bool diff; asm("repe; cmpsb; setnz %0" : "=qm" (diff), "+D" (s1), "+S" (s2), "+c" (len)); return diff; diff --git a/arch/x86/entry/thunk_64.S b/arch/x86/entry/thunk_64.S index 027aec4..627ecbc 100644 --- a/arch/x86/entry/thunk_64.S +++ b/arch/x86/entry/thunk_64.S @@ -33,7 +33,7 @@ .endif call \func - jmp restore + jmp .L_restore _ASM_NOKPROBE(\name) .endm @@ -54,7 +54,7 @@ #if defined(CONFIG_TRACE_IRQFLAGS) \ || defined(CONFIG_DEBUG_LOCK_ALLOC) \ || defined(CONFIG_PREEMPT) -restore: +.L_restore: popq %r11 popq %r10 popq %r9 @@ -66,5 +66,5 @@ restore: popq %rdi popq %rbp ret - _ASM_NOKPROBE(restore) + _ASM_NOKPROBE(.L_restore) #endif diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index 253b72e..68b63fd 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -134,7 +134,7 @@ VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-m,elf_i386 -Wl,-soname=linux-gate.so.1 override obj-dirs = $(dir $(obj)) $(obj)/vdso32/ targets += vdso32/vdso32.lds -targets += 
vdso32/note.o vdso32/vclock_gettime.o vdso32/system_call.o +targets += vdso32/note.o vdso32/system_call.o vdso32/sigreturn.o targets += vdso32/vclock_gettime.o KBUILD_AFLAGS_32 := $(filter-out -m64,$(KBUILD_AFLAGS)) -DBUILD_VDSO @@ -156,7 +156,8 @@ $(obj)/vdso32.so.dbg: FORCE \ $(obj)/vdso32/vdso32.lds \ $(obj)/vdso32/vclock_gettime.o \ $(obj)/vdso32/note.o \ - $(obj)/vdso32/system_call.o + $(obj)/vdso32/system_call.o \ + $(obj)/vdso32/sigreturn.o $(call if_changed,vdso) # diff --git a/arch/x86/entry/vdso/vdso32/sigreturn.S b/arch/x86/entry/vdso/vdso32/sigreturn.S index d7ec4e2..20633e0 100644 --- a/arch/x86/entry/vdso/vdso32/sigreturn.S +++ b/arch/x86/entry/vdso/vdso32/sigreturn.S @@ -1,11 +1,3 @@ -/* - * Common code for the sigreturn entry points in vDSO images. - * So far this code is the same for both int80 and sysenter versions. - * This file is #include'd by int80.S et al to define them first thing. - * The kernel assumes that the addresses of these routines are constant - * for all vDSO implementations. - */ - #include <linux/linkage.h> #include <asm/unistd_32.h> #include <asm/asm-offsets.h> diff --git a/arch/x86/entry/vdso/vdso32/system_call.S b/arch/x86/entry/vdso/vdso32/system_call.S index 0109ac6..ed4bc97 100644 --- a/arch/x86/entry/vdso/vdso32/system_call.S +++ b/arch/x86/entry/vdso/vdso32/system_call.S @@ -2,16 +2,11 @@ * AT_SYSINFO entry point */ +#include <linux/linkage.h> #include <asm/dwarf2.h> #include <asm/cpufeatures.h> #include <asm/alternative-asm.h> -/* - * First get the common code for the sigreturn entry points. - * This must come first. 
- */ -#include "sigreturn.S" - .text .globl __kernel_vsyscall .type __kernel_vsyscall,@function diff --git a/arch/x86/include/asm/apm.h b/arch/x86/include/asm/apm.h index 20370c6..93eebc63 100644 --- a/arch/x86/include/asm/apm.h +++ b/arch/x86/include/asm/apm.h @@ -45,11 +45,11 @@ static inline void apm_bios_call_asm(u32 func, u32 ebx_in, u32 ecx_in, : "memory", "cc"); } -static inline u8 apm_bios_call_simple_asm(u32 func, u32 ebx_in, - u32 ecx_in, u32 *eax) +static inline bool apm_bios_call_simple_asm(u32 func, u32 ebx_in, + u32 ecx_in, u32 *eax) { int cx, dx, si; - u8 error; + bool error; /* * N.B. We do NOT need a cld after the BIOS call diff --git a/arch/x86/include/asm/arch_hweight.h b/arch/x86/include/asm/arch_hweight.h index 02e799f..e7cd631 100644 --- a/arch/x86/include/asm/arch_hweight.h +++ b/arch/x86/include/asm/arch_hweight.h @@ -4,8 +4,8 @@ #include <asm/cpufeatures.h> #ifdef CONFIG_64BIT -/* popcnt %edi, %eax -- redundant REX prefix for alignment */ -#define POPCNT32 ".byte 0xf3,0x40,0x0f,0xb8,0xc7" +/* popcnt %edi, %eax */ +#define POPCNT32 ".byte 0xf3,0x0f,0xb8,0xc7" /* popcnt %rdi, %rax */ #define POPCNT64 ".byte 0xf3,0x48,0x0f,0xb8,0xc7" #define REG_IN "D" @@ -17,19 +17,15 @@ #define REG_OUT "a" #endif -/* - * __sw_hweightXX are called from within the alternatives below - * and callee-clobbered registers need to be taken care of. See - * ARCH_HWEIGHT_CFLAGS in <arch/x86/Kconfig> for the respective - * compiler switches. 
- */ +#define __HAVE_ARCH_SW_HWEIGHT + static __always_inline unsigned int __arch_hweight32(unsigned int w) { - unsigned int res = 0; + unsigned int res; asm (ALTERNATIVE("call __sw_hweight32", POPCNT32, X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + : "="REG_OUT (res) + : REG_IN (w)); return res; } @@ -53,11 +49,11 @@ static inline unsigned long __arch_hweight64(__u64 w) #else static __always_inline unsigned long __arch_hweight64(__u64 w) { - unsigned long res = 0; + unsigned long res; asm (ALTERNATIVE("call __sw_hweight64", POPCNT64, X86_FEATURE_POPCNT) - : "="REG_OUT (res) - : REG_IN (w)); + : "="REG_OUT (res) + : REG_IN (w)); return res; } diff --git a/arch/x86/include/asm/archrandom.h b/arch/x86/include/asm/archrandom.h index 69f1366..5b0579a 100644 --- a/arch/x86/include/asm/archrandom.h +++ b/arch/x86/include/asm/archrandom.h @@ -25,8 +25,6 @@ #include <asm/processor.h> #include <asm/cpufeature.h> -#include <asm/alternative.h> -#include <asm/nops.h> #define RDRAND_RETRY_LOOPS 10 @@ -40,97 +38,91 @@ # define RDSEED_LONG RDSEED_INT #endif -#ifdef CONFIG_ARCH_RANDOM +/* Unconditional execution of RDRAND and RDSEED */ -/* Instead of arch_get_random_long() when alternatives haven't run. 
*/ -static inline int rdrand_long(unsigned long *v) +static inline bool rdrand_long(unsigned long *v) { - int ok; - asm volatile("1: " RDRAND_LONG "\n\t" - "jc 2f\n\t" - "decl %0\n\t" - "jnz 1b\n\t" - "2:" - : "=r" (ok), "=a" (*v) - : "0" (RDRAND_RETRY_LOOPS)); - return ok; + bool ok; + unsigned int retry = RDRAND_RETRY_LOOPS; + do { + asm volatile(RDRAND_LONG "\n\t" + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); + if (ok) + return true; + } while (--retry); + return false; +} + +static inline bool rdrand_int(unsigned int *v) +{ + bool ok; + unsigned int retry = RDRAND_RETRY_LOOPS; + do { + asm volatile(RDRAND_INT "\n\t" + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); + if (ok) + return true; + } while (--retry); + return false; } -/* A single attempt at RDSEED */ static inline bool rdseed_long(unsigned long *v) { - unsigned char ok; + bool ok; asm volatile(RDSEED_LONG "\n\t" - "setc %0" - : "=qm" (ok), "=a" (*v)); + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); return ok; } -#define GET_RANDOM(name, type, rdrand, nop) \ -static inline int name(type *v) \ -{ \ - int ok; \ - alternative_io("movl $0, %0\n\t" \ - nop, \ - "\n1: " rdrand "\n\t" \ - "jc 2f\n\t" \ - "decl %0\n\t" \ - "jnz 1b\n\t" \ - "2:", \ - X86_FEATURE_RDRAND, \ - ASM_OUTPUT2("=r" (ok), "=a" (*v)), \ - "0" (RDRAND_RETRY_LOOPS)); \ - return ok; \ -} - -#define GET_SEED(name, type, rdseed, nop) \ -static inline int name(type *v) \ -{ \ - unsigned char ok; \ - alternative_io("movb $0, %0\n\t" \ - nop, \ - rdseed "\n\t" \ - "setc %0", \ - X86_FEATURE_RDSEED, \ - ASM_OUTPUT2("=q" (ok), "=a" (*v))); \ - return ok; \ +static inline bool rdseed_int(unsigned int *v) +{ + bool ok; + asm volatile(RDSEED_INT "\n\t" + CC_SET(c) + : CC_OUT(c) (ok), "=a" (*v)); + return ok; } -#ifdef CONFIG_X86_64 - -GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP5); -GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP4); - -GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP5); 
-GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4); - -#else - -GET_RANDOM(arch_get_random_long, unsigned long, RDRAND_LONG, ASM_NOP3); -GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3); - -GET_SEED(arch_get_random_seed_long, unsigned long, RDSEED_LONG, ASM_NOP4); -GET_SEED(arch_get_random_seed_int, unsigned int, RDSEED_INT, ASM_NOP4); - -#endif /* CONFIG_X86_64 */ - +/* Conditional execution based on CPU type */ #define arch_has_random() static_cpu_has(X86_FEATURE_RDRAND) #define arch_has_random_seed() static_cpu_has(X86_FEATURE_RDSEED) -#else +/* + * These are the generic interfaces; they must not be declared if the + * stubs in <linux/random.h> are to be invoked, + * i.e. CONFIG_ARCH_RANDOM is not defined. + */ +#ifdef CONFIG_ARCH_RANDOM -static inline int rdrand_long(unsigned long *v) +static inline bool arch_get_random_long(unsigned long *v) { - return 0; + return arch_has_random() ? rdrand_long(v) : false; } -static inline bool rdseed_long(unsigned long *v) +static inline bool arch_get_random_int(unsigned int *v) { - return 0; + return arch_has_random() ? rdrand_int(v) : false; } -#endif /* CONFIG_ARCH_RANDOM */ +static inline bool arch_get_random_seed_long(unsigned long *v) +{ + return arch_has_random_seed() ? rdseed_long(v) : false; +} + +static inline bool arch_get_random_seed_int(unsigned int *v) +{ + return arch_has_random_seed() ? 
rdseed_int(v) : false; +} extern void x86_init_rdrand(struct cpuinfo_x86 *c); +#else /* !CONFIG_ARCH_RANDOM */ + +static inline void x86_init_rdrand(struct cpuinfo_x86 *c) { } + +#endif /* !CONFIG_ARCH_RANDOM */ + #endif /* ASM_X86_ARCHRANDOM_H */ diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index f5063b6..7acb51c 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -42,6 +42,18 @@ #define _ASM_SI __ASM_REG(si) #define _ASM_DI __ASM_REG(di) +/* + * Macros to generate condition code outputs from inline assembly, + * The output operand must be type "bool". + */ +#ifdef __GCC_ASM_FLAG_OUTPUTS__ +# define CC_SET(c) "\n\t/* output condition code " #c "*/\n" +# define CC_OUT(c) "=@cc" #c +#else +# define CC_SET(c) "\n\tset" #c " %[_cc_" #c "]\n" +# define CC_OUT(c) [_cc_ ## c] "=qm" +#endif + /* Exception table entry */ #ifdef __ASSEMBLY__ # define _ASM_EXTABLE_HANDLE(from, to, handler) \ diff --git a/arch/x86/include/asm/atomic.h b/arch/x86/include/asm/atomic.h index 3e86742..7322c15 100644 --- a/arch/x86/include/asm/atomic.h +++ b/arch/x86/include/asm/atomic.h @@ -75,9 +75,9 @@ static __always_inline void atomic_sub(int i, atomic_t *v) * true if the result is zero, or false for all * other cases. */ -static __always_inline int atomic_sub_and_test(int i, atomic_t *v) +static __always_inline bool atomic_sub_and_test(int i, atomic_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", "e"); + GEN_BINARY_RMWcc(LOCK_PREFIX "subl", v->counter, "er", i, "%0", e); } /** @@ -112,9 +112,9 @@ static __always_inline void atomic_dec(atomic_t *v) * returns true if the result is 0, or false for all other * cases. 
*/ -static __always_inline int atomic_dec_and_test(atomic_t *v) +static __always_inline bool atomic_dec_and_test(atomic_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", e); } /** @@ -125,9 +125,9 @@ static __always_inline int atomic_dec_and_test(atomic_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static __always_inline int atomic_inc_and_test(atomic_t *v) +static __always_inline bool atomic_inc_and_test(atomic_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "incl", v->counter, "%0", e); } /** @@ -139,9 +139,9 @@ static __always_inline int atomic_inc_and_test(atomic_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static __always_inline int atomic_add_negative(int i, atomic_t *v) +static __always_inline bool atomic_add_negative(int i, atomic_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", "s"); + GEN_BINARY_RMWcc(LOCK_PREFIX "addl", v->counter, "er", i, "%0", s); } /** diff --git a/arch/x86/include/asm/atomic64_64.h b/arch/x86/include/asm/atomic64_64.h index 0373510..57bf925 100644 --- a/arch/x86/include/asm/atomic64_64.h +++ b/arch/x86/include/asm/atomic64_64.h @@ -70,9 +70,9 @@ static inline void atomic64_sub(long i, atomic64_t *v) * true if the result is zero, or false for all * other cases. */ -static inline int atomic64_sub_and_test(long i, atomic64_t *v) +static inline bool atomic64_sub_and_test(long i, atomic64_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", "e"); + GEN_BINARY_RMWcc(LOCK_PREFIX "subq", v->counter, "er", i, "%0", e); } /** @@ -109,9 +109,9 @@ static __always_inline void atomic64_dec(atomic64_t *v) * returns true if the result is 0, or false for all other * cases. 
*/ -static inline int atomic64_dec_and_test(atomic64_t *v) +static inline bool atomic64_dec_and_test(atomic64_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "decq", v->counter, "%0", e); } /** @@ -122,9 +122,9 @@ static inline int atomic64_dec_and_test(atomic64_t *v) * and returns true if the result is zero, or false for all * other cases. */ -static inline int atomic64_inc_and_test(atomic64_t *v) +static inline bool atomic64_inc_and_test(atomic64_t *v) { - GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", "e"); + GEN_UNARY_RMWcc(LOCK_PREFIX "incq", v->counter, "%0", e); } /** @@ -136,9 +136,9 @@ static inline int atomic64_inc_and_test(atomic64_t *v) * if the result is negative, or false when * result is greater than or equal to zero. */ -static inline int atomic64_add_negative(long i, atomic64_t *v) +static inline bool atomic64_add_negative(long i, atomic64_t *v) { - GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", "s"); + GEN_BINARY_RMWcc(LOCK_PREFIX "addq", v->counter, "er", i, "%0", s); } /** @@ -180,7 +180,7 @@ static inline long atomic64_xchg(atomic64_t *v, long new) * Atomically adds @a to @v, so long as it was not @u. * Returns the old value of @v. */ -static inline int atomic64_add_unless(atomic64_t *v, long a, long u) +static inline bool atomic64_add_unless(atomic64_t *v, long a, long u) { long c, old; c = atomic64_read(v); diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index 7766d1c..68557f52 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -201,9 +201,9 @@ static __always_inline void change_bit(long nr, volatile unsigned long *addr) * This operation is atomic and cannot be reordered. * It also implies a memory barrier. 
*/ -static __always_inline int test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_set_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "bts", *addr, "Ir", nr, "%0", c); } /** @@ -213,7 +213,7 @@ static __always_inline int test_and_set_bit(long nr, volatile unsigned long *add * * This is the same as test_and_set_bit on x86. */ -static __always_inline int +static __always_inline bool test_and_set_bit_lock(long nr, volatile unsigned long *addr) { return test_and_set_bit(nr, addr); @@ -228,13 +228,13 @@ test_and_set_bit_lock(long nr, volatile unsigned long *addr) * If two examples of this operation race, one can appear to succeed * but actually fail. You must protect multiple accesses with a lock. */ -static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_set_bit(long nr, volatile unsigned long *addr) { - int oldbit; + bool oldbit; asm("bts %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit), ADDR + CC_SET(c) + : CC_OUT(c) (oldbit), ADDR : "Ir" (nr)); return oldbit; } @@ -247,9 +247,9 @@ static __always_inline int __test_and_set_bit(long nr, volatile unsigned long *a * This operation is atomic and cannot be reordered. * It also implies a memory barrier. 
*/ -static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_clear_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "btr", *addr, "Ir", nr, "%0", c); } /** @@ -268,25 +268,25 @@ static __always_inline int test_and_clear_bit(long nr, volatile unsigned long *a * accessed from a hypervisor on the same CPU if running in a VM: don't change * this without also updating arch/x86/kernel/kvm.c */ -static __always_inline int __test_and_clear_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_clear_bit(long nr, volatile unsigned long *addr) { - int oldbit; + bool oldbit; asm volatile("btr %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit), ADDR + CC_SET(c) + : CC_OUT(c) (oldbit), ADDR : "Ir" (nr)); return oldbit; } /* WARNING: non atomic and it can be reordered! */ -static __always_inline int __test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool __test_and_change_bit(long nr, volatile unsigned long *addr) { - int oldbit; + bool oldbit; asm volatile("btc %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit), ADDR + CC_SET(c) + : CC_OUT(c) (oldbit), ADDR : "Ir" (nr) : "memory"); return oldbit; @@ -300,24 +300,24 @@ static __always_inline int __test_and_change_bit(long nr, volatile unsigned long * This operation is atomic and cannot be reordered. * It also implies a memory barrier. 
*/ -static __always_inline int test_and_change_bit(long nr, volatile unsigned long *addr) +static __always_inline bool test_and_change_bit(long nr, volatile unsigned long *addr) { - GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", "c"); + GEN_BINARY_RMWcc(LOCK_PREFIX "btc", *addr, "Ir", nr, "%0", c); } -static __always_inline int constant_test_bit(long nr, const volatile unsigned long *addr) +static __always_inline bool constant_test_bit(long nr, const volatile unsigned long *addr) { return ((1UL << (nr & (BITS_PER_LONG-1))) & (addr[nr >> _BITOPS_LONG_SHIFT])) != 0; } -static __always_inline int variable_test_bit(long nr, volatile const unsigned long *addr) +static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr) { - int oldbit; + bool oldbit; asm volatile("bt %2,%1\n\t" - "sbb %0,%0" - : "=r" (oldbit) + CC_SET(c) + : CC_OUT(c) (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); return oldbit; @@ -329,7 +329,7 @@ static __always_inline int variable_test_bit(long nr, volatile const unsigned lo * @nr: bit number to test * @addr: Address to start counting from */ -static int test_bit(int nr, const volatile unsigned long *addr); +static bool test_bit(int nr, const volatile unsigned long *addr); #endif #define test_bit(nr, addr) \ diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index 5a3b2c1..a188061 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -40,6 +40,7 @@ typedef s32 compat_long_t; typedef s64 __attribute__((aligned(4))) compat_s64; typedef u32 compat_uint_t; typedef u32 compat_ulong_t; +typedef u32 compat_u32; typedef u64 __attribute__((aligned(4))) compat_u64; typedef u32 compat_uptr_t; @@ -181,6 +182,16 @@ typedef struct compat_siginfo { /* SIGILL, SIGFPE, SIGSEGV, SIGBUS */ struct { unsigned int _addr; /* faulting insn/memory ref. */ + short int _addr_lsb; /* Valid LSB of the reported address. 
*/ + union { + /* used when si_code=SEGV_BNDERR */ + struct { + compat_uptr_t _lower; + compat_uptr_t _upper; + } _addr_bnd; + /* used when si_code=SEGV_PKUERR */ + compat_u32 _pkey; + }; } _sigfault; /* SIGPOLL */ diff --git a/arch/x86/include/asm/local.h b/arch/x86/include/asm/local.h index 4ad6560..7511978 100644 --- a/arch/x86/include/asm/local.h +++ b/arch/x86/include/asm/local.h @@ -50,9 +50,9 @@ static inline void local_sub(long i, local_t *l) * true if the result is zero, or false for all * other cases. */ -static inline int local_sub_and_test(long i, local_t *l) +static inline bool local_sub_and_test(long i, local_t *l) { - GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", "e"); + GEN_BINARY_RMWcc(_ASM_SUB, l->a.counter, "er", i, "%0", e); } /** @@ -63,9 +63,9 @@ static inline int local_sub_and_test(long i, local_t *l) * returns true if the result is 0, or false for all other * cases. */ -static inline int local_dec_and_test(local_t *l) +static inline bool local_dec_and_test(local_t *l) { - GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", "e"); + GEN_UNARY_RMWcc(_ASM_DEC, l->a.counter, "%0", e); } /** @@ -76,9 +76,9 @@ static inline int local_dec_and_test(local_t *l) * and returns true if the result is zero, or false for all * other cases. */ -static inline int local_inc_and_test(local_t *l) +static inline bool local_inc_and_test(local_t *l) { - GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", "e"); + GEN_UNARY_RMWcc(_ASM_INC, l->a.counter, "%0", e); } /** @@ -90,9 +90,9 @@ static inline int local_inc_and_test(local_t *l) * if the result is negative, or false when * result is greater than or equal to zero. 
*/ -static inline int local_add_negative(long i, local_t *l) +static inline bool local_add_negative(long i, local_t *l) { - GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", "s"); + GEN_BINARY_RMWcc(_ASM_ADD, l->a.counter, "er", i, "%0", s); } /** diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h index e0ba66c..e02e3f8 100644 --- a/arch/x86/include/asm/percpu.h +++ b/arch/x86/include/asm/percpu.h @@ -510,14 +510,15 @@ do { \ /* This is not atomic against other CPUs -- CPU preemption needs to be off */ #define x86_test_and_clear_bit_percpu(bit, var) \ ({ \ - int old__; \ - asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \ - : "=r" (old__), "+m" (var) \ + bool old__; \ + asm volatile("btr %2,"__percpu_arg(1)"\n\t" \ + CC_SET(c) \ + : CC_OUT(c) (old__), "+m" (var) \ : "dIr" (bit)); \ old__; \ }) -static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr, +static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr, const unsigned long __percpu *addr) { unsigned long __percpu *a = (unsigned long *)addr + nr / BITS_PER_LONG; @@ -529,14 +530,14 @@ static __always_inline int x86_this_cpu_constant_test_bit(unsigned int nr, #endif } -static inline int x86_this_cpu_variable_test_bit(int nr, +static inline bool x86_this_cpu_variable_test_bit(int nr, const unsigned long __percpu *addr) { - int oldbit; + bool oldbit; asm volatile("bt "__percpu_arg(2)",%1\n\t" - "sbb %0,%0" - : "=r" (oldbit) + CC_SET(c) + : CC_OUT(c) (oldbit) : "m" (*(unsigned long *)addr), "Ir" (nr)); return oldbit; diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index d397deb..17f2186 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -81,7 +81,7 @@ static __always_inline void __preempt_count_sub(int val) */ static __always_inline bool __preempt_count_dec_and_test(void) { - GEN_UNARY_RMWcc("decl", __preempt_count, __percpu_arg(0), "e"); + GEN_UNARY_RMWcc("decl", __preempt_count, 
__percpu_arg(0), e); } /* diff --git a/arch/x86/include/asm/rmwcc.h b/arch/x86/include/asm/rmwcc.h index 8f7866a..661dd30 100644 --- a/arch/x86/include/asm/rmwcc.h +++ b/arch/x86/include/asm/rmwcc.h @@ -1,11 +1,13 @@ #ifndef _ASM_X86_RMWcc #define _ASM_X86_RMWcc -#ifdef CC_HAVE_ASM_GOTO +#if !defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(CC_HAVE_ASM_GOTO) + +/* Use asm goto */ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ - asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \ + asm_volatile_goto (fullop "; j" #cc " %l[cc_label]" \ : : "m" (var), ## __VA_ARGS__ \ : "memory" : cc_label); \ return 0; \ @@ -19,15 +21,17 @@ cc_label: \ #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ __GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val)) -#else /* !CC_HAVE_ASM_GOTO */ +#else /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ + +/* Use flags output or a set instruction */ #define __GEN_RMWcc(fullop, var, cc, ...) \ do { \ - char c; \ - asm volatile (fullop "; set" cc " %1" \ - : "+m" (var), "=qm" (c) \ + bool c; \ + asm volatile (fullop ";" CC_SET(cc) \ + : "+m" (var), CC_OUT(cc) (c) \ : __VA_ARGS__ : "memory"); \ - return c != 0; \ + return c; \ } while (0) #define GEN_UNARY_RMWcc(op, var, arg0, cc) \ @@ -36,6 +40,6 @@ do { \ #define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ __GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val)) -#endif /* CC_HAVE_ASM_GOTO */ +#endif /* defined(__GCC_ASM_FLAG_OUTPUTS__) || !defined(CC_HAVE_ASM_GOTO) */ #endif /* _ASM_X86_RMWcc */ diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h index 453744c..1e8be26 100644 --- a/arch/x86/include/asm/rwsem.h +++ b/arch/x86/include/asm/rwsem.h @@ -77,7 +77,7 @@ static inline void __down_read(struct rw_semaphore *sem) /* * trylock for reading -- returns 1 if successful, 0 if contention */ -static inline int __down_read_trylock(struct rw_semaphore *sem) +static inline bool __down_read_trylock(struct rw_semaphore *sem) { long result, tmp; asm volatile("# 
beginning __down_read_trylock\n\t" @@ -93,7 +93,7 @@ static inline int __down_read_trylock(struct rw_semaphore *sem) : "+m" (sem->count), "=&a" (result), "=&r" (tmp) : "i" (RWSEM_ACTIVE_READ_BIAS) : "memory", "cc"); - return result >= 0 ? 1 : 0; + return result >= 0; } /* @@ -134,9 +134,10 @@ static inline int __down_write_killable(struct rw_semaphore *sem) /* * trylock for writing -- returns 1 if successful, 0 if contention */ -static inline int __down_write_trylock(struct rw_semaphore *sem) +static inline bool __down_write_trylock(struct rw_semaphore *sem) { - long result, tmp; + bool result; + long tmp0, tmp1; asm volatile("# beginning __down_write_trylock\n\t" " mov %0,%1\n\t" "1:\n\t" @@ -144,14 +145,14 @@ static inline int __down_write_trylock(struct rw_semaphore *sem) /* was the active mask 0 before? */ " jnz 2f\n\t" " mov %1,%2\n\t" - " add %3,%2\n\t" + " add %4,%2\n\t" LOCK_PREFIX " cmpxchg %2,%0\n\t" " jnz 1b\n\t" "2:\n\t" - " sete %b1\n\t" - " movzbl %b1, %k1\n\t" + CC_SET(e) "# ending __down_write_trylock\n\t" - : "+m" (sem->count), "=&a" (result), "=&r" (tmp) + : "+m" (sem->count), "=&a" (tmp0), "=&r" (tmp1), + CC_OUT(e) (result) : "er" (RWSEM_ACTIVE_WRITE_BIAS) : "memory", "cc"); return result; diff --git a/arch/x86/include/asm/signal.h b/arch/x86/include/asm/signal.h index 2138c9a..dd1e7d6 100644 --- a/arch/x86/include/asm/signal.h +++ b/arch/x86/include/asm/signal.h @@ -81,9 +81,9 @@ static inline int __const_sigismember(sigset_t *set, int _sig) static inline int __gen_sigismember(sigset_t *set, int _sig) { - int ret; - asm("btl %2,%1\n\tsbbl %0,%0" - : "=r"(ret) : "m"(*set), "Ir"(_sig-1) : "cc"); + unsigned char ret; + asm("btl %2,%1\n\tsetc %0" + : "=qm"(ret) : "m"(*set), "Ir"(_sig-1) : "cc"); return ret; } diff --git a/arch/x86/include/asm/sync_bitops.h b/arch/x86/include/asm/sync_bitops.h index f28a24b..cbf8847 100644 --- a/arch/x86/include/asm/sync_bitops.h +++ b/arch/x86/include/asm/sync_bitops.h @@ -79,10 +79,10 @@ static inline void 
sync_change_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; - asm volatile("lock; bts %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "+m" (ADDR) + asm volatile("lock; bts %2,%1\n\tsetc %0" + : "=qm" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory"); return oldbit; } @@ -97,10 +97,10 @@ static inline int sync_test_and_set_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; - asm volatile("lock; btr %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "+m" (ADDR) + asm volatile("lock; btr %2,%1\n\tsetc %0" + : "=qm" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory"); return oldbit; } @@ -115,10 +115,10 @@ static inline int sync_test_and_clear_bit(long nr, volatile unsigned long *addr) */ static inline int sync_test_and_change_bit(long nr, volatile unsigned long *addr) { - int oldbit; + unsigned char oldbit; - asm volatile("lock; btc %2,%1\n\tsbbl %0,%0" - : "=r" (oldbit), "+m" (ADDR) + asm volatile("lock; btc %2,%1\n\tsetc %0" + : "=qm" (oldbit), "+m" (ADDR) : "Ir" (nr) : "memory"); return oldbit; } diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c index f6f50c4..cfa97ff 100644 --- a/arch/x86/kernel/cpu/rdrand.c +++ b/arch/x86/kernel/cpu/rdrand.c @@ -39,9 +39,9 @@ __setup("nordrand", x86_rdrand_setup); */ #define SANITY_CHECK_LOOPS 8 +#ifdef CONFIG_ARCH_RANDOM void x86_init_rdrand(struct cpuinfo_x86 *c) { -#ifdef CONFIG_ARCH_RANDOM unsigned long tmp; int i; @@ -55,5 +55,5 @@ void x86_init_rdrand(struct cpuinfo_x86 *c) return; } } -#endif } +#endif diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c index 64341aa..d40ee8a 100644 --- a/arch/x86/kernel/i386_ksyms_32.c +++ b/arch/x86/kernel/i386_ksyms_32.c @@ -42,3 +42,5 @@ EXPORT_SYMBOL(empty_zero_page); EXPORT_SYMBOL(___preempt_schedule); 
EXPORT_SYMBOL(___preempt_schedule_notrace); #endif + +EXPORT_SYMBOL(__sw_hweight32); diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index dc3c0b1..b44564b 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -1,11 +1,104 @@ #include <linux/compat.h> #include <linux/uaccess.h> +/* + * The compat_siginfo_t structure and handling code is very easy + * to break in several ways. It must always be updated when new + * updates are made to the main siginfo_t, and + * copy_siginfo_to_user32() must be updated when the + * (arch-independent) copy_siginfo_to_user() is updated. + * + * It is also easy to put a new member in the compat_siginfo_t + * which has implicit alignment which can move internal structure + * alignment around breaking the ABI. This can happen if you, + * for instance, put a plain 64-bit value in there. + */ +static inline void signal_compat_build_tests(void) +{ + int _sifields_offset = offsetof(compat_siginfo_t, _sifields); + + /* + * If adding a new si_code, there is probably new data in + * the siginfo. Make sure folks bumping the si_code + * limits also have to look at this code. Make sure any + * new fields are handled in copy_siginfo_to_user32()! + */ + BUILD_BUG_ON(NSIGILL != 8); + BUILD_BUG_ON(NSIGFPE != 8); + BUILD_BUG_ON(NSIGSEGV != 4); + BUILD_BUG_ON(NSIGBUS != 5); + BUILD_BUG_ON(NSIGTRAP != 4); + BUILD_BUG_ON(NSIGCHLD != 6); + BUILD_BUG_ON(NSIGSYS != 1); + + /* This is part of the ABI and can never change in size: */ + BUILD_BUG_ON(sizeof(compat_siginfo_t) != 128); + /* + * The offsets of all the (unioned) si_fields are fixed + * in the ABI, of course. Make sure none of them ever + * move and are always at the beginning: + */ + BUILD_BUG_ON(offsetof(compat_siginfo_t, _sifields) != 3 * sizeof(int)); +#define CHECK_CSI_OFFSET(name) BUILD_BUG_ON(_sifields_offset != offsetof(compat_siginfo_t, _sifields.name)) + + /* + * Ensure that the size of each si_field never changes. 
+ * If it does, it is a sign that the + * copy_siginfo_to_user32() code below needs to updated + * along with the size in the CHECK_SI_SIZE(). + * + * We repeat this check for both the generic and compat + * siginfos. + * + * Note: it is OK for these to grow as long as the whole + * structure stays within the padding size (checked + * above). + */ +#define CHECK_CSI_SIZE(name, size) BUILD_BUG_ON(size != sizeof(((compat_siginfo_t *)0)->_sifields.name)) +#define CHECK_SI_SIZE(name, size) BUILD_BUG_ON(size != sizeof(((siginfo_t *)0)->_sifields.name)) + + CHECK_CSI_OFFSET(_kill); + CHECK_CSI_SIZE (_kill, 2*sizeof(int)); + CHECK_SI_SIZE (_kill, 2*sizeof(int)); + + CHECK_CSI_OFFSET(_timer); + CHECK_CSI_SIZE (_timer, 5*sizeof(int)); + CHECK_SI_SIZE (_timer, 6*sizeof(int)); + + CHECK_CSI_OFFSET(_rt); + CHECK_CSI_SIZE (_rt, 3*sizeof(int)); + CHECK_SI_SIZE (_rt, 4*sizeof(int)); + + CHECK_CSI_OFFSET(_sigchld); + CHECK_CSI_SIZE (_sigchld, 5*sizeof(int)); + CHECK_SI_SIZE (_sigchld, 8*sizeof(int)); + + CHECK_CSI_OFFSET(_sigchld_x32); + CHECK_CSI_SIZE (_sigchld_x32, 7*sizeof(int)); + /* no _sigchld_x32 in the generic siginfo_t */ + + CHECK_CSI_OFFSET(_sigfault); + CHECK_CSI_SIZE (_sigfault, 4*sizeof(int)); + CHECK_SI_SIZE (_sigfault, 8*sizeof(int)); + + CHECK_CSI_OFFSET(_sigpoll); + CHECK_CSI_SIZE (_sigpoll, 2*sizeof(int)); + CHECK_SI_SIZE (_sigpoll, 4*sizeof(int)); + + CHECK_CSI_OFFSET(_sigsys); + CHECK_CSI_SIZE (_sigsys, 3*sizeof(int)); + CHECK_SI_SIZE (_sigsys, 4*sizeof(int)); + + /* any new si_fields should be added here */ +} + int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) { int err = 0; bool ia32 = test_thread_flag(TIF_IA32); + signal_compat_build_tests(); + if (!access_ok(VERIFY_WRITE, to, sizeof(compat_siginfo_t))) return -EFAULT; @@ -32,6 +125,21 @@ int copy_siginfo_to_user32(compat_siginfo_t __user *to, const siginfo_t *from) &to->_sifields._pad[0]); switch (from->si_code >> 16) { case __SI_FAULT >> 16: + if (from->si_signo == SIGBUS && 
+				    (from->si_code == BUS_MCEERR_AR ||
+				     from->si_code == BUS_MCEERR_AO))
+					put_user_ex(from->si_addr_lsb, &to->si_addr_lsb);
+
+				if (from->si_signo == SIGSEGV) {
+					if (from->si_code == SEGV_BNDERR) {
+						compat_uptr_t lower = (unsigned long)from->si_lower; /* bound from the kernel siginfo */
+						compat_uptr_t upper = (unsigned long)from->si_upper; /* bound from the kernel siginfo */
+						put_user_ex(lower, &to->si_lower);
+						put_user_ex(upper, &to->si_upper);
+					}
+					if (from->si_code == SEGV_PKUERR)
+						put_user_ex(from->si_pkey, &to->si_pkey);
+				}
 				break;
 			case __SI_SYS >> 16:
 				put_user_ex(from->si_syscall, &to->si_syscall);
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 3dce1ca..01f30e5 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -440,10 +440,7 @@ static inline unsigned long get_vflags(struct kernel_vm86_regs *regs)
 
 static inline int is_revectored(int nr, struct revectored_struct *bitmap)
 {
-	__asm__ __volatile__("btl %2,%1\n\tsbbl %0,%0"
-		:"=r" (nr)
-		:"m" (*bitmap), "r" (nr));
-	return nr;
+	return test_bit(nr, bitmap->__map);
 }
 
 #define val_byte(val, n) (((__u8 *)&val)[n])
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index cd05942..f1aebfb 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -44,6 +44,9 @@ EXPORT_SYMBOL(clear_page);
 
 EXPORT_SYMBOL(csum_partial);
 
+EXPORT_SYMBOL(__sw_hweight32);
+EXPORT_SYMBOL(__sw_hweight64);
+
 /*
  * Export string functions. We normally rely on gcc builtin for most of these,
  * but gcc sometimes decides not to inline them.
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 72a5767..ec969cc 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -25,7 +25,7 @@ lib-y += memcpy_$(BITS).o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o -obj-y += msr.o msr-reg.o msr-reg-export.o +obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o ifeq ($(CONFIG_X86_32),y) obj-y += atomic64_32.o diff --git a/arch/x86/lib/hweight.S b/arch/x86/lib/hweight.S new file mode 100644 index 0000000..02de3d7 --- /dev/null +++ b/arch/x86/lib/hweight.S @@ -0,0 +1,77 @@ +#include <linux/linkage.h> + +#include <asm/asm.h> + +/* + * unsigned int __sw_hweight32(unsigned int w) + * %rdi: w + */ +ENTRY(__sw_hweight32) + +#ifdef CONFIG_X86_64 + movl %edi, %eax # w +#endif + __ASM_SIZE(push,) %__ASM_REG(dx) + movl %eax, %edx # w -> t + shrl %edx # t >>= 1 + andl $0x55555555, %edx # t &= 0x55555555 + subl %edx, %eax # w -= t + + movl %eax, %edx # w -> t + shrl $2, %eax # w_tmp >>= 2 + andl $0x33333333, %edx # t &= 0x33333333 + andl $0x33333333, %eax # w_tmp &= 0x33333333 + addl %edx, %eax # w = w_tmp + t + + movl %eax, %edx # w -> t + shrl $4, %edx # t >>= 4 + addl %edx, %eax # w_tmp += t + andl $0x0f0f0f0f, %eax # w_tmp &= 0x0f0f0f0f + imull $0x01010101, %eax, %eax # w_tmp *= 0x01010101 + shrl $24, %eax # w = w_tmp >> 24 + __ASM_SIZE(pop,) %__ASM_REG(dx) + ret +ENDPROC(__sw_hweight32) + +ENTRY(__sw_hweight64) +#ifdef CONFIG_X86_64 + pushq %rdx + + movq %rdi, %rdx # w -> t + movabsq $0x5555555555555555, %rax + shrq %rdx # t >>= 1 + andq %rdx, %rax # t &= 0x5555555555555555 + movabsq $0x3333333333333333, %rdx + subq %rax, %rdi # w -= t + + movq %rdi, %rax # w -> t + shrq $2, %rdi # w_tmp >>= 2 + andq %rdx, %rax # t &= 0x3333333333333333 + andq %rdi, %rdx # w_tmp &= 0x3333333333333333 + addq %rdx, %rax # w = w_tmp + t + + movq %rax, %rdx # w -> t + shrq $4, %rdx # t >>= 4 + addq %rdx, %rax # w_tmp += t + movabsq $0x0f0f0f0f0f0f0f0f, %rdx + andq %rdx, 
%rax # w_tmp &= 0x0f0f0f0f0f0f0f0f + movabsq $0x0101010101010101, %rdx + imulq %rdx, %rax # w_tmp *= 0x0101010101010101 + shrq $56, %rax # w = w_tmp >> 56 + + popq %rdx + ret +#else /* CONFIG_X86_32 */ + /* We're getting an u64 arg in (%eax,%edx): unsigned long hweight64(__u64 w) */ + pushl %ecx + + call __sw_hweight32 + movl %eax, %ecx # stash away result + movl %edx, %eax # second part of input + call __sw_hweight32 + addl %ecx, %eax # result + + popl %ecx + ret +#endif +ENDPROC(__sw_hweight64) diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 760789a..0f87db2 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -521,9 +521,7 @@ static void set_aliased_prot(void *v, pgprot_t prot) preempt_disable(); - pagefault_disable(); /* Avoid warnings due to being atomic. */ - __get_user(dummy, (unsigned char __user __force *)v); - pagefault_enable(); + probe_kernel_read(&dummy, v, 1); if (HYPERVISOR_update_va_mapping((unsigned long)v, pte, 0)) BUG(); diff --git a/include/linux/random.h b/include/linux/random.h index e47e533..3d6e981 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -95,27 +95,27 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed) #ifdef CONFIG_ARCH_RANDOM # include <asm/archrandom.h> #else -static inline int arch_get_random_long(unsigned long *v) +static inline bool arch_get_random_long(unsigned long *v) { return 0; } -static inline int arch_get_random_int(unsigned int *v) +static inline bool arch_get_random_int(unsigned int *v) { return 0; } -static inline int arch_has_random(void) +static inline bool arch_has_random(void) { return 0; } -static inline int arch_get_random_seed_long(unsigned long *v) +static inline bool arch_get_random_seed_long(unsigned long *v) { return 0; } -static inline int arch_get_random_seed_int(unsigned int *v) +static inline bool arch_get_random_seed_int(unsigned int *v) { return 0; } -static inline int arch_has_random_seed(void) +static inline bool 
arch_has_random_seed(void) { return 0; } diff --git a/lib/Makefile b/lib/Makefile index ff6a7a6..07d06a8 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -15,9 +15,6 @@ KCOV_INSTRUMENT_rbtree.o := n KCOV_INSTRUMENT_list_debug.o := n KCOV_INSTRUMENT_debugobjects.o := n KCOV_INSTRUMENT_dynamic_debug.o := n -# Kernel does not boot if we instrument this file as it uses custom calling -# convention (see CONFIG_ARCH_HWEIGHT_CFLAGS). -KCOV_INSTRUMENT_hweight.o := n lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ @@ -74,8 +71,6 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o -GCOV_PROFILE_hweight.o := n -CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_BTREE) += btree.o diff --git a/lib/hweight.c b/lib/hweight.c index 9a5c1f2..43273a7 100644 --- a/lib/hweight.c +++ b/lib/hweight.c @@ -9,6 +9,7 @@ * The Hamming Weight of a number is the total number of bits set in it. 
*/ +#ifndef __HAVE_ARCH_SW_HWEIGHT unsigned int __sw_hweight32(unsigned int w) { #ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER @@ -25,6 +26,7 @@ unsigned int __sw_hweight32(unsigned int w) #endif } EXPORT_SYMBOL(__sw_hweight32); +#endif unsigned int __sw_hweight16(unsigned int w) { @@ -43,6 +45,7 @@ unsigned int __sw_hweight8(unsigned int w) } EXPORT_SYMBOL(__sw_hweight8); +#ifndef __HAVE_ARCH_SW_HWEIGHT unsigned long __sw_hweight64(__u64 w) { #if BITS_PER_LONG == 32 @@ -65,3 +68,4 @@ unsigned long __sw_hweight64(__u64 w) #endif } EXPORT_SYMBOL(__sw_hweight64); +#endif diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index c73425de..abe9c35 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -5,7 +5,7 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \ - check_initial_reg_state sigreturn ldt_gdt iopl + check_initial_reg_state sigreturn ldt_gdt iopl mpx-mini-test TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/mpx-debug.h b/tools/testing/selftests/x86/mpx-debug.h new file mode 100644 index 0000000..9230981 --- /dev/null +++ b/tools/testing/selftests/x86/mpx-debug.h @@ -0,0 +1,14 @@ +#ifndef _MPX_DEBUG_H +#define _MPX_DEBUG_H + +#ifndef DEBUG_LEVEL +#define DEBUG_LEVEL 0 +#endif +#define dprintf_level(level, args...) do { if(level <= DEBUG_LEVEL) printf(args); } while(0) +#define dprintf1(args...) dprintf_level(1, args) +#define dprintf2(args...) dprintf_level(2, args) +#define dprintf3(args...) dprintf_level(3, args) +#define dprintf4(args...) dprintf_level(4, args) +#define dprintf5(args...) 
dprintf_level(5, args) + +#endif /* _MPX_DEBUG_H */ diff --git a/tools/testing/selftests/x86/mpx-dig.c b/tools/testing/selftests/x86/mpx-dig.c new file mode 100644 index 0000000..ce85356 --- /dev/null +++ b/tools/testing/selftests/x86/mpx-dig.c @@ -0,0 +1,498 @@ +/* + * Written by Dave Hansen <dave.hansen@intel.com> + */ + +#include <stdlib.h> +#include <sys/types.h> +#include <unistd.h> +#include <stdio.h> +#include <errno.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <sys/mman.h> +#include <string.h> +#include <fcntl.h> +#include "mpx-debug.h" +#include "mpx-mm.h" +#include "mpx-hw.h" + +unsigned long bounds_dir_global; + +#define mpx_dig_abort() __mpx_dig_abort(__FILE__, __func__, __LINE__) +static void inline __mpx_dig_abort(const char *file, const char *func, int line) +{ + fprintf(stderr, "MPX dig abort @ %s::%d in %s()\n", file, line, func); + printf("MPX dig abort @ %s::%d in %s()\n", file, line, func); + abort(); +} + +/* + * run like this (BDIR finds the probably bounds directory): + * + * BDIR="$(cat /proc/$pid/smaps | grep -B1 2097152 \ + * | head -1 | awk -F- '{print $1}')"; + * ./mpx-dig $pid 0x$BDIR + * + * NOTE: + * assumes that the only 2097152-kb VMA is the bounds dir + */ + +long nr_incore(void *ptr, unsigned long size_bytes) +{ + int i; + long ret = 0; + long vec_len = size_bytes / PAGE_SIZE; + unsigned char *vec = malloc(vec_len); + int incore_ret; + + if (!vec) + mpx_dig_abort(); + + incore_ret = mincore(ptr, size_bytes, vec); + if (incore_ret) { + printf("mincore ret: %d\n", incore_ret); + perror("mincore"); + mpx_dig_abort(); + } + for (i = 0; i < vec_len; i++) + ret += vec[i]; + free(vec); + return ret; +} + +int open_proc(int pid, char *file) +{ + static char buf[100]; + int fd; + + snprintf(&buf[0], sizeof(buf), "/proc/%d/%s", pid, file); + fd = open(&buf[0], O_RDONLY); + if (fd < 0) + perror(buf); + + return fd; +} + +struct vaddr_range { + unsigned long start; + unsigned long end; +}; +struct 
vaddr_range *ranges; +int nr_ranges_allocated; +int nr_ranges_populated; +int last_range = -1; + +int __pid_load_vaddrs(int pid) +{ + int ret = 0; + int proc_maps_fd = open_proc(pid, "maps"); + char linebuf[10000]; + unsigned long start; + unsigned long end; + char rest[1000]; + FILE *f = fdopen(proc_maps_fd, "r"); + + if (!f) + mpx_dig_abort(); + nr_ranges_populated = 0; + while (!feof(f)) { + char *readret = fgets(linebuf, sizeof(linebuf), f); + int parsed; + + if (readret == NULL) { + if (feof(f)) + break; + mpx_dig_abort(); + } + + parsed = sscanf(linebuf, "%lx-%lx%s", &start, &end, rest); + if (parsed != 3) + mpx_dig_abort(); + + dprintf4("result[%d]: %lx-%lx<->%s\n", parsed, start, end, rest); + if (nr_ranges_populated >= nr_ranges_allocated) { + ret = -E2BIG; + break; + } + ranges[nr_ranges_populated].start = start; + ranges[nr_ranges_populated].end = end; + nr_ranges_populated++; + } + last_range = -1; + fclose(f); + close(proc_maps_fd); + return ret; +} + +int pid_load_vaddrs(int pid) +{ + int ret; + + dprintf2("%s(%d)\n", __func__, pid); + if (!ranges) { + nr_ranges_allocated = 4; + ranges = malloc(nr_ranges_allocated * sizeof(ranges[0])); + dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, pid, + nr_ranges_allocated, ranges); + assert(ranges != NULL); + } + do { + ret = __pid_load_vaddrs(pid); + if (!ret) + break; + if (ret == -E2BIG) { + dprintf2("%s(%d) need to realloc\n", __func__, pid); + nr_ranges_allocated *= 2; + ranges = realloc(ranges, + nr_ranges_allocated * sizeof(ranges[0])); + dprintf2("%s(%d) allocated %d ranges @ %p\n", __func__, + pid, nr_ranges_allocated, ranges); + assert(ranges != NULL); + dprintf1("reallocating to hold %d ranges\n", nr_ranges_allocated); + } + } while (1); + + dprintf2("%s(%d) done\n", __func__, pid); + + return ret; +} + +static inline int vaddr_in_range(unsigned long vaddr, struct vaddr_range *r) +{ + if (vaddr < r->start) + return 0; + if (vaddr >= r->end) + return 0; + return 1; +} + +static inline int 
vaddr_mapped_by_range(unsigned long vaddr)
+{
+	int i;
+
+	if (last_range >= 0 && vaddr_in_range(vaddr, &ranges[last_range])) /* -1 means no cached hit */
+		return 1;
+
+	for (i = 0; i < nr_ranges_populated; i++) {
+		struct vaddr_range *r = &ranges[i];
+
+		if (!vaddr_in_range(vaddr, r)) /* keep scanning until a range contains vaddr */
+			continue;
+		last_range = i;
+		return 1;
+	}
+	return 0;
+}
+
+const int bt_entry_size_bytes = sizeof(unsigned long) * 4;
+
+void *read_bounds_table_into_buf(unsigned long table_vaddr)
+{
+#ifdef MPX_DIG_STANDALONE
+	static char bt_buf[MPX_BOUNDS_TABLE_SIZE_BYTES];
+	off_t seek_ret = lseek(fd, table_vaddr, SEEK_SET);
+	if (seek_ret != table_vaddr)
+		mpx_dig_abort();
+
+	int read_ret = read(fd, &bt_buf, sizeof(bt_buf));
+	if (read_ret != sizeof(bt_buf))
+		mpx_dig_abort();
+	return &bt_buf;
+#else
+	return (void *)table_vaddr;
+#endif
+}
+
+int dump_table(unsigned long table_vaddr, unsigned long base_controlled_vaddr,
+		unsigned long bde_vaddr)
+{
+	unsigned long offset_inside_bt;
+	int nr_entries = 0;
+	int do_abort = 0;
+	char *bt_buf;
+
+	dprintf3("%s() base_controlled_vaddr: 0x%012lx bde_vaddr: 0x%012lx\n",
+			__func__, base_controlled_vaddr, bde_vaddr);
+
+	bt_buf = read_bounds_table_into_buf(table_vaddr);
+
+	dprintf4("%s() read done\n", __func__);
+
+	for (offset_inside_bt = 0;
+	     offset_inside_bt < MPX_BOUNDS_TABLE_SIZE_BYTES;
+	     offset_inside_bt += bt_entry_size_bytes) {
+		unsigned long bt_entry_index;
+		unsigned long bt_entry_controls;
+		unsigned long this_bt_entry_for_vaddr;
+		unsigned long *bt_entry_buf;
+		int i;
+
+		dprintf4("%s() offset_inside_bt: 0x%lx of 0x%llx\n", __func__,
+			offset_inside_bt, MPX_BOUNDS_TABLE_SIZE_BYTES);
+		bt_entry_buf = (void *)&bt_buf[offset_inside_bt];
+		if (!bt_buf) {
+			printf("null bt_buf\n");
+			mpx_dig_abort();
+		}
+		if (!bt_entry_buf) {
+			printf("null bt_entry_buf\n");
+			mpx_dig_abort();
+		}
+		dprintf4("%s() reading *bt_entry_buf @ %p\n", __func__,
+				bt_entry_buf);
+		if (!bt_entry_buf[0] &&
+		    !bt_entry_buf[1] &&
+		    !bt_entry_buf[2] &&
+		    !bt_entry_buf[3])
+			continue;
+
+
nr_entries++;
+
+		bt_entry_index = offset_inside_bt/bt_entry_size_bytes;
+		bt_entry_controls = sizeof(void *);
+		this_bt_entry_for_vaddr =
+			base_controlled_vaddr + bt_entry_index*bt_entry_controls;
+		/*
+		 * We sign extend vaddr bits 48->63 which effectively
+		 * creates a hole in the virtual address space.
+		 * This calculation corrects for the hole.
+		 */
+		if (this_bt_entry_for_vaddr > 0x00007fffffffffffUL)
+			this_bt_entry_for_vaddr |= 0xffff800000000000;
+
+		if (!vaddr_mapped_by_range(this_bt_entry_for_vaddr)) {
+			printf("bt_entry_buf: %p\n", bt_entry_buf);
+			printf("there is a bte for %lx but no mapping\n",
+					this_bt_entry_for_vaddr);
+			printf(" bde vaddr: %016lx\n", bde_vaddr);
+			printf("base_controlled_vaddr: %016lx\n", base_controlled_vaddr);
+			printf(" table_vaddr: %016lx\n", table_vaddr);
+			printf(" entry vaddr: %016lx @ offset %lx\n",
+				table_vaddr + offset_inside_bt, offset_inside_bt);
+			do_abort = 1;
+			mpx_dig_abort();
+		}
+		if (DEBUG_LEVEL < 4)
+			continue;
+
+		printf("table entry[%lx]: ", offset_inside_bt);
+		for (i = 0; i < bt_entry_size_bytes; i += sizeof(unsigned long))
+			printf("0x%016lx ", bt_entry_buf[i / sizeof(unsigned long)]); /* i is a byte offset; index by word */
+		printf("\n");
+	}
+	if (do_abort)
+		mpx_dig_abort();
+	dprintf4("%s() done\n", __func__);
+	return nr_entries;
+}
+
+int search_bd_buf(char *buf, int len_bytes, unsigned long bd_offset_bytes,
+		int *nr_populated_bdes)
+{
+	unsigned long i;
+	int total_entries = 0;
+
+	dprintf3("%s(%p, %x, %lx, ...)
buf end: %p\n", __func__, buf, + len_bytes, bd_offset_bytes, buf + len_bytes); + + for (i = 0; i < len_bytes; i += sizeof(unsigned long)) { + unsigned long bd_index = (bd_offset_bytes + i) / sizeof(unsigned long); + unsigned long *bounds_dir_entry_ptr = (unsigned long *)&buf[i]; + unsigned long bounds_dir_entry; + unsigned long bd_for_vaddr; + unsigned long bt_start; + unsigned long bt_tail; + int nr_entries; + + dprintf4("%s() loop i: %ld bounds_dir_entry_ptr: %p\n", __func__, i, + bounds_dir_entry_ptr); + + bounds_dir_entry = *bounds_dir_entry_ptr; + if (!bounds_dir_entry) { + dprintf4("no bounds dir at index 0x%lx / 0x%lx " + "start at offset:%lx %lx\n", bd_index, bd_index, + bd_offset_bytes, i); + continue; + } + dprintf3("found bounds_dir_entry: 0x%lx @ " + "index 0x%lx buf ptr: %p\n", bounds_dir_entry, i, + &buf[i]); + /* mask off the enable bit: */ + bounds_dir_entry &= ~0x1; + (*nr_populated_bdes)++; + dprintf4("nr_populated_bdes: %p\n", nr_populated_bdes); + dprintf4("*nr_populated_bdes: %d\n", *nr_populated_bdes); + + bt_start = bounds_dir_entry; + bt_tail = bounds_dir_entry + MPX_BOUNDS_TABLE_SIZE_BYTES - 1; + if (!vaddr_mapped_by_range(bt_start)) { + printf("bounds directory 0x%lx points to nowhere\n", + bounds_dir_entry); + mpx_dig_abort(); + } + if (!vaddr_mapped_by_range(bt_tail)) { + printf("bounds directory end 0x%lx points to nowhere\n", + bt_tail); + mpx_dig_abort(); + } + /* + * Each bounds directory entry controls 1MB of virtual address + * space. This variable is the virtual address in the process + * of the beginning of the area controlled by this bounds_dir. 
+ */ + bd_for_vaddr = bd_index * (1UL<<20); + + nr_entries = dump_table(bounds_dir_entry, bd_for_vaddr, + bounds_dir_global+bd_offset_bytes+i); + total_entries += nr_entries; + dprintf5("dir entry[%4ld @ %p]: 0x%lx %6d entries " + "total this buf: %7d bd_for_vaddrs: 0x%lx -> 0x%lx\n", + bd_index, buf+i, + bounds_dir_entry, nr_entries, total_entries, + bd_for_vaddr, bd_for_vaddr + (1UL<<20)); + } + dprintf3("%s(%p, %x, %lx, ...) done\n", __func__, buf, len_bytes, + bd_offset_bytes); + return total_entries; +} + +int proc_pid_mem_fd = -1; + +void *fill_bounds_dir_buf_other(long byte_offset_inside_bounds_dir, + long buffer_size_bytes, void *buffer) +{ + unsigned long seekto = bounds_dir_global + byte_offset_inside_bounds_dir; + int read_ret; + off_t seek_ret = lseek(proc_pid_mem_fd, seekto, SEEK_SET); + + if (seek_ret != seekto) + mpx_dig_abort(); + + read_ret = read(proc_pid_mem_fd, buffer, buffer_size_bytes); + /* there shouldn't practically be short reads of /proc/$pid/mem */ + if (read_ret != buffer_size_bytes) + mpx_dig_abort(); + + return buffer; +} +void *fill_bounds_dir_buf_self(long byte_offset_inside_bounds_dir, + long buffer_size_bytes, void *buffer) + +{ + unsigned char vec[buffer_size_bytes / PAGE_SIZE]; + char *dig_bounds_dir_ptr = + (void *)(bounds_dir_global + byte_offset_inside_bounds_dir); + /* + * use mincore() to quickly find the areas of the bounds directory + * that have memory and thus will be worth scanning. 
+ */ + int incore_ret; + + int incore = 0; + int i; + + dprintf4("%s() dig_bounds_dir_ptr: %p\n", __func__, dig_bounds_dir_ptr); + + incore_ret = mincore(dig_bounds_dir_ptr, buffer_size_bytes, &vec[0]); + if (incore_ret) { + printf("mincore ret: %d\n", incore_ret); + perror("mincore"); + mpx_dig_abort(); + } + for (i = 0; i < sizeof(vec); i++) + incore += vec[i]; + dprintf4("%s() total incore: %d\n", __func__, incore); + if (!incore) + return NULL; + dprintf3("%s() total incore: %d\n", __func__, incore); + return dig_bounds_dir_ptr; +} + +int inspect_pid(int pid) +{ + static int dig_nr; + long offset_inside_bounds_dir; + char bounds_dir_buf[sizeof(unsigned long) * (1UL << 15)]; + char *dig_bounds_dir_ptr; + int total_entries = 0; + int nr_populated_bdes = 0; + int inspect_self; + + if (getpid() == pid) { + dprintf4("inspecting self\n"); + inspect_self = 1; + } else { + dprintf4("inspecting pid %d\n", pid); + mpx_dig_abort(); + } + + for (offset_inside_bounds_dir = 0; + offset_inside_bounds_dir < MPX_BOUNDS_TABLE_SIZE_BYTES; + offset_inside_bounds_dir += sizeof(bounds_dir_buf)) { + static int bufs_skipped; + int this_entries; + + if (inspect_self) { + dig_bounds_dir_ptr = + fill_bounds_dir_buf_self(offset_inside_bounds_dir, + sizeof(bounds_dir_buf), + &bounds_dir_buf[0]); + } else { + dig_bounds_dir_ptr = + fill_bounds_dir_buf_other(offset_inside_bounds_dir, + sizeof(bounds_dir_buf), + &bounds_dir_buf[0]); + } + if (!dig_bounds_dir_ptr) { + bufs_skipped++; + continue; + } + this_entries = search_bd_buf(dig_bounds_dir_ptr, + sizeof(bounds_dir_buf), + offset_inside_bounds_dir, + &nr_populated_bdes); + total_entries += this_entries; + } + printf("mpx dig (%3d) complete, SUCCESS (%8d / %4d)\n", ++dig_nr, + total_entries, nr_populated_bdes); + return total_entries + nr_populated_bdes; +} + +#ifdef MPX_DIG_REMOTE +int main(int argc, char **argv) +{ + int err; + char *c; + unsigned long bounds_dir_entry; + int pid; + + printf("mpx-dig starting...\n"); + err = 
sscanf(argv[1], "%d", &pid); + printf("parsing: '%s', err: %d\n", argv[1], err); + if (err != 1) + mpx_dig_abort(); + + err = sscanf(argv[2], "%lx", &bounds_dir_global); + printf("parsing: '%s': %d\n", argv[2], err); + if (err != 1) + mpx_dig_abort(); + + proc_pid_mem_fd = open_proc(pid, "mem"); + if (proc_pid_mem_fd < 0) + mpx_dig_abort(); + + inspect_pid(pid); + return 0; +} +#endif + +long inspect_me(struct mpx_bounds_dir *bounds_dir) +{ + int pid = getpid(); + + pid_load_vaddrs(pid); + bounds_dir_global = (unsigned long)bounds_dir; + dprintf4("enter %s() bounds dir: %p\n", __func__, bounds_dir); + return inspect_pid(pid); +} diff --git a/tools/testing/selftests/x86/mpx-hw.h b/tools/testing/selftests/x86/mpx-hw.h new file mode 100644 index 0000000..093c190 --- /dev/null +++ b/tools/testing/selftests/x86/mpx-hw.h @@ -0,0 +1,123 @@ +#ifndef _MPX_HW_H +#define _MPX_HW_H + +#include <assert.h> + +/* Describe the MPX Hardware Layout in here */ + +#define NR_MPX_BOUNDS_REGISTERS 4 + +#ifdef __i386__ + +#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 16 /* 4 * 32-bits */ +#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 14) /* 16k */ +#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 4 +#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 22) /* 4MB */ + +#define MPX_BOUNDS_TABLE_BOTTOM_BIT 2 +#define MPX_BOUNDS_TABLE_TOP_BIT 11 +#define MPX_BOUNDS_DIR_BOTTOM_BIT 12 +#define MPX_BOUNDS_DIR_TOP_BIT 31 + +#else + +/* + * Linear Address of "pointer" (LAp) + * 0 -> 2: ignored + * 3 -> 19: index in to bounds table + * 20 -> 47: index in to bounds directory + * 48 -> 63: ignored + */ + +#define MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES 32 +#define MPX_BOUNDS_TABLE_SIZE_BYTES (1ULL << 22) /* 4MB */ +#define MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES 8 +#define MPX_BOUNDS_DIR_SIZE_BYTES (1ULL << 31) /* 2GB */ + +#define MPX_BOUNDS_TABLE_BOTTOM_BIT 3 +#define MPX_BOUNDS_TABLE_TOP_BIT 19 +#define MPX_BOUNDS_DIR_BOTTOM_BIT 20 +#define MPX_BOUNDS_DIR_TOP_BIT 47 + +#endif + +#define MPX_BOUNDS_DIR_NR_ENTRIES \ + 
(MPX_BOUNDS_DIR_SIZE_BYTES/MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES)
+#define MPX_BOUNDS_TABLE_NR_ENTRIES \
+	(MPX_BOUNDS_TABLE_SIZE_BYTES/MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES)
+
+#define MPX_BOUNDS_TABLE_ENTRY_VALID_BIT 0x1
+
+struct mpx_bd_entry {
+	union {
+		char x[MPX_BOUNDS_DIR_ENTRY_SIZE_BYTES];
+		void *contents[1];
+	};
+} __attribute__((packed));
+
+struct mpx_bt_entry {
+	union {
+		char x[MPX_BOUNDS_TABLE_ENTRY_SIZE_BYTES];
+		unsigned long contents[1];
+	};
+} __attribute__((packed));
+
+struct mpx_bounds_dir {
+	struct mpx_bd_entry entries[MPX_BOUNDS_DIR_NR_ENTRIES];
+} __attribute__((packed));
+
+struct mpx_bounds_table {
+	struct mpx_bt_entry entries[MPX_BOUNDS_TABLE_NR_ENTRIES];
+} __attribute__((packed));
+
+static inline unsigned long GET_BITS(unsigned long val, int bottombit, int topbit)
+{
+	int total_nr_bits = topbit - bottombit + 1; /* [bottombit, topbit] is inclusive */
+	unsigned long mask = (1UL << total_nr_bits)-1;
+	return (val >> bottombit) & mask;
+}
+
+static inline unsigned long __vaddr_bounds_table_index(void *vaddr)
+{
+	return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_TABLE_BOTTOM_BIT,
+					      MPX_BOUNDS_TABLE_TOP_BIT);
+}
+
+static inline unsigned long __vaddr_bounds_directory_index(void *vaddr)
+{
+	return GET_BITS((unsigned long)vaddr, MPX_BOUNDS_DIR_BOTTOM_BIT,
+					      MPX_BOUNDS_DIR_TOP_BIT);
+}
+
+static inline struct mpx_bd_entry *mpx_vaddr_to_bd_entry(void *vaddr,
+		struct mpx_bounds_dir *bounds_dir)
+{
+	unsigned long index = __vaddr_bounds_directory_index(vaddr);
+	return &bounds_dir->entries[index];
+}
+
+static inline int bd_entry_valid(struct mpx_bd_entry *bounds_dir_entry)
+{
+	unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents[0]; /* stored entry, not its address */
+	return (__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
+}
+
+static inline struct mpx_bounds_table *
+__bd_entry_to_bounds_table(struct mpx_bd_entry *bounds_dir_entry)
+{
+	unsigned long __bd_entry = (unsigned long)bounds_dir_entry->contents[0]; /* stored entry, not its address */
+	assert(__bd_entry & MPX_BOUNDS_TABLE_ENTRY_VALID_BIT);
+	__bd_entry &=
~MPX_BOUNDS_TABLE_ENTRY_VALID_BIT; + return (struct mpx_bounds_table *)__bd_entry; +} + +static inline struct mpx_bt_entry * +mpx_vaddr_to_bt_entry(void *vaddr, struct mpx_bounds_dir *bounds_dir) +{ + struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(vaddr, bounds_dir); + struct mpx_bounds_table *bt = __bd_entry_to_bounds_table(bde); + unsigned long index = __vaddr_bounds_table_index(vaddr); + return &bt->entries[index]; +} + +#endif /* _MPX_HW_H */ diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c new file mode 100644 index 0000000..616ee96 --- /dev/null +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -0,0 +1,1585 @@ +/* + * mpx-mini-test.c: routines to test Intel MPX (Memory Protection eXtentions) + * + * Written by: + * "Ren, Qiaowei" <qiaowei.ren@intel.com> + * "Wei, Gang" <gang.wei@intel.com> + * "Hansen, Dave" <dave.hansen@intel.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2. + */ + +/* + * 2014-12-05: Dave Hansen: fixed all of the compiler warnings, and made sure + * it works on 32-bit. 
+ */
+
+int inspect_every_this_many_mallocs = 100;
+int zap_all_every_this_many_mallocs = 1000;
+
+#define _GNU_SOURCE
+#define _LARGEFILE64_SOURCE
+
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <signal.h>
+#include <assert.h>
+#include <stdlib.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "mpx-hw.h"
+#include "mpx-debug.h"
+#include "mpx-mm.h"
+
+#ifndef __always_inline
+#define __always_inline inline __attribute__((always_inline))
+#endif
+
+#ifndef TEST_DURATION_SECS
+#define TEST_DURATION_SECS 3
+#endif
+
+void write_int_to(char *prefix, char *file, int int_to_write)
+{
+	char buf[100];
+	int fd = open(file, O_RDWR);
+	int len;
+	int ret;
+
+	assert(fd >= 0);
+	len = snprintf(buf, sizeof(buf), "%s%d", prefix, int_to_write);
+	assert(len >= 0);
+	assert(len < sizeof(buf));
+	ret = write(fd, buf, len);
+	assert(ret == len);
+	ret = close(fd);
+	assert(!ret);
+}
+
+void write_pid_to(char *prefix, char *file)
+{
+	write_int_to(prefix, file, getpid());
+}
+
+void trace_me(void)
+{
+/* tracing events dir */
+#define TED "/sys/kernel/debug/tracing/events/"
+/*
+	write_pid_to("common_pid=", TED "signal/filter");
+	write_pid_to("common_pid=", TED "exceptions/filter");
+	write_int_to("", TED "signal/enable", 1);
+	write_int_to("", TED "exceptions/enable", 1);
+*/
+	write_pid_to("", "/sys/kernel/debug/tracing/set_ftrace_pid");
+	write_int_to("", "/sys/kernel/debug/tracing/trace", 0);
+}
+
+#define test_failed() __test_failed(__FILE__, __LINE__)
+static void __test_failed(char *f, int l)
+{
+	fprintf(stderr, "abort @ %s::%d\n", f, l);
+	abort();
+}
+
+/* Error Printf */
+#define eprintf(args...)
fprintf(stderr, args) + +#ifdef __i386__ + +/* i386 directory size is 4MB */ +#define REG_IP_IDX REG_EIP +#define REX_PREFIX + +#define XSAVE_OFFSET_IN_FPMEM sizeof(struct _libc_fpstate) + +/* + * __cpuid() is from the Linux Kernel: + */ +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile( + "push %%ebx;" + "cpuid;" + "mov %%ebx, %1;" + "pop %%ebx" + : "=a" (*eax), + "=g" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +#else /* __i386__ */ + +#define REG_IP_IDX REG_RIP +#define REX_PREFIX "0x48, " + +#define XSAVE_OFFSET_IN_FPMEM 0 + +/* + * __cpuid() is from the Linux Kernel: + */ +static inline void __cpuid(unsigned int *eax, unsigned int *ebx, + unsigned int *ecx, unsigned int *edx) +{ + /* ecx is often an input as well as an output. */ + asm volatile( + "cpuid;" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (*eax), "2" (*ecx)); +} + +#endif /* !__i386__ */ + +struct xsave_hdr_struct { + uint64_t xstate_bv; + uint64_t reserved1[2]; + uint64_t reserved2[5]; +} __attribute__((packed)); + +struct bndregs_struct { + uint64_t bndregs[8]; +} __attribute__((packed)); + +struct bndcsr_struct { + uint64_t cfg_reg_u; + uint64_t status_reg; +} __attribute__((packed)); + +struct xsave_struct { + uint8_t fpu_sse[512]; + struct xsave_hdr_struct xsave_hdr; + uint8_t ymm[256]; + uint8_t lwp[128]; + struct bndregs_struct bndregs; + struct bndcsr_struct bndcsr; +} __attribute__((packed)); + +uint8_t __attribute__((__aligned__(64))) buffer[4096]; +struct xsave_struct *xsave_buf = (struct xsave_struct *)buffer; + +uint8_t __attribute__((__aligned__(64))) test_buffer[4096]; +struct xsave_struct *xsave_test_buf = (struct xsave_struct *)test_buffer; + +uint64_t num_bnd_chk; + +static __always_inline void xrstor_state(struct xsave_struct *fx, uint64_t mask) +{ + uint32_t lmask = mask; + uint32_t hmask = mask >> 32; + 
+ asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); +} + +static __always_inline void xsave_state_1(void *_fx, uint64_t mask) +{ + uint32_t lmask = mask; + uint32_t hmask = mask >> 32; + unsigned char *fx = _fx; + + asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); +} + +static inline uint64_t xgetbv(uint32_t index) +{ + uint32_t eax, edx; + + asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */ + : "=a" (eax), "=d" (edx) + : "c" (index)); + return eax + ((uint64_t)edx << 32); +} + +static uint64_t read_mpx_status_sig(ucontext_t *uctxt) +{ + memset(buffer, 0, sizeof(buffer)); + memcpy(buffer, + (uint8_t *)uctxt->uc_mcontext.fpregs + XSAVE_OFFSET_IN_FPMEM, + sizeof(struct xsave_struct)); + + return xsave_buf->bndcsr.status_reg; +} + +#include <pthread.h> + +static uint8_t *get_next_inst_ip(uint8_t *addr) +{ + uint8_t *ip = addr; + uint8_t sib; + uint8_t rm; + uint8_t mod; + uint8_t base; + uint8_t modrm; + + /* determine the prefix. */ + switch(*ip) { + case 0xf2: + case 0xf3: + case 0x66: + ip++; + break; + } + + /* look for rex prefix */ + if ((*ip & 0x40) == 0x40) + ip++; + + /* Make sure we have a MPX instruction. */ + if (*ip++ != 0x0f) + return addr; + + /* Skip the op code byte. */ + ip++; + + /* Get the modrm byte. */ + modrm = *ip++; + + /* Break it down into parts. */ + rm = modrm & 7; + mod = (modrm >> 6); + + /* Init the parts of the address mode. */ + base = 8; + + /* Is it a mem mode? 
*/ + if (mod != 3) { + /* look for scaled indexed addressing */ + if (rm == 4) { + /* SIB addressing */ + sib = *ip++; + base = sib & 7; + switch (mod) { + case 0: + if (base == 5) + ip += 4; + break; + + case 1: + ip++; + break; + + case 2: + ip += 4; + break; + } + + } else { + /* MODRM addressing */ + switch (mod) { + case 0: + /* DISP32 addressing, no base */ + if (rm == 5) + ip += 4; + break; + + case 1: + ip++; + break; + + case 2: + ip += 4; + break; + } + } + } + return ip; +} + +#ifdef si_lower +static inline void *__si_bounds_lower(siginfo_t *si) +{ + return si->si_lower; +} + +static inline void *__si_bounds_upper(siginfo_t *si) +{ + return si->si_upper; +} +#else +static inline void **__si_bounds_hack(siginfo_t *si) +{ + void *sigfault = &si->_sifields._sigfault; + void *end_sigfault = sigfault + sizeof(si->_sifields._sigfault); + void **__si_lower = end_sigfault; + + return __si_lower; +} + +static inline void *__si_bounds_lower(siginfo_t *si) +{ + return *__si_bounds_hack(si); +} + +static inline void *__si_bounds_upper(siginfo_t *si) +{ + return (*__si_bounds_hack(si)) + sizeof(void *); +} +#endif + +static int br_count; +static int expected_bnd_index = -1; +uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */ +unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS]; + +/* + * The kernel is supposed to provide some information about the bounds + * exception in the siginfo. It should match what we have in the bounds + * registers that we are checking against. Just check against the shadow copy + * since it is easily available, and we also check that *it* matches the real + * registers. 
+ */ +void check_siginfo_vs_shadow(siginfo_t* si) +{ + int siginfo_ok = 1; + void *shadow_lower = (void *)(unsigned long)shadow_plb[expected_bnd_index][0]; + void *shadow_upper = (void *)(unsigned long)shadow_plb[expected_bnd_index][1]; + + if ((expected_bnd_index < 0) || + (expected_bnd_index >= NR_MPX_BOUNDS_REGISTERS)) { + fprintf(stderr, "ERROR: invalid expected_bnd_index: %d\n", + expected_bnd_index); + exit(6); + } + if (__si_bounds_lower(si) != shadow_lower) + siginfo_ok = 0; + if (__si_bounds_upper(si) != shadow_upper) + siginfo_ok = 0; + + if (!siginfo_ok) { + fprintf(stderr, "ERROR: siginfo bounds do not match " + "shadow bounds for register %d\n", expected_bnd_index); + exit(7); + } +} + +void handler(int signum, siginfo_t *si, void *vucontext) +{ + int i; + ucontext_t *uctxt = vucontext; + int trapno; + unsigned long ip; + + dprintf1("entered signal handler\n"); + + trapno = uctxt->uc_mcontext.gregs[REG_TRAPNO]; + ip = uctxt->uc_mcontext.gregs[REG_IP_IDX]; + + if (trapno == 5) { + typeof(si->si_addr) *si_addr_ptr = &si->si_addr; + uint64_t status = read_mpx_status_sig(uctxt); + uint64_t br_reason = status & 0x3; + + br_count++; + dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count); + +#define __SI_FAULT (3 << 16) +#define SEGV_BNDERR (__SI_FAULT|3) /* failed address bound checks */ + + dprintf2("Saw a #BR! 
status 0x%jx at %016lx br_reason: %jx\n", + status, ip, br_reason); + dprintf2("si_signo: %d\n", si->si_signo); + dprintf2(" signum: %d\n", signum); + dprintf2("info->si_code == SEGV_BNDERR: %d\n", + (si->si_code == SEGV_BNDERR)); + dprintf2("info->si_code: %d\n", si->si_code); + dprintf2("info->si_lower: %p\n", __si_bounds_lower(si)); + dprintf2("info->si_upper: %p\n", __si_bounds_upper(si)); + + check_siginfo_vs_shadow(si); + + for (i = 0; i < 8; i++) + dprintf3("[%d]: %p\n", i, si_addr_ptr[i]); + switch (br_reason) { + case 0: /* traditional BR */ + fprintf(stderr, + "Undefined status with bound exception:%jx\n", + status); + exit(5); + case 1: /* #BR MPX bounds exception */ + /* these are normal and we expect to see them */ + dprintf1("bounds exception (normal): status 0x%jx at %p si_addr: %p\n", + status, (void *)ip, si->si_addr); + num_bnd_chk++; + uctxt->uc_mcontext.gregs[REG_IP_IDX] = + (greg_t)get_next_inst_ip((uint8_t *)ip); + break; + case 2: + fprintf(stderr, "#BR status == 2, missing bounds table," + "kernel should have handled!!\n"); + exit(4); + break; + default: + fprintf(stderr, "bound check error: status 0x%jx at %p\n", + status, (void *)ip); + num_bnd_chk++; + uctxt->uc_mcontext.gregs[REG_IP_IDX] = + (greg_t)get_next_inst_ip((uint8_t *)ip); + fprintf(stderr, "bound check error: si_addr %p\n", si->si_addr); + exit(3); + } + } else if (trapno == 14) { + eprintf("ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n", + trapno, ip); + eprintf("si_addr %p\n", si->si_addr); + eprintf("REG_ERR: %lx\n", (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); + test_failed(); + } else { + eprintf("unexpected trap %d! 
/*
 * Return true if bit number 'bit' (0 = least significant) is set in 'x'.
 * 'bit' must be in the range 0..31.
 */
bool one_bit(unsigned int x, int bit)
{
	/*
	 * Shift an unsigned 1: "1 << 31" on a signed int overflows, which
	 * is undefined behavior, and bit 31 is a legitimate request here.
	 */
	return !!(x & (1U << bit));
}
(ecx) */ +#define XSAVE_FEATURE_BIT (26) /* XSAVE/XRSTOR/XSETBV/XGETBV */ +#define OSXSAVE_FEATURE_BIT (27) /* XSAVE enabled in the OS */ + +bool check_mpx_support(void) +{ + unsigned int eax, ebx, ecx, edx; + + cpuid_count(1, 0, &eax, &ebx, &ecx, &edx); + + /* We can't do much without XSAVE, so just make these assert()'s */ + if (!one_bit(ecx, XSAVE_FEATURE_BIT)) { + fprintf(stderr, "processor lacks XSAVE, can not run MPX tests\n"); + exit(0); + } + + if (!one_bit(ecx, OSXSAVE_FEATURE_BIT)) { + fprintf(stderr, "processor lacks OSXSAVE, can not run MPX tests\n"); + exit(0); + } + + /* CPUs not supporting the XSTATE CPUID leaf do not support MPX */ + /* Is this redundant with the feature bit checks? */ + cpuid_count(0, 0, &eax, &ebx, &ecx, &edx); + if (eax < XSTATE_CPUID) { + fprintf(stderr, "processor lacks XSTATE CPUID leaf," + " can not run MPX tests\n"); + exit(0); + } + + printf("XSAVE is supported by HW & OS\n"); + + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + + printf("XSAVE processor supported state mask: 0x%x\n", eax); + printf("XSAVE OS supported state mask: 0x%jx\n", xgetbv(0)); + + /* Make sure that the MPX states are enabled in in XCR0 */ + if ((eax & MPX_XSTATES) != MPX_XSTATES) { + fprintf(stderr, "processor lacks MPX XSTATE(s), can not run MPX tests\n"); + exit(0); + } + + /* Make sure the MPX states are supported by XSAVE* */ + if ((xgetbv(0) & MPX_XSTATES) != MPX_XSTATES) { + fprintf(stderr, "MPX XSTATE(s) no enabled in XCR0, " + "can not run MPX tests\n"); + exit(0); + } + + print_state_component(XSTATE_BIT_BNDREGS, "BNDREGS"); + print_state_component(XSTATE_BIT_BNDCSR, "BNDCSR"); + + return true; +} + +void enable_mpx(void *l1base) +{ + /* enable point lookup */ + memset(buffer, 0, sizeof(buffer)); + xrstor_state(xsave_buf, 0x18); + + xsave_buf->xsave_hdr.xstate_bv = 0x10; + xsave_buf->bndcsr.cfg_reg_u = (unsigned long)l1base | 1; + xsave_buf->bndcsr.status_reg = 0; + + dprintf2("bf xrstor\n"); + dprintf2("xsave cndcsr: status %jx, 
configu %jx\n", + xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u); + xrstor_state(xsave_buf, 0x18); + dprintf2("after xrstor\n"); + + xsave_state_1(xsave_buf, 0x18); + + dprintf1("xsave bndcsr: status %jx, configu %jx\n", + xsave_buf->bndcsr.status_reg, xsave_buf->bndcsr.cfg_reg_u); +} + +#include <sys/prctl.h> + +struct mpx_bounds_dir *bounds_dir_ptr; + +unsigned long __bd_incore(const char *func, int line) +{ + unsigned long ret = nr_incore(bounds_dir_ptr, MPX_BOUNDS_DIR_SIZE_BYTES); + return ret; +} +#define bd_incore() __bd_incore(__func__, __LINE__) + +void check_clear(void *ptr, unsigned long sz) +{ + unsigned long *i; + + for (i = ptr; (void *)i < ptr + sz; i++) { + if (*i) { + dprintf1("%p is NOT clear at %p\n", ptr, i); + assert(0); + } + } + dprintf1("%p is clear for %lx\n", ptr, sz); +} + +void check_clear_bd(void) +{ + check_clear(bounds_dir_ptr, 2UL << 30); +} + +#define USE_MALLOC_FOR_BOUNDS_DIR 1 +bool process_specific_init(void) +{ + unsigned long size; + unsigned long *dir; + /* Guarantee we have the space to align it, add padding: */ + unsigned long pad = getpagesize(); + + size = 2UL << 30; /* 2GB */ + if (sizeof(unsigned long) == 4) + size = 4UL << 20; /* 4MB */ + dprintf1("trying to allocate %ld MB bounds directory\n", (size >> 20)); + + if (USE_MALLOC_FOR_BOUNDS_DIR) { + unsigned long _dir; + + dir = malloc(size + pad); + assert(dir); + _dir = (unsigned long)dir; + _dir += 0xfffUL; + _dir &= ~0xfffUL; + dir = (void *)_dir; + } else { + /* + * This makes debugging easier because the address + * calculations are simpler: + */ + dir = mmap((void *)0x200000000000, size + pad, + PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if (dir == (void *)-1) { + perror("unable to allocate bounds directory"); + abort(); + } + check_clear(dir, size); + } + bounds_dir_ptr = (void *)dir; + madvise(bounds_dir_ptr, size, MADV_NOHUGEPAGE); + bd_incore(); + dprintf1("bounds directory: 0x%p -> 0x%p\n", bounds_dir_ptr, + (char *)bounds_dir_ptr 
+ size); + check_clear(dir, size); + enable_mpx(dir); + check_clear(dir, size); + if (prctl(43, 0, 0, 0, 0)) { + printf("no MPX support\n"); + abort(); + return false; + } + return true; +} + +bool process_specific_finish(void) +{ + if (prctl(44)) { + printf("no MPX support\n"); + return false; + } + return true; +} + +void setup_handler() +{ + int r, rs; + struct sigaction newact; + struct sigaction oldact; + + /* #BR is mapped to sigsegv */ + int signum = SIGSEGV; + + newact.sa_handler = 0; /* void(*)(int)*/ + newact.sa_sigaction = handler; /* void (*)(int, siginfo_t*, void *) */ + + /*sigset_t - signals to block while in the handler */ + /* get the old signal mask. */ + rs = sigprocmask(SIG_SETMASK, 0, &newact.sa_mask); + assert(rs == 0); + + /* call sa_sigaction, not sa_handler*/ + newact.sa_flags = SA_SIGINFO; + + newact.sa_restorer = 0; /* void(*)(), obsolete */ + r = sigaction(signum, &newact, &oldact); + assert(r == 0); +} + +void mpx_prepare(void) +{ + dprintf2("%s()\n", __func__); + setup_handler(); + process_specific_init(); +} + +void mpx_cleanup(void) +{ + printf("%s(): %jd BRs. 
bye...\n", __func__, num_bnd_chk); + process_specific_finish(); +} + +/*-------------- the following is test case ---------------*/ +#include <stdint.h> +#include <stdbool.h> +#include <stdlib.h> +#include <stdio.h> +#include <time.h> + +uint64_t num_lower_brs; +uint64_t num_upper_brs; + +#define MPX_CONFIG_OFFSET 1024 +#define MPX_BOUNDS_OFFSET 960 +#define MPX_HEADER_OFFSET 512 +#define MAX_ADDR_TESTED (1<<28) +#define TEST_ROUNDS 100 + +/* + 0F 1A /r BNDLDX-Load + 0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation + 66 0F 1A /r BNDMOV bnd1, bnd2/m128 + 66 0F 1B /r BNDMOV bnd1/m128, bnd2 + F2 0F 1A /r BNDCU bnd, r/m64 + F2 0F 1B /r BNDCN bnd, r/m64 + F3 0F 1A /r BNDCL bnd, r/m64 + F3 0F 1B /r BNDMK bnd, m64 +*/ + +static __always_inline void xsave_state(void *_fx, uint64_t mask) +{ + uint32_t lmask = mask; + uint32_t hmask = mask >> 32; + unsigned char *fx = _fx; + + asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x27\n\t" + : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask) + : "memory"); +} + +static __always_inline void mpx_clear_bnd0(void) +{ + long size = 0; + void *ptr = NULL; + /* F3 0F 1B /r BNDMK bnd, m64 */ + /* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */ + asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t" + : : "c" (ptr), "d" (size-1) + : "memory"); +} + +static __always_inline void mpx_make_bound_helper(unsigned long ptr, + unsigned long size) +{ + /* F3 0F 1B /r BNDMK bnd, m64 */ + /* f3 0f 1b 04 11 bndmk (%rcx,%rdx,1),%bnd0 */ + asm volatile(".byte 0xf3,0x0f,0x1b,0x04,0x11\n\t" + : : "c" (ptr), "d" (size-1) + : "memory"); +} + +static __always_inline void mpx_check_lowerbound_helper(unsigned long ptr) +{ + /* F3 0F 1A /r NDCL bnd, r/m64 */ + /* f3 0f 1a 01 bndcl (%rcx),%bnd0 */ + asm volatile(".byte 0xf3,0x0f,0x1a,0x01\n\t" + : : "c" (ptr) + : "memory"); +} + +static __always_inline void mpx_check_upperbound_helper(unsigned long ptr) +{ + /* F2 0F 1A /r BNDCU bnd, r/m64 */ + /* f2 0f 1a 01 bndcu (%rcx),%bnd0 */ + asm volatile(".byte 
0xf2,0x0f,0x1a,0x01\n\t" + : : "c" (ptr) + : "memory"); +} + +static __always_inline void mpx_movbndreg_helper() +{ + /* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */ + /* 66 0f 1b c2 bndmov %bnd0,%bnd2 */ + + asm volatile(".byte 0x66,0x0f,0x1b,0xc2\n\t"); +} + +static __always_inline void mpx_movbnd2mem_helper(uint8_t *mem) +{ + /* 66 0F 1B /r BNDMOV bnd1/m128, bnd2 */ + /* 66 0f 1b 01 bndmov %bnd0,(%rcx) */ + asm volatile(".byte 0x66,0x0f,0x1b,0x01\n\t" + : : "c" (mem) + : "memory"); +} + +static __always_inline void mpx_movbnd_from_mem_helper(uint8_t *mem) +{ + /* 66 0F 1A /r BNDMOV bnd1, bnd2/m128 */ + /* 66 0f 1a 01 bndmov (%rcx),%bnd0 */ + asm volatile(".byte 0x66,0x0f,0x1a,0x01\n\t" + : : "c" (mem) + : "memory"); +} + +static __always_inline void mpx_store_dsc_helper(unsigned long ptr_addr, + unsigned long ptr_val) +{ + /* 0F 1B /r BNDSTX-Store Extended Bounds Using Address Translation */ + /* 0f 1b 04 11 bndstx %bnd0,(%rcx,%rdx,1) */ + asm volatile(".byte 0x0f,0x1b,0x04,0x11\n\t" + : : "c" (ptr_addr), "d" (ptr_val) + : "memory"); +} + +static __always_inline void mpx_load_dsc_helper(unsigned long ptr_addr, + unsigned long ptr_val) +{ + /* 0F 1A /r BNDLDX-Load */ + /*/ 0f 1a 04 11 bndldx (%rcx,%rdx,1),%bnd0 */ + asm volatile(".byte 0x0f,0x1a,0x04,0x11\n\t" + : : "c" (ptr_addr), "d" (ptr_val) + : "memory"); +} + +void __print_context(void *__print_xsave_buffer, int line) +{ + uint64_t *bounds = (uint64_t *)(__print_xsave_buffer + MPX_BOUNDS_OFFSET); + uint64_t *cfg = (uint64_t *)(__print_xsave_buffer + MPX_CONFIG_OFFSET); + + int i; + eprintf("%s()::%d\n", "print_context", line); + for (i = 0; i < 4; i++) { + eprintf("bound[%d]: 0x%016lx 0x%016lx(0x%016lx)\n", i, + (unsigned long)bounds[i*2], + ~(unsigned long)bounds[i*2+1], + (unsigned long)bounds[i*2+1]); + } + + eprintf("cpcfg: %jx cpstatus: %jx\n", cfg[0], cfg[1]); +} +#define print_context(x) __print_context(x, __LINE__) +#ifdef DEBUG +#define dprint_context(x) print_context(x) +#else +#define dprint_context(x) 
/*
 * Source of "random" numbers for the tests.
 *
 * With NOT_SO_RANDOM defined, a fixed arithmetic sequence is returned
 * so that a run can be reproduced; otherwise this is random().
 * 'line' is the caller's line number and is currently unused.
 */
long int __mpx_random(int line)
{
#ifdef NOT_SO_RANDOM
	static long fake = 722122311;
	fake += 563792075;
	/* was "return fakse;", an undeclared identifier */
	return fake;
#else
	return random();
#endif
}
long)ptr) + num_upper_brs++; + else + dprintf1("UpperBoundChk passed:%p\n", ptr); +} + +__always_inline void movbndreg_shadow(int src, int dest) +{ + shadow_plb[dest][0] = shadow_plb[src][0]; + shadow_plb[dest][1] = shadow_plb[src][1]; +} + +__always_inline void movbnd2mem_shadow(int src, unsigned long *dest) +{ + unsigned long *lower = (unsigned long *)&(shadow_plb[src][0]); + unsigned long *upper = (unsigned long *)&(shadow_plb[src][1]); + *dest = *lower; + *(dest+1) = *upper; +} + +__always_inline void movbnd_from_mem_shadow(unsigned long *src, int dest) +{ + unsigned long *lower = (unsigned long *)&(shadow_plb[dest][0]); + unsigned long *upper = (unsigned long *)&(shadow_plb[dest][1]); + *lower = *src; + *upper = *(src+1); +} + +__always_inline void stdsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val) +{ + shadow_map[0] = (unsigned long)shadow_plb[index][0]; + shadow_map[1] = (unsigned long)shadow_plb[index][1]; + shadow_map[2] = (unsigned long)ptr_val; + dprintf3("%s(%d, %p, %p) set shadow map[2]: %p\n", __func__, + index, ptr, ptr_val, ptr_val); + /*ptr ignored */ +} + +void lddsc_shadow(int index, uint8_t *ptr, uint8_t *ptr_val) +{ + uint64_t lower = shadow_map[0]; + uint64_t upper = shadow_map[1]; + uint8_t *value = (uint8_t *)shadow_map[2]; + + if (value != ptr_val) { + dprintf2("%s(%d, %p, %p) init shadow bounds[%d] " + "because %p != %p\n", __func__, index, ptr, + ptr_val, index, value, ptr_val); + shadow_plb[index][0] = 0; + shadow_plb[index][1] = ~(unsigned long)0; + } else { + shadow_plb[index][0] = lower; + shadow_plb[index][1] = upper; + } + /* ptr ignored */ +} + +static __always_inline void mpx_test_helper0(uint8_t *buf, uint8_t *ptr) +{ + mpx_make_bound_helper((unsigned long)ptr, 0x1800); +} + +static __always_inline void mpx_test_helper0_shadow(uint8_t *buf, uint8_t *ptr) +{ + mkbnd_shadow(ptr, 0, 0x1800); +} + +static __always_inline void mpx_test_helper1(uint8_t *buf, uint8_t *ptr) +{ + /* these are hard-coded to check bnd0 */ + 
expected_bnd_index = 0; + mpx_check_lowerbound_helper((unsigned long)(ptr-1)); + mpx_check_upperbound_helper((unsigned long)(ptr+0x1800)); + /* reset this since we do not expect any more bounds exceptions */ + expected_bnd_index = -1; +} + +static __always_inline void mpx_test_helper1_shadow(uint8_t *buf, uint8_t *ptr) +{ + check_lowerbound_shadow(ptr-1, 0); + check_upperbound_shadow(ptr+0x1800, 0); +} + +static __always_inline void mpx_test_helper2(uint8_t *buf, uint8_t *ptr) +{ + mpx_make_bound_helper((unsigned long)ptr, 0x1800); + mpx_movbndreg_helper(); + mpx_movbnd2mem_helper(buf); + mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800); +} + +static __always_inline void mpx_test_helper2_shadow(uint8_t *buf, uint8_t *ptr) +{ + mkbnd_shadow(ptr, 0, 0x1800); + movbndreg_shadow(0, 2); + movbnd2mem_shadow(0, (unsigned long *)buf); + mkbnd_shadow(ptr+0x12, 0, 0x1800); +} + +static __always_inline void mpx_test_helper3(uint8_t *buf, uint8_t *ptr) +{ + mpx_movbnd_from_mem_helper(buf); +} + +static __always_inline void mpx_test_helper3_shadow(uint8_t *buf, uint8_t *ptr) +{ + movbnd_from_mem_shadow((unsigned long *)buf, 0); +} + +static __always_inline void mpx_test_helper4(uint8_t *buf, uint8_t *ptr) +{ + mpx_store_dsc_helper((unsigned long)buf, (unsigned long)ptr); + mpx_make_bound_helper((unsigned long)(ptr+0x12), 0x1800); +} + +static __always_inline void mpx_test_helper4_shadow(uint8_t *buf, uint8_t *ptr) +{ + stdsc_shadow(0, buf, ptr); + mkbnd_shadow(ptr+0x12, 0, 0x1800); +} + +static __always_inline void mpx_test_helper5(uint8_t *buf, uint8_t *ptr) +{ + mpx_load_dsc_helper((unsigned long)buf, (unsigned long)ptr); +} + +static __always_inline void mpx_test_helper5_shadow(uint8_t *buf, uint8_t *ptr) +{ + lddsc_shadow(0, buf, ptr); +} + +#define NR_MPX_TEST_FUNCTIONS 6 + +/* + * For compatibility reasons, MPX will clear the bounds registers + * when you make function calls (among other things). 
We have to + * preserve the registers in between calls to the "helpers" since + * they build on each other. + * + * Be very careful not to make any function calls inside the + * helpers, or anywhere else beween the xrstor and xsave. + */ +#define run_helper(helper_nr, buf, buf_shadow, ptr) do { \ + xrstor_state(xsave_test_buf, flags); \ + mpx_test_helper##helper_nr(buf, ptr); \ + xsave_state(xsave_test_buf, flags); \ + mpx_test_helper##helper_nr##_shadow(buf_shadow, ptr); \ +} while (0) + +static void run_helpers(int nr, uint8_t *buf, uint8_t *buf_shadow, uint8_t *ptr) +{ + uint64_t flags = 0x18; + + dprint_context(xsave_test_buf); + switch (nr) { + case 0: + run_helper(0, buf, buf_shadow, ptr); + break; + case 1: + run_helper(1, buf, buf_shadow, ptr); + break; + case 2: + run_helper(2, buf, buf_shadow, ptr); + break; + case 3: + run_helper(3, buf, buf_shadow, ptr); + break; + case 4: + run_helper(4, buf, buf_shadow, ptr); + break; + case 5: + run_helper(5, buf, buf_shadow, ptr); + break; + default: + test_failed(); + break; + } + dprint_context(xsave_test_buf); +} + +unsigned long buf_shadow[1024]; /* used to check load / store descriptors */ +extern long inspect_me(struct mpx_bounds_dir *bounds_dir); + +long cover_buf_with_bt_entries(void *buf, long buf_len) +{ + int i; + long nr_to_fill; + int ratio = 1000; + unsigned long buf_len_in_ptrs; + + /* Fill about 1/100 of the space with bt entries */ + nr_to_fill = buf_len / (sizeof(unsigned long) * ratio); + + if (!nr_to_fill) + dprintf3("%s() nr_to_fill: %ld\n", __func__, nr_to_fill); + + /* Align the buffer to pointer size */ + while (((unsigned long)buf) % sizeof(void *)) { + buf++; + buf_len--; + } + /* We are storing pointers, so make */ + buf_len_in_ptrs = buf_len / sizeof(void *); + + for (i = 0; i < nr_to_fill; i++) { + long index = (mpx_random() % buf_len_in_ptrs); + void *ptr = buf + index * sizeof(unsigned long); + unsigned long ptr_addr = (unsigned long)ptr; + + /* ptr and size can be anything */ + 
mpx_make_bound_helper((unsigned long)ptr, 8); + + /* + * take bnd0 and put it in to bounds tables "buf + index" is an + * address inside the buffer where we are pretending that we + * are going to put a pointer We do not, though because we will + * never load entries from the table, so it doesn't matter. + */ + mpx_store_dsc_helper(ptr_addr, (unsigned long)ptr); + dprintf4("storing bound table entry for %lx (buf start @ %p)\n", + ptr_addr, buf); + } + return nr_to_fill; +} + +unsigned long align_down(unsigned long alignme, unsigned long align_to) +{ + return alignme & ~(align_to-1); +} + +unsigned long align_up(unsigned long alignme, unsigned long align_to) +{ + return (alignme + align_to - 1) & ~(align_to-1); +} + +/* + * Using 1MB alignment guarantees that each no allocation + * will overlap with another's bounds tables. + * + * We have to cook our own allocator here. malloc() can + * mix other allocation with ours which means that even + * if we free all of our allocations, there might still + * be bounds tables for the *areas* since there is other + * valid memory there. + * + * We also can't use malloc() because a free() of an area + * might not free it back to the kernel. We want it + * completely unmapped an malloc() does not guarantee + * that. + */ +#ifdef __i386__ +long alignment = 4096; +long sz_alignment = 4096; +#else +long alignment = 1 * MB; +long sz_alignment = 1 * MB; +#endif +void *mpx_mini_alloc(unsigned long sz) +{ + unsigned long long tries = 0; + static void *last; + void *ptr; + void *try_at; + + sz = align_up(sz, sz_alignment); + + try_at = last + alignment; + while (1) { + ptr = mmap(try_at, sz, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if (ptr == (void *)-1) + return NULL; + if (ptr == try_at) + break; + + munmap(ptr, sz); + try_at += alignment; +#ifdef __i386__ + /* + * This isn't quite correct for 32-bit binaries + * on 64-bit kernels since they can use the + * entire 32-bit address space, but it's close + * enough. 
+ */ + if (try_at > (void *)0xC0000000) +#else + if (try_at > (void *)0x0000800000000000) +#endif + try_at = (void *)0x0; + if (!(++tries % 10000)) + dprintf1("stuck in %s(), tries: %lld\n", __func__, tries); + continue; + } + last = ptr; + dprintf3("mpx_mini_alloc(0x%lx) returning: %p\n", sz, ptr); + return ptr; +} +void mpx_mini_free(void *ptr, long sz) +{ + dprintf2("%s() ptr: %p\n", __func__, ptr); + if ((unsigned long)ptr > 0x100000000000) { + dprintf1("uh oh !!!!!!!!!!!!!!! pointer too high: %p\n", ptr); + test_failed(); + } + sz = align_up(sz, sz_alignment); + dprintf3("%s() ptr: %p before munmap\n", __func__, ptr); + munmap(ptr, sz); + dprintf3("%s() ptr: %p DONE\n", __func__, ptr); +} + +#define NR_MALLOCS 100 +struct one_malloc { + char *ptr; + int nr_filled_btes; + unsigned long size; +}; +struct one_malloc mallocs[NR_MALLOCS]; + +void free_one_malloc(int index) +{ + unsigned long free_ptr; + unsigned long mask; + + if (!mallocs[index].ptr) + return; + + mpx_mini_free(mallocs[index].ptr, mallocs[index].size); + dprintf4("freed[%d]: %p\n", index, mallocs[index].ptr); + + free_ptr = (unsigned long)mallocs[index].ptr; + mask = alignment-1; + dprintf4("lowerbits: %lx / %lx mask: %lx\n", free_ptr, + (free_ptr & mask), mask); + assert((free_ptr & mask) == 0); + + mallocs[index].ptr = NULL; +} + +#ifdef __i386__ +#define MPX_BOUNDS_TABLE_COVERS 4096 +#else +#define MPX_BOUNDS_TABLE_COVERS (1 * MB) +#endif +void zap_everything(void) +{ + long after_zap; + long before_zap; + int i; + + before_zap = inspect_me(bounds_dir_ptr); + dprintf1("zapping everything start: %ld\n", before_zap); + for (i = 0; i < NR_MALLOCS; i++) + free_one_malloc(i); + + after_zap = inspect_me(bounds_dir_ptr); + dprintf1("zapping everything done: %ld\n", after_zap); + /* + * We only guarantee to empty the thing out if our allocations are + * exactly aligned on the boundaries of a boudns table. 
+ */ + if ((alignment >= MPX_BOUNDS_TABLE_COVERS) && + (sz_alignment >= MPX_BOUNDS_TABLE_COVERS)) { + if (after_zap != 0) + test_failed(); + + assert(after_zap == 0); + } +} + +void do_one_malloc(void) +{ + static int malloc_counter; + long sz; + int rand_index = (mpx_random() % NR_MALLOCS); + void *ptr = mallocs[rand_index].ptr; + + dprintf3("%s() enter\n", __func__); + + if (ptr) { + dprintf3("freeing one malloc at index: %d\n", rand_index); + free_one_malloc(rand_index); + if (mpx_random() % (NR_MALLOCS*3) == 3) { + int i; + dprintf3("zapping some more\n"); + for (i = rand_index; i < NR_MALLOCS; i++) + free_one_malloc(i); + } + if ((mpx_random() % zap_all_every_this_many_mallocs) == 4) + zap_everything(); + } + + /* 1->~1M */ + sz = (1 + mpx_random() % 1000) * 1000; + ptr = mpx_mini_alloc(sz); + if (!ptr) { + /* + * If we are failing allocations, just assume we + * are out of memory and zap everything. + */ + dprintf3("zapping everything because out of memory\n"); + zap_everything(); + goto out; + } + + dprintf3("malloc: %p size: 0x%lx\n", ptr, sz); + mallocs[rand_index].nr_filled_btes = cover_buf_with_bt_entries(ptr, sz); + mallocs[rand_index].ptr = ptr; + mallocs[rand_index].size = sz; +out: + if ((++malloc_counter) % inspect_every_this_many_mallocs == 0) + inspect_me(bounds_dir_ptr); +} + +void run_timed_test(void (*test_func)(void)) +{ + int done = 0; + long iteration = 0; + static time_t last_print; + time_t now; + time_t start; + + time(&start); + while (!done) { + time(&now); + if ((now - start) > TEST_DURATION_SECS) + done = 1; + + test_func(); + iteration++; + + if ((now - last_print > 1) || done) { + printf("iteration %ld complete, OK so far\n", iteration); + last_print = now; + } + } +} + +void check_bounds_table_frees(void) +{ + printf("executing unmaptest\n"); + inspect_me(bounds_dir_ptr); + run_timed_test(&do_one_malloc); + printf("done with malloc() fun\n"); +} + +void insn_test_failed(int test_nr, int test_round, void *buf, + void *buf_shadow, 
void *ptr) +{ + print_context(xsave_test_buf); + eprintf("ERROR: test %d round %d failed\n", test_nr, test_round); + while (test_nr == 5) { + struct mpx_bt_entry *bte; + struct mpx_bounds_dir *bd = (void *)bounds_dir_ptr; + struct mpx_bd_entry *bde = mpx_vaddr_to_bd_entry(buf, bd); + + printf(" bd: %p\n", bd); + printf("&bde: %p\n", bde); + printf("*bde: %lx\n", *(unsigned long *)bde); + if (!bd_entry_valid(bde)) + break; + + bte = mpx_vaddr_to_bt_entry(buf, bd); + printf(" te: %p\n", bte); + printf("bte[0]: %lx\n", bte->contents[0]); + printf("bte[1]: %lx\n", bte->contents[1]); + printf("bte[2]: %lx\n", bte->contents[2]); + printf("bte[3]: %lx\n", bte->contents[3]); + break; + } + test_failed(); +} + +void check_mpx_insns_and_tables(void) +{ + int successes = 0; + int failures = 0; + int buf_size = (1024*1024); + unsigned long *buf = malloc(buf_size); + const int total_nr_tests = NR_MPX_TEST_FUNCTIONS * TEST_ROUNDS; + int i, j; + + memset(buf, 0, buf_size); + memset(buf_shadow, 0, sizeof(buf_shadow)); + + for (i = 0; i < TEST_ROUNDS; i++) { + uint8_t *ptr = get_random_addr() + 8; + + for (j = 0; j < NR_MPX_TEST_FUNCTIONS; j++) { + if (0 && j != 5) { + successes++; + continue; + } + dprintf2("starting test %d round %d\n", j, i); + dprint_context(xsave_test_buf); + /* + * test5 loads an address from the bounds tables. + * The load will only complete if 'ptr' matches + * the load and the store, so with random addrs, + * the odds of this are very small. Make it + * higher by only moving 'ptr' 1/10 times. 
+ */ + if (random() % 10 <= 0) + ptr = get_random_addr() + 8; + dprintf3("random ptr{%p}\n", ptr); + dprint_context(xsave_test_buf); + run_helpers(j, (void *)buf, (void *)buf_shadow, ptr); + dprint_context(xsave_test_buf); + if (!compare_context(xsave_test_buf)) { + insn_test_failed(j, i, buf, buf_shadow, ptr); + failures++; + goto exit; + } + successes++; + dprint_context(xsave_test_buf); + dprintf2("finished test %d round %d\n", j, i); + dprintf3("\n"); + dprint_context(xsave_test_buf); + } + } + +exit: + dprintf2("\nabout to free:\n"); + free(buf); + dprintf1("successes: %d\n", successes); + dprintf1(" failures: %d\n", failures); + dprintf1(" tests: %d\n", total_nr_tests); + dprintf1(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs); + dprintf1(" saw: %d #BRs\n", br_count); + if (failures) { + eprintf("ERROR: non-zero number of failures\n"); + exit(20); + } + if (successes != total_nr_tests) { + eprintf("ERROR: succeded fewer than number of tries (%d != %d)\n", + successes, total_nr_tests); + exit(21); + } + if (num_upper_brs + num_lower_brs != br_count) { + eprintf("ERROR: unexpected number of #BRs: %jd %jd %d\n", + num_upper_brs, num_lower_brs, br_count); + eprintf("successes: %d\n", successes); + eprintf(" failures: %d\n", failures); + eprintf(" tests: %d\n", total_nr_tests); + eprintf(" expected: %jd #BRs\n", num_upper_brs + num_lower_brs); + eprintf(" saw: %d #BRs\n", br_count); + exit(22); + } +} + +/* + * This is supposed to SIGSEGV nicely once the kernel + * can no longer allocate vaddr space. 
+ */ +void exhaust_vaddr_space(void) +{ + unsigned long ptr; + /* Try to make sure there is no room for a bounds table anywhere */ + unsigned long skip = MPX_BOUNDS_TABLE_SIZE_BYTES - PAGE_SIZE; +#ifdef __i386__ + unsigned long max_vaddr = 0xf7788000UL; +#else + unsigned long max_vaddr = 0x800000000000UL; +#endif + + dprintf1("%s() start\n", __func__); + /* do not start at 0, we aren't allowed to map there */ + for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) { + void *ptr_ret; + int ret = madvise((void *)ptr, PAGE_SIZE, MADV_NORMAL); + + if (!ret) { + dprintf1("madvise() %lx ret: %d\n", ptr, ret); + continue; + } + ptr_ret = mmap((void *)ptr, PAGE_SIZE, PROT_READ|PROT_WRITE, + MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if (ptr_ret != (void *)ptr) { + perror("mmap"); + dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret); + break; + } + if (!(ptr & 0xffffff)) + dprintf1("mmap(%lx) ret: %p\n", ptr, ptr_ret); + } + for (ptr = PAGE_SIZE; ptr < max_vaddr; ptr += skip) { + dprintf2("covering 0x%lx with bounds table entries\n", ptr); + cover_buf_with_bt_entries((void *)ptr, PAGE_SIZE); + } + dprintf1("%s() end\n", __func__); + printf("done with vaddr space fun\n"); +} + +void mpx_table_test(void) +{ + printf("starting mpx bounds table test\n"); + run_timed_test(check_mpx_insns_and_tables); + printf("done with mpx bounds table test\n"); +} + +int main(int argc, char **argv) +{ + int unmaptest = 0; + int vaddrexhaust = 0; + int tabletest = 0; + int i; + + check_mpx_support(); + mpx_prepare(); + srandom(11179); + + bd_incore(); + init(); + bd_incore(); + + trace_me(); + + xsave_state((void *)xsave_test_buf, 0x1f); + if (!compare_context(xsave_test_buf)) + printf("Init failed\n"); + + for (i = 1; i < argc; i++) { + if (!strcmp(argv[i], "unmaptest")) + unmaptest = 1; + if (!strcmp(argv[i], "vaddrexhaust")) + vaddrexhaust = 1; + if (!strcmp(argv[i], "tabletest")) + tabletest = 1; + } + if (!(unmaptest || vaddrexhaust || tabletest)) { + unmaptest = 1; + /* vaddrexhaust = 1; */ + 
tabletest = 1; + } + if (unmaptest) + check_bounds_table_frees(); + if (tabletest) + mpx_table_test(); + if (vaddrexhaust) + exhaust_vaddr_space(); + printf("%s completed successfully\n", argv[0]); + exit(0); +} + +#include "mpx-dig.c" diff --git a/tools/testing/selftests/x86/mpx-mm.h b/tools/testing/selftests/x86/mpx-mm.h new file mode 100644 index 0000000..af706a5 --- /dev/null +++ b/tools/testing/selftests/x86/mpx-mm.h @@ -0,0 +1,9 @@ +#ifndef _MPX_MM_H +#define _MPX_MM_H + +#define PAGE_SIZE 4096 +#define MB (1UL<<20) + +extern long nr_incore(void *ptr, unsigned long size_bytes); + +#endif /* _MPX_MM_H */ |