From 8bfcb3960fde049b863266dab8c3617bb5a541aa Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Mon, 18 Aug 2008 12:33:20 +0200 Subject: x86: make movsl_mask definition non-CPU specific movsl_mask is currently defined in arch/x86/kernel/cpu/intel.c, which contains code specific to Intel CPUs. However, movsl_mask is used in the non-CPU specific code in arch/x86/lib/usercopy_32.c, which breaks the compilation when support for Intel CPUs is compiled out. This patch solves this problem by moving movsl_mask's definition close to its users in arch/x86/lib/usercopy_32.c. Signed-off-by: Thomas Petazzoni Cc: michael@free-electrons.com Signed-off-by: Ingo Molnar --- arch/x86/lib/usercopy_32.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c index 24e6094..9e68075 100644 --- a/arch/x86/lib/usercopy_32.c +++ b/arch/x86/lib/usercopy_32.c @@ -14,6 +14,13 @@ #include #include +#ifdef CONFIG_X86_INTEL_USERCOPY +/* + * Alignment at which movsl is preferred for bulk memory copies. + */ +struct movsl_mask movsl_mask __read_mostly; +#endif + static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n) { #ifdef CONFIG_X86_INTEL_USERCOPY -- cgit v1.1 From fb481dd56adf3c5b0993b8f052cc9ba966e3959d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 4 Sep 2008 13:46:11 +0200 Subject: x86: drop -funroll-loops for csum_partial_64.c Impact: performance optimization I did some rebenchmarking with modern compilers and dropping -funroll-loops makes the function consistently go faster by a few percent. So drop that flag. Thanks to Richard Guenther for a hint. Signed-off-by: Andi Kleen Signed-off-by: H. Peter Anvin --- arch/x86/lib/Makefile | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86/lib') diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index aa3fa41..55e11aa 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -17,9 +17,6 @@ ifeq ($(CONFIG_X86_32),y) lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o else obj-y += io_64.o iomap_copy_64.o - - CFLAGS_csum-partial_64.o := -funroll-loops - lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o lib-y += thunk_64.o clear_page_64.o copy_page_64.o lib-y += memmove_64.o memset_64.o -- cgit v1.1