From 7d11965ddb9b9b1e0a5d13c58345ada1ccbc663b Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 16 May 2013 17:20:32 +0200 Subject: lib/raid6: add ARM-NEON accelerated syndrome calculation Rebased/reworked a patch contributed by Rob Herring that uses NEON intrinsics to perform the RAID-6 syndrome calculations. It uses the existing unroll.awk code to generate several unrolled versions of which the best performing one is selected at boot time. Signed-off-by: Ard Biesheuvel Acked-by: Nicolas Pitre Cc: hpa@linux.intel.com --- lib/raid6/Makefile | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'lib/raid6/Makefile') diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 9f7c184..b462578 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -5,6 +5,7 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o +raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o hostprogs-y += mktables @@ -16,6 +17,21 @@ ifeq ($(CONFIG_ALTIVEC),y) altivec_flags := -maltivec -mabi=altivec endif +# The GCC option -ffreestanding is required in order to compile code containing +# ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel) +ifeq ($(CONFIG_KERNEL_MODE_NEON),y) +NEON_FLAGS := -ffreestanding +ifeq ($(ARCH),arm) +NEON_FLAGS += -mfloat-abi=softfp -mfpu=neon +endif +ifeq ($(ARCH),arm64) +CFLAGS_REMOVE_neon1.o += -mgeneral-regs-only +CFLAGS_REMOVE_neon2.o += -mgeneral-regs-only +CFLAGS_REMOVE_neon4.o += -mgeneral-regs-only +CFLAGS_REMOVE_neon8.o += -mgeneral-regs-only +endif +endif + targets += int1.c $(obj)/int1.c: UNROLL := 1 $(obj)/int1.c: $(src)/int.uc $(src)/unroll.awk FORCE @@ -70,6 +86,30 @@ $(obj)/altivec8.c: UNROLL := 8 $(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) +CFLAGS_neon1.o += $(NEON_FLAGS) +targets += neon1.c +$(obj)/neon1.c: UNROLL := 1 +$(obj)/neon1.c: $(src)/neon.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_neon2.o += $(NEON_FLAGS) +targets += neon2.c +$(obj)/neon2.c: UNROLL := 2 +$(obj)/neon2.c: $(src)/neon.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_neon4.o += $(NEON_FLAGS) +targets += neon4.c +$(obj)/neon4.c: UNROLL := 4 +$(obj)/neon4.c: $(src)/neon.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_neon8.o += $(NEON_FLAGS) +targets += neon8.c +$(obj)/neon8.c: UNROLL := 8 +$(obj)/neon8.c: $(src)/neon.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + quiet_cmd_mktable = TABLE $@ cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) -- cgit v1.1