summaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r--lib/Kconfig2
-rw-r--r--lib/crc-t10dif.c73
-rw-r--r--lib/lz4/lz4_compress.c4
-rw-r--r--lib/lz4/lz4_decompress.c6
-rw-r--r--lib/lz4/lz4hc_compress.c4
-rw-r--r--lib/mpi/longlong.h17
-rw-r--r--lib/raid6/.gitignore1
-rw-r--r--lib/raid6/Makefile40
-rw-r--r--lib/raid6/algos.c6
-rw-r--r--lib/raid6/neon.c58
-rw-r--r--lib/raid6/neon.uc80
-rw-r--r--lib/raid6/test/Makefile26
12 files changed, 267 insertions, 50 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index 35da513..71d9f81 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -66,8 +66,6 @@ config CRC16
config CRC_T10DIF
tristate "CRC calculation for the T10 Data Integrity Field"
- select CRYPTO
- select CRYPTO_CRCT10DIF
help
This option is only needed if a module that's not in the
kernel tree needs to calculate CRC checks for use with the
diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c
index fe3428c..fbbd66e 100644
--- a/lib/crc-t10dif.c
+++ b/lib/crc-t10dif.c
@@ -11,44 +11,57 @@
#include <linux/types.h>
#include <linux/module.h>
#include <linux/crc-t10dif.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <crypto/hash.h>
-static struct crypto_shash *crct10dif_tfm;
+/* Table generated using the following polynomium:
+ * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
+ * gt: 0x8bb7
+ */
+static const __u16 t10_dif_crc_table[256] = {
+ 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
+ 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
+ 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
+ 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
+ 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
+ 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
+ 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
+ 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
+ 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
+ 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
+ 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
+ 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
+ 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
+ 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
+ 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
+ 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
+ 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
+ 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
+ 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
+ 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
+ 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
+ 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
+ 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
+ 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
+ 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
+ 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
+ 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
+ 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
+ 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
+ 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
+ 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
+ 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
+};
__u16 crc_t10dif(const unsigned char *buffer, size_t len)
{
- struct {
- struct shash_desc shash;
- char ctx[2];
- } desc;
- int err;
-
- desc.shash.tfm = crct10dif_tfm;
- desc.shash.flags = 0;
- *(__u16 *)desc.ctx = 0;
+ __u16 crc = 0;
+ unsigned int i;
- err = crypto_shash_update(&desc.shash, buffer, len);
- BUG_ON(err);
+ for (i = 0 ; i < len ; i++)
+ crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
- return *(__u16 *)desc.ctx;
+ return crc;
}
EXPORT_SYMBOL(crc_t10dif);
-static int __init crc_t10dif_mod_init(void)
-{
- crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0);
- return PTR_RET(crct10dif_tfm);
-}
-
-static void __exit crc_t10dif_mod_fini(void)
-{
- crypto_free_shash(crct10dif_tfm);
-}
-
-module_init(crc_t10dif_mod_init);
-module_exit(crc_t10dif_mod_fini);
-
MODULE_DESCRIPTION("T10 DIF CRC calculation");
MODULE_LICENSE("GPL");
diff --git a/lib/lz4/lz4_compress.c b/lib/lz4/lz4_compress.c
index fd94058..28321d8 100644
--- a/lib/lz4/lz4_compress.c
+++ b/lib/lz4/lz4_compress.c
@@ -437,7 +437,7 @@ int lz4_compress(const unsigned char *src, size_t src_len,
exit:
return ret;
}
-EXPORT_SYMBOL_GPL(lz4_compress);
+EXPORT_SYMBOL(lz4_compress);
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("LZ4 compressor");
diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c
index d3414ea..411be80 100644
--- a/lib/lz4/lz4_decompress.c
+++ b/lib/lz4/lz4_decompress.c
@@ -299,7 +299,7 @@ exit_0:
return ret;
}
#ifndef STATIC
-EXPORT_SYMBOL_GPL(lz4_decompress);
+EXPORT_SYMBOL(lz4_decompress);
#endif
int lz4_decompress_unknownoutputsize(const char *src, size_t src_len,
@@ -319,8 +319,8 @@ exit_0:
return ret;
}
#ifndef STATIC
-EXPORT_SYMBOL_GPL(lz4_decompress_unknownoutputsize);
+EXPORT_SYMBOL(lz4_decompress_unknownoutputsize);
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("LZ4 Decompressor");
#endif
diff --git a/lib/lz4/lz4hc_compress.c b/lib/lz4/lz4hc_compress.c
index eb1a74f..f344f76 100644
--- a/lib/lz4/lz4hc_compress.c
+++ b/lib/lz4/lz4hc_compress.c
@@ -533,7 +533,7 @@ int lz4hc_compress(const unsigned char *src, size_t src_len,
exit:
return ret;
}
-EXPORT_SYMBOL_GPL(lz4hc_compress);
+EXPORT_SYMBOL(lz4hc_compress);
-MODULE_LICENSE("GPL");
+MODULE_LICENSE("Dual BSD/GPL");
MODULE_DESCRIPTION("LZ4HC compressor");
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h
index d411355..aac5114 100644
--- a/lib/mpi/longlong.h
+++ b/lib/mpi/longlong.h
@@ -151,15 +151,12 @@ do { \
#endif /* __a29k__ */
#if defined(__alpha) && W_TYPE_SIZE == 64
-#define umul_ppmm(ph, pl, m0, m1) \
-do { \
- UDItype __m0 = (m0), __m1 = (m1); \
- __asm__ ("umulh %r1,%2,%0" \
- : "=r" ((UDItype) ph) \
- : "%rJ" (__m0), \
- "rI" (__m1)); \
- (pl) = __m0 * __m1; \
- } while (0)
+#define umul_ppmm(ph, pl, m0, m1) \
+do { \
+ UDItype __m0 = (m0), __m1 = (m1); \
+ (ph) = __builtin_alpha_umulh(__m0, __m1); \
+ (pl) = __m0 * __m1; \
+} while (0)
#define UMUL_TIME 46
#ifndef LONGLONG_STANDALONE
#define udiv_qrnnd(q, r, n1, n0, d) \
@@ -167,7 +164,7 @@ do { UDItype __r; \
(q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
(r) = __r; \
} while (0)
-extern UDItype __udiv_qrnnd();
+extern UDItype __udiv_qrnnd(UDItype *, UDItype, UDItype, UDItype);
#define UDIV_TIME 220
#endif /* LONGLONG_STANDALONE */
#endif /* __alpha */
diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore
index 162beca..0a7e494 100644
--- a/lib/raid6/.gitignore
+++ b/lib/raid6/.gitignore
@@ -2,3 +2,4 @@ mktables
altivec*.c
int*.c
tables.c
+neon?.c
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 9f7c184..b462578 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -5,6 +5,7 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \
raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o
raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
+raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o
hostprogs-y += mktables
@@ -16,6 +17,21 @@ ifeq ($(CONFIG_ALTIVEC),y)
altivec_flags := -maltivec -mabi=altivec
endif
+# The GCC option -ffreestanding is required in order to compile code containing
+# ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel)
+ifeq ($(CONFIG_KERNEL_MODE_NEON),y)
+NEON_FLAGS := -ffreestanding
+ifeq ($(ARCH),arm)
+NEON_FLAGS += -mfloat-abi=softfp -mfpu=neon
+endif
+ifeq ($(ARCH),arm64)
+CFLAGS_REMOVE_neon1.o += -mgeneral-regs-only
+CFLAGS_REMOVE_neon2.o += -mgeneral-regs-only
+CFLAGS_REMOVE_neon4.o += -mgeneral-regs-only
+CFLAGS_REMOVE_neon8.o += -mgeneral-regs-only
+endif
+endif
+
targets += int1.c
$(obj)/int1.c: UNROLL := 1
$(obj)/int1.c: $(src)/int.uc $(src)/unroll.awk FORCE
@@ -70,6 +86,30 @@ $(obj)/altivec8.c: UNROLL := 8
$(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE
$(call if_changed,unroll)
+CFLAGS_neon1.o += $(NEON_FLAGS)
+targets += neon1.c
+$(obj)/neon1.c: UNROLL := 1
+$(obj)/neon1.c: $(src)/neon.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_neon2.o += $(NEON_FLAGS)
+targets += neon2.c
+$(obj)/neon2.c: UNROLL := 2
+$(obj)/neon2.c: $(src)/neon.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_neon4.o += $(NEON_FLAGS)
+targets += neon4.c
+$(obj)/neon4.c: UNROLL := 4
+$(obj)/neon4.c: $(src)/neon.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
+CFLAGS_neon8.o += $(NEON_FLAGS)
+targets += neon8.c
+$(obj)/neon8.c: UNROLL := 8
+$(obj)/neon8.c: $(src)/neon.uc $(src)/unroll.awk FORCE
+ $(call if_changed,unroll)
+
quiet_cmd_mktable = TABLE $@
cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 )
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 6d7316f..74e6f56 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -70,6 +70,12 @@ const struct raid6_calls * const raid6_algos[] = {
&raid6_intx2,
&raid6_intx4,
&raid6_intx8,
+#ifdef CONFIG_KERNEL_MODE_NEON
+ &raid6_neonx1,
+ &raid6_neonx2,
+ &raid6_neonx4,
+ &raid6_neonx8,
+#endif
NULL
};
diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c
new file mode 100644
index 0000000..36ad470
--- /dev/null
+++ b/lib/raid6/neon.c
@@ -0,0 +1,58 @@
+/*
+ * linux/lib/raid6/neon.c - RAID6 syndrome calculation using ARM NEON intrinsics
+ *
+ * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/raid/pq.h>
+
+#ifdef __KERNEL__
+#include <asm/neon.h>
+#else
+#define kernel_neon_begin()
+#define kernel_neon_end()
+#define cpu_has_neon() (1)
+#endif
+
+/*
+ * There are 2 reasons these wrappers are kept in a separate compilation unit
+ * from the actual implementations in neonN.c (generated from neon.uc by
+ * unroll.awk):
+ * - the actual implementations use NEON intrinsics, and the GCC support header
+ * (arm_neon.h) is not fully compatible (type wise) with the kernel;
+ * - the neonN.c files are compiled with -mfpu=neon and optimization enabled,
+ * and we have to make sure that we never use *any* NEON/VFP instructions
+ * outside a kernel_neon_begin()/kernel_neon_end() pair.
+ */
+
+#define RAID6_NEON_WRAPPER(_n) \
+ static void raid6_neon ## _n ## _gen_syndrome(int disks, \
+ size_t bytes, void **ptrs) \
+ { \
+ void raid6_neon ## _n ## _gen_syndrome_real(int, \
+ unsigned long, void**); \
+ kernel_neon_begin(); \
+ raid6_neon ## _n ## _gen_syndrome_real(disks, \
+ (unsigned long)bytes, ptrs); \
+ kernel_neon_end(); \
+ } \
+ struct raid6_calls const raid6_neonx ## _n = { \
+ raid6_neon ## _n ## _gen_syndrome, \
+ raid6_have_neon, \
+ "neonx" #_n, \
+ 0 \
+ }
+
+static int raid6_have_neon(void)
+{
+ return cpu_has_neon();
+}
+
+RAID6_NEON_WRAPPER(1);
+RAID6_NEON_WRAPPER(2);
+RAID6_NEON_WRAPPER(4);
+RAID6_NEON_WRAPPER(8);
diff --git a/lib/raid6/neon.uc b/lib/raid6/neon.uc
new file mode 100644
index 0000000..1b9ed79
--- /dev/null
+++ b/lib/raid6/neon.uc
@@ -0,0 +1,80 @@
+/* -----------------------------------------------------------------------
+ *
+ * neon.uc - RAID-6 syndrome calculation using ARM NEON instructions
+ *
+ * Copyright (C) 2012 Rob Herring
+ *
+ * Based on altivec.uc:
+ * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, Inc., 53 Temple Place Ste 330,
+ * Boston MA 02111-1307, USA; either version 2 of the License, or
+ * (at your option) any later version; incorporated herein by reference.
+ *
+ * ----------------------------------------------------------------------- */
+
+/*
+ * neon$#.c
+ *
+ * $#-way unrolled NEON intrinsics math RAID-6 instruction set
+ *
+ * This file is postprocessed using unroll.awk
+ */
+
+#include <arm_neon.h>
+
+typedef uint8x16_t unative_t;
+
+#define NBYTES(x) ((unative_t){x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x})
+#define NSIZE sizeof(unative_t)
+
+/*
+ * The SHLBYTE() operation shifts each byte left by 1, *not*
+ * rolling over into the next byte
+ */
+static inline unative_t SHLBYTE(unative_t v)
+{
+ return vshlq_n_u8(v, 1);
+}
+
+/*
+ * The MASK() operation returns 0xFF in any byte for which the high
+ * bit is 1, 0x00 for any byte for which the high bit is 0.
+ */
+static inline unative_t MASK(unative_t v)
+{
+ const uint8x16_t temp = NBYTES(0);
+ return (unative_t)vcltq_s8((int8x16_t)v, (int8x16_t)temp);
+}
+
+void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs)
+{
+ uint8_t **dptr = (uint8_t **)ptrs;
+ uint8_t *p, *q;
+ int d, z, z0;
+
+ register unative_t wd$$, wq$$, wp$$, w1$$, w2$$;
+ const unative_t x1d = NBYTES(0x1d);
+
+ z0 = disks - 3; /* Highest data disk */
+ p = dptr[z0+1]; /* XOR parity */
+ q = dptr[z0+2]; /* RS syndrome */
+
+ for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
+ wq$$ = wp$$ = vld1q_u8(&dptr[z0][d+$$*NSIZE]);
+ for ( z = z0-1 ; z >= 0 ; z-- ) {
+ wd$$ = vld1q_u8(&dptr[z][d+$$*NSIZE]);
+ wp$$ = veorq_u8(wp$$, wd$$);
+ w2$$ = MASK(wq$$);
+ w1$$ = SHLBYTE(wq$$);
+
+ w2$$ = vandq_u8(w2$$, x1d);
+ w1$$ = veorq_u8(w1$$, w2$$);
+ wq$$ = veorq_u8(w1$$, wd$$);
+ }
+ vst1q_u8(&p[d+NSIZE*$$], wp$$);
+ vst1q_u8(&q[d+NSIZE*$$], wq$$);
+ }
+}
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index 087332d..28afa1a 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -22,11 +22,23 @@ ifeq ($(ARCH),x86_64)
IS_X86 = yes
endif
+ifeq ($(ARCH),arm)
+ CFLAGS += -I../../../arch/arm/include -mfpu=neon
+ HAS_NEON = yes
+endif
+ifeq ($(ARCH),arm64)
+ CFLAGS += -I../../../arch/arm64/include
+ HAS_NEON = yes
+endif
+
ifeq ($(IS_X86),yes)
OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o
CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \
gcc -c -x assembler - >&/dev/null && \
rm ./-.o && echo -DCONFIG_AS_AVX2=1)
+else ifeq ($(HAS_NEON),yes)
+ OBJS += neon.o neon1.o neon2.o neon4.o neon8.o
+ CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1
else
HAS_ALTIVEC := $(shell echo -e '\#include <altivec.h>\nvector int a;' |\
gcc -c -x c - >&/dev/null && \
@@ -55,6 +67,18 @@ raid6.a: $(OBJS)
raid6test: test.c raid6.a
$(CC) $(CFLAGS) -o raid6test $^
+neon1.c: neon.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=1 < neon.uc > $@
+
+neon2.c: neon.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=2 < neon.uc > $@
+
+neon4.c: neon.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=4 < neon.uc > $@
+
+neon8.c: neon.uc ../unroll.awk
+ $(AWK) ../unroll.awk -vN=8 < neon.uc > $@
+
altivec1.c: altivec.uc ../unroll.awk
$(AWK) ../unroll.awk -vN=1 < altivec.uc > $@
@@ -89,7 +113,7 @@ tables.c: mktables
./mktables > tables.c
clean:
- rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c tables.c raid6test
+ rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
spotless: clean
rm -f *~
OpenPOWER on IntegriCloud