diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig.debug | 2 | ||||
-rw-r--r-- | lib/crc32.c | 17 | ||||
-rw-r--r-- | lib/decompress_inflate.c | 2 | ||||
-rw-r--r-- | lib/div64.c | 40 | ||||
-rw-r--r-- | lib/genalloc.c | 22 | ||||
-rw-r--r-- | lib/lz4/lz4_decompress.c | 8 | ||||
-rw-r--r-- | lib/radix-tree.c | 41 | ||||
-rw-r--r-- | lib/raid6/Makefile | 6 | ||||
-rw-r--r-- | lib/raid6/algos.c | 3 | ||||
-rw-r--r-- | lib/raid6/test/Makefile | 9 | ||||
-rw-r--r-- | lib/raid6/tilegx.uc | 86 | ||||
-rw-r--r-- | lib/rbtree.c | 40 | ||||
-rw-r--r-- | lib/rbtree_test.c | 12 |
13 files changed, 264 insertions, 24 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 652bea9..c9eef36 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1461,7 +1461,7 @@ config BACKTRACE_SELF_TEST config RBTREE_TEST tristate "Red-Black tree test" - depends on m && DEBUG_KERNEL + depends on DEBUG_KERNEL help A benchmark measuring the performance of the rbtree library. Also includes rbtree invariant checks. diff --git a/lib/crc32.c b/lib/crc32.c index 072fbd8..410093d 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -131,11 +131,14 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) #endif /** - * crc32_le() - Calculate bitwise little-endian Ethernet AUTODIN II CRC32 - * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for - * other uses, or the previous crc32 value if computing incrementally. - * @p: pointer to buffer over which CRC is run + * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II + * CRC32/CRC32C + * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for other + * uses, or the previous crc32/crc32c value if computing incrementally. + * @p: pointer to buffer over which CRC32/CRC32C is run * @len: length of buffer @p + * @tab: little-endian Ethernet table + * @polynomial: CRC32/CRC32c LE polynomial */ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, size_t len, const u32 (*tab)[256], @@ -201,11 +204,13 @@ EXPORT_SYMBOL(crc32_le); EXPORT_SYMBOL(__crc32c_le); /** - * crc32_be() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 + * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 * @crc: seed value for computation. ~0 for Ethernet, sometimes 0 for * other uses, or the previous crc32 value if computing incrementally. - * @p: pointer to buffer over which CRC is run + * @p: pointer to buffer over which CRC32 is run * @len: length of buffer @p + * @tab: big-endian Ethernet table + * @polynomial: CRC32 BE polynomial */ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p, size_t len, const u32 (*tab)[256], diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index 19ff89e..d619b28 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -48,7 +48,7 @@ STATIC int INIT gunzip(unsigned char *buf, int len, out_len = 0x8000; /* 32 K */ out_buf = malloc(out_len); } else { - out_len = 0x7fffffff; /* no limit */ + out_len = ((size_t)~0) - (size_t)out_buf; /* no limit */ } if (!out_buf) { error("Out of memory while allocating output buffer"); diff --git a/lib/div64.c b/lib/div64.c index a163b6c..4382ad7 100644 --- a/lib/div64.c +++ b/lib/div64.c @@ -79,6 +79,46 @@ EXPORT_SYMBOL(div_s64_rem); #endif /** + * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder + * @dividend: 64bit dividend + * @divisor: 64bit divisor + * @remainder: 64bit remainder + * + * This implementation is a comparable to algorithm used by div64_u64. + * But this operation, which includes math for calculating the remainder, + * is kept distinct to avoid slowing down the div64_u64 operation on 32bit + * systems. + */ +#ifndef div64_u64_rem +u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) +{ + u32 high = divisor >> 32; + u64 quot; + + if (high == 0) { + u32 rem32; + quot = div_u64_rem(dividend, divisor, &rem32); + *remainder = rem32; + } else { + int n = 1 + fls(high); + quot = div_u64(dividend >> n, divisor >> n); + + if (quot != 0) + quot--; + + *remainder = dividend - quot * divisor; + if (*remainder >= divisor) { + quot++; + *remainder -= divisor; + } + } + + return quot; +} +EXPORT_SYMBOL(div64_u64_rem); +#endif + +/** * div64_u64 - unsigned 64bit divide with 64bit divisor * @dividend: 64bit dividend * @divisor: 64bit divisor diff --git a/lib/genalloc.c b/lib/genalloc.c index b35cfa9..26cf20b 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -37,6 +37,11 @@ #include <linux/of_address.h> #include <linux/of_device.h> +static inline size_t chunk_size(const struct gen_pool_chunk *chunk) +{ + return chunk->end_addr - chunk->start_addr + 1; +} + static int set_bits_ll(unsigned long *addr, unsigned long mask_to_set) { unsigned long val, nval; @@ -182,13 +187,13 @@ int gen_pool_add_virt(struct gen_pool *pool, unsigned long virt, phys_addr_t phy int nbytes = sizeof(struct gen_pool_chunk) + BITS_TO_LONGS(nbits) * sizeof(long); - chunk = kmalloc_node(nbytes, GFP_KERNEL | __GFP_ZERO, nid); + chunk = kzalloc_node(nbytes, GFP_KERNEL, nid); if (unlikely(chunk == NULL)) return -ENOMEM; chunk->phys_addr = phys; chunk->start_addr = virt; - chunk->end_addr = virt + size; + chunk->end_addr = virt + size - 1; atomic_set(&chunk->avail, size); spin_lock(&pool->lock); @@ -213,7 +218,7 @@ phys_addr_t gen_pool_virt_to_phys(struct gen_pool *pool, unsigned long addr) rcu_read_lock(); list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { - if (addr >= chunk->start_addr && addr < chunk->end_addr) { + if (addr >= chunk->start_addr && addr <= chunk->end_addr) { paddr = chunk->phys_addr + (addr - chunk->start_addr); break; } @@ -242,7 +247,7 @@ void gen_pool_destroy(struct gen_pool *pool) chunk = list_entry(_chunk, struct gen_pool_chunk, next_chunk); list_del(&chunk->next_chunk); - end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit = chunk_size(chunk) >> order; bit = find_next_bit(chunk->bits, end_bit, 0); BUG_ON(bit < end_bit); @@ -283,7 +288,7 @@ unsigned long gen_pool_alloc(struct gen_pool *pool, size_t size) if (size > atomic_read(&chunk->avail)) continue; - end_bit = (chunk->end_addr - chunk->start_addr) >> order; + end_bit = chunk_size(chunk) >> order; retry: start_bit = pool->algo(chunk->bits, end_bit, start_bit, nbits, pool->data); @@ -330,8 +335,8 @@ void gen_pool_free(struct gen_pool *pool, unsigned long addr, size_t size) nbits = (size + (1UL << order) - 1) >> order; rcu_read_lock(); list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) { - if (addr >= chunk->start_addr && addr < chunk->end_addr) { - BUG_ON(addr + size > chunk->end_addr); + if (addr >= chunk->start_addr && addr <= chunk->end_addr) { + BUG_ON(addr + size - 1 > chunk->end_addr); start_bit = (addr - chunk->start_addr) >> order; remain = bitmap_clear_ll(chunk->bits, start_bit, nbits); BUG_ON(remain); @@ -400,7 +405,7 @@ size_t gen_pool_size(struct gen_pool *pool) rcu_read_lock(); list_for_each_entry_rcu(chunk, &pool->chunks, next_chunk) - size += chunk->end_addr - chunk->start_addr; + size += chunk_size(chunk); rcu_read_unlock(); return size; } @@ -519,7 +524,6 @@ struct gen_pool *devm_gen_pool_create(struct device *dev, int min_alloc_order, /** * dev_get_gen_pool - Obtain the gen_pool (if any) for a device * @dev: device to retrieve the gen_pool from - * @name: Optional name for the gen_pool, usually NULL * * Returns the gen_pool for the device if one is present, or NULL. */ diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 411be80..df6839e 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -283,8 +283,8 @@ _output_error: return (int) (-(((char *) ip) - source)); } -int lz4_decompress(const char *src, size_t *src_len, char *dest, - size_t actual_dest_len) +int lz4_decompress(const unsigned char *src, size_t *src_len, + unsigned char *dest, size_t actual_dest_len) { int ret = -1; int input_len = 0; @@ -302,8 +302,8 @@ exit_0: EXPORT_SYMBOL(lz4_decompress); #endif -int lz4_decompress_unknownoutputsize(const char *src, size_t src_len, - char *dest, size_t *dest_len) +int lz4_decompress_unknownoutputsize(const unsigned char *src, size_t src_len, + unsigned char *dest, size_t *dest_len) { int ret = -1; int out_len = 0; diff --git a/lib/radix-tree.c b/lib/radix-tree.c index e796429..7811ed3 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -32,6 +32,7 @@ #include <linux/string.h> #include <linux/bitops.h> #include <linux/rcupdate.h> +#include <linux/hardirq.h> /* in_interrupt() */ #ifdef __KERNEL__ @@ -207,7 +208,12 @@ radix_tree_node_alloc(struct radix_tree_root *root) struct radix_tree_node *ret = NULL; gfp_t gfp_mask = root_gfp_mask(root); - if (!(gfp_mask & __GFP_WAIT)) { + /* + * Preload code isn't irq safe and it doesn't make sence to use + * preloading in the interrupt anyway as all the allocations have to + * be atomic. So just do normal allocation when in interrupt. + */ + if (!(gfp_mask & __GFP_WAIT) && !in_interrupt()) { struct radix_tree_preload *rtp; /* @@ -264,7 +270,7 @@ radix_tree_node_free(struct radix_tree_node *node) * To make use of this facility, the radix tree must be initialised without * __GFP_WAIT being passed to INIT_RADIX_TREE(). */ -int radix_tree_preload(gfp_t gfp_mask) +static int __radix_tree_preload(gfp_t gfp_mask) { struct radix_tree_preload *rtp; struct radix_tree_node *node; @@ -288,9 +294,40 @@ int radix_tree_preload(gfp_t gfp_mask) out: return ret; } + +/* + * Load up this CPU's radix_tree_node buffer with sufficient objects to + * ensure that the addition of a single element in the tree cannot fail. On + * success, return zero, with preemption disabled. On error, return -ENOMEM + * with preemption not disabled. + * + * To make use of this facility, the radix tree must be initialised without + * __GFP_WAIT being passed to INIT_RADIX_TREE(). + */ +int radix_tree_preload(gfp_t gfp_mask) +{ + /* Warn on non-sensical use... */ + WARN_ON_ONCE(!(gfp_mask & __GFP_WAIT)); + return __radix_tree_preload(gfp_mask); +} EXPORT_SYMBOL(radix_tree_preload); /* + * The same as above function, except we don't guarantee preloading happens. + * We do it, if we decide it helps. On success, return zero with preemption + * disabled. On error, return -ENOMEM with preemption not disabled. + */ +int radix_tree_maybe_preload(gfp_t gfp_mask) +{ + if (gfp_mask & __GFP_WAIT) + return __radix_tree_preload(gfp_mask); + /* Preloading doesn't help anything with this gfp mask, skip it */ + preempt_disable(); + return 0; +} +EXPORT_SYMBOL(radix_tree_maybe_preload); + +/* * Return the maximum key which can be store into a * radix tree with height HEIGHT. */ diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index b462578..c7dab06 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -6,6 +6,7 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o +raid6_pq-$(CONFIG_TILEGX) += tilegx8.o hostprogs-y += mktables @@ -110,6 +111,11 @@ $(obj)/neon8.c: UNROLL := 8 $(obj)/neon8.c: $(src)/neon.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) +targets += tilegx8.c +$(obj)/tilegx8.c: UNROLL := 8 +$(obj)/tilegx8.c: $(src)/tilegx.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + quiet_cmd_mktable = TABLE $@ cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 74e6f56..f0b1aa3 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -66,6 +66,9 @@ const struct raid6_calls * const raid6_algos[] = { &raid6_altivec4, &raid6_altivec8, #endif +#if defined(CONFIG_TILEGX) + &raid6_tilegx8, +#endif &raid6_intx1, &raid6_intx2, &raid6_intx4, diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index 28afa1a..29090f3 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -40,13 +40,16 @@ else ifeq ($(HAS_NEON),yes) OBJS += neon.o neon1.o neon2.o neon4.o neon8.o CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 else - HAS_ALTIVEC := $(shell echo -e '\#include <altivec.h>\nvector int a;' |\ + HAS_ALTIVEC := $(shell printf '\#include <altivec.h>\nvector int a;\n' |\ gcc -c -x c - >&/dev/null && \ rm ./-.o && echo yes) ifeq ($(HAS_ALTIVEC),yes) OBJS += altivec1.o altivec2.o altivec4.o altivec8.o endif endif +ifeq ($(ARCH),tilegx) +OBJS += tilegx8.o +endif .c.o: $(CC) $(CFLAGS) -c -o $@ $< @@ -109,11 +112,15 @@ int16.c: int.uc ../unroll.awk int32.c: int.uc ../unroll.awk $(AWK) ../unroll.awk -vN=32 < int.uc > $@ +tilegx8.c: tilegx.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=8 < tilegx.uc > $@ + tables.c: mktables ./mktables > tables.c clean: rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test + rm -f tilegx*.c spotless: clean rm -f *~ diff --git a/lib/raid6/tilegx.uc b/lib/raid6/tilegx.uc new file mode 100644 index 0000000..e7c2945 --- /dev/null +++ b/lib/raid6/tilegx.uc @@ -0,0 +1,86 @@ +/* -*- linux-c -*- ------------------------------------------------------- * + * + * Copyright 2002 H. Peter Anvin - All Rights Reserved + * Copyright 2012 Tilera Corporation - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * tilegx$#.c + * + * $#-way unrolled TILE-Gx SIMD for RAID-6 math. + * + * This file is postprocessed using unroll.awk. + * + */ + +#include <linux/raid/pq.h> + +/* Create 8 byte copies of constant byte */ +# define NBYTES(x) (__insn_v1addi(0, x)) +# define NSIZE 8 + +/* + * The SHLBYTE() operation shifts each byte left by 1, *not* + * rolling over into the next byte + */ +static inline __attribute_const__ u64 SHLBYTE(u64 v) +{ + /* Vector One Byte Shift Left Immediate. */ + return __insn_v1shli(v, 1); +} + +/* + * The MASK() operation returns 0xFF in any byte for which the high + * bit is 1, 0x00 for any byte for which the high bit is 0. + */ +static inline __attribute_const__ u64 MASK(u64 v) +{ + /* Vector One Byte Shift Right Signed Immediate. */ + return __insn_v1shrsi(v, 7); +} + + +void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u64 *p, *q; + int d, z, z0; + + u64 wd$$, wq$$, wp$$, w1$$, w2$$; + u64 x1d = NBYTES(0x1d); + u64 * z0ptr; + + z0 = disks - 3; /* Highest data disk */ + p = (u64 *)dptr[z0+1]; /* XOR parity */ + q = (u64 *)dptr[z0+2]; /* RS syndrome */ + + z0ptr = (u64 *)&dptr[z0][0]; + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { + wq$$ = wp$$ = *z0ptr++; + for ( z = z0-1 ; z >= 0 ; z-- ) { + wd$$ = *(u64 *)&dptr[z][d+$$*NSIZE]; + wp$$ = wp$$ ^ wd$$; + w2$$ = MASK(wq$$); + w1$$ = SHLBYTE(wq$$); + w2$$ = w2$$ & x1d; + w1$$ = w1$$ ^ w2$$; + wq$$ = w1$$ ^ wd$$; + } + *p++ = wp$$; + *q++ = wq$$; + } +} + +const struct raid6_calls raid6_tilegx$# = { + raid6_tilegx$#_gen_syndrome, + NULL, + "tilegx$#", + 0 +}; diff --git a/lib/rbtree.c b/lib/rbtree.c index c0e31fe..65f4eff 100644 --- a/lib/rbtree.c +++ b/lib/rbtree.c @@ -518,3 +518,43 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new, *new = *victim; } EXPORT_SYMBOL(rb_replace_node); + +static struct rb_node *rb_left_deepest_node(const struct rb_node *node) +{ + for (;;) { + if (node->rb_left) + node = node->rb_left; + else if (node->rb_right) + node = node->rb_right; + else + return (struct rb_node *)node; + } +} + +struct rb_node *rb_next_postorder(const struct rb_node *node) +{ + const struct rb_node *parent; + if (!node) + return NULL; + parent = rb_parent(node); + + /* If we're sitting on node, we've already seen our children */ + if (parent && node == parent->rb_left && parent->rb_right) { + /* If we are the parent's left node, go to the parent's right + * node then all the way down to the left */ + return rb_left_deepest_node(parent->rb_right); + } else + /* Otherwise we are the parent's right node, and the parent + * should be next */ + return (struct rb_node *)parent; +} +EXPORT_SYMBOL(rb_next_postorder); + +struct rb_node *rb_first_postorder(const struct rb_root *root) +{ + if (!root->rb_node) + return NULL; + + return rb_left_deepest_node(root->rb_node); +} +EXPORT_SYMBOL(rb_first_postorder); diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 122f02f..31dd4cc 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -114,6 +114,16 @@ static int black_path_count(struct rb_node *rb) return count; } +static void check_postorder(int nr_nodes) +{ + struct rb_node *rb; + int count = 0; + for (rb = rb_first_postorder(&root); rb; rb = rb_next_postorder(rb)) + count++; + + WARN_ON_ONCE(count != nr_nodes); +} + static void check(int nr_nodes) { struct rb_node *rb; @@ -136,6 +146,8 @@ static void check(int nr_nodes) WARN_ON_ONCE(count != nr_nodes); WARN_ON_ONCE(count < (1 << black_path_count(rb_last(&root))) - 1); + + check_postorder(nr_nodes); } static void check_augmented(int nr_nodes) |