diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig | 12 | ||||
-rw-r--r-- | lib/Kconfig.debug | 19 | ||||
-rw-r--r-- | lib/Makefile | 1 | ||||
-rw-r--r-- | lib/crc-t10dif.c | 74 | ||||
-rw-r--r-- | lib/debugobjects.c | 20 | ||||
-rw-r--r-- | lib/dump_stack.c | 4 | ||||
-rw-r--r-- | lib/dynamic_debug.c | 2 | ||||
-rw-r--r-- | lib/earlycpio.c | 27 | ||||
-rw-r--r-- | lib/kobject.c | 37 | ||||
-rw-r--r-- | lib/lockref.c | 166 | ||||
-rw-r--r-- | lib/raid6/.gitignore | 1 | ||||
-rw-r--r-- | lib/raid6/Makefile | 40 | ||||
-rw-r--r-- | lib/raid6/algos.c | 6 | ||||
-rw-r--r-- | lib/raid6/neon.c | 58 | ||||
-rw-r--r-- | lib/raid6/neon.uc | 80 | ||||
-rw-r--r-- | lib/raid6/test/Makefile | 26 | ||||
-rw-r--r-- | lib/swiotlb.c | 8 | ||||
-rw-r--r-- | lib/vsprintf.c | 82 |
18 files changed, 590 insertions, 73 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 71d9f81..b3c8be0 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -48,6 +48,16 @@ config STMP_DEVICE config PERCPU_RWSEM boolean +config ARCH_USE_CMPXCHG_LOCKREF + bool + +config CMPXCHG_LOCKREF + def_bool y if ARCH_USE_CMPXCHG_LOCKREF + depends on SMP + depends on !GENERIC_LOCKBREAK + depends on !DEBUG_SPINLOCK + depends on !DEBUG_LOCK_ALLOC + config CRC_CCITT tristate "CRC-CCITT functions" help @@ -66,6 +76,8 @@ config CRC16 config CRC_T10DIF tristate "CRC calculation for the T10 Data Integrity Field" + select CRYPTO + select CRYPTO_CRCT10DIF help This option is only needed if a module that's not in the kernel tree needs to calculate CRC checks for use with the diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1501aa5..444e1c1 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -981,6 +981,25 @@ config DEBUG_KOBJECT If you say Y here, some extra kobject debugging messages will be sent to the syslog. +config DEBUG_KOBJECT_RELEASE + bool "kobject release debugging" + depends on DEBUG_KERNEL + help + kobjects are reference counted objects. This means that their + last reference count put is not predictable, and the kobject can + live on past the point at which a driver decides to drop it's + initial reference to the kobject gained on allocation. An + example of this would be a struct device which has just been + unregistered. + + However, some buggy drivers assume that after such an operation, + the memory backing the kobject can be immediately freed. This + goes completely against the principles of a refcounted object. + + If you say Y here, the kernel will delay the release of kobjects + on the last reference count to improve the visibility of this + kind of kobject release bug. + config HAVE_DEBUG_BUGVERBOSE bool diff --git a/lib/Makefile b/lib/Makefile index 7baccfd..f2cb308 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -20,6 +20,7 @@ lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o lib-y += kobject.o klist.o +obj-y += lockref.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ diff --git a/lib/crc-t10dif.c b/lib/crc-t10dif.c index fbbd66e..43bc5b0 100644 --- a/lib/crc-t10dif.c +++ b/lib/crc-t10dif.c @@ -11,57 +11,45 @@ #include <linux/types.h> #include <linux/module.h> #include <linux/crc-t10dif.h> +#include <linux/err.h> +#include <linux/init.h> +#include <crypto/hash.h> -/* Table generated using the following polynomium: - * x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1 - * gt: 0x8bb7 - */ -static const __u16 t10_dif_crc_table[256] = { - 0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B, - 0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6, - 0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6, - 0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B, - 0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1, - 0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C, - 0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C, - 0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781, - 0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8, - 0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255, - 0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925, - 0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698, - 0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472, - 0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF, - 0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF, - 0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02, - 0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA, - 0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067, - 0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17, - 0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA, - 0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640, - 0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD, - 0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D, - 0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30, - 0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759, - 0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4, - 0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394, - 0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29, - 0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3, - 0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E, - 0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E, - 0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3 -}; +static struct crypto_shash *crct10dif_tfm; __u16 crc_t10dif(const unsigned char *buffer, size_t len) { - __u16 crc = 0; - unsigned int i; + struct { + struct shash_desc shash; + char ctx[2]; + } desc; + int err; + + desc.shash.tfm = crct10dif_tfm; + desc.shash.flags = 0; + *(__u16 *)desc.ctx = 0; - for (i = 0 ; i < len ; i++) - crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff]; + err = crypto_shash_update(&desc.shash, buffer, len); + BUG_ON(err); - return crc; + return *(__u16 *)desc.ctx; } EXPORT_SYMBOL(crc_t10dif); +static int __init crc_t10dif_mod_init(void) +{ + crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0); + return PTR_RET(crct10dif_tfm); +} + +static void __exit crc_t10dif_mod_fini(void) +{ + crypto_free_shash(crct10dif_tfm); +} + +module_init(crc_t10dif_mod_init); +module_exit(crc_t10dif_mod_fini); + MODULE_DESCRIPTION("T10 DIF CRC calculation"); MODULE_LICENSE("GPL"); +MODULE_SOFTDEP("pre: crct10dif"); diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 37061ed..bf2c8b1 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -381,19 +381,21 @@ void debug_object_init_on_stack(void *addr, struct debug_obj_descr *descr) * debug_object_activate - debug checks when an object is activated * @addr: address of the object * @descr: pointer to an object specific debug description structure + * Returns 0 for success, -EINVAL for check failed. */ -void debug_object_activate(void *addr, struct debug_obj_descr *descr) +int debug_object_activate(void *addr, struct debug_obj_descr *descr) { enum debug_obj_state state; struct debug_bucket *db; struct debug_obj *obj; unsigned long flags; + int ret; struct debug_obj o = { .object = addr, .state = ODEBUG_STATE_NOTAVAILABLE, .descr = descr }; if (!debug_objects_enabled) - return; + return 0; db = get_bucket((unsigned long) addr); @@ -405,23 +407,26 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr) case ODEBUG_STATE_INIT: case ODEBUG_STATE_INACTIVE: obj->state = ODEBUG_STATE_ACTIVE; + ret = 0; break; case ODEBUG_STATE_ACTIVE: debug_print_object(obj, "activate"); state = obj->state; raw_spin_unlock_irqrestore(&db->lock, flags); - debug_object_fixup(descr->fixup_activate, addr, state); - return; + ret = debug_object_fixup(descr->fixup_activate, addr, state); + return ret ? -EINVAL : 0; case ODEBUG_STATE_DESTROYED: debug_print_object(obj, "activate"); + ret = -EINVAL; break; default: + ret = 0; break; } raw_spin_unlock_irqrestore(&db->lock, flags); - return; + return ret; } raw_spin_unlock_irqrestore(&db->lock, flags); @@ -431,8 +436,11 @@ void debug_object_activate(void *addr, struct debug_obj_descr *descr) * true or not. */ if (debug_object_fixup(descr->fixup_activate, addr, - ODEBUG_STATE_NOTAVAILABLE)) + ODEBUG_STATE_NOTAVAILABLE)) { debug_print_object(&o, "activate"); + return -EINVAL; + } + return 0; } /** diff --git a/lib/dump_stack.c b/lib/dump_stack.c index c031541..f23b63f 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -23,7 +23,7 @@ static void __dump_stack(void) #ifdef CONFIG_SMP static atomic_t dump_lock = ATOMIC_INIT(-1); -void dump_stack(void) +asmlinkage void dump_stack(void) { int was_locked; int old; @@ -55,7 +55,7 @@ retry: preempt_enable(); } #else -void dump_stack(void) +asmlinkage void dump_stack(void) { __dump_stack(); } diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 99fec3a..c37aeac 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -309,7 +309,7 @@ static int ddebug_parse_query(char *words[], int nwords, struct ddebug_query *query, const char *modname) { unsigned int i; - int rc; + int rc = 0; /* check we have an even number of words */ if (nwords % 2 != 0) { diff --git a/lib/earlycpio.c b/lib/earlycpio.c index 7aa7ce2..3eb3e47 100644 --- a/lib/earlycpio.c +++ b/lib/earlycpio.c @@ -49,22 +49,23 @@ enum cpio_fields { /** * cpio_data find_cpio_data - Search for files in an uncompressed cpio - * @path: The directory to search for, including a slash at the end - * @data: Pointer to the the cpio archive or a header inside - * @len: Remaining length of the cpio based on data pointer - * @offset: When a matching file is found, this is the offset to the - * beginning of the cpio. It can be used to iterate through - * the cpio to find all files inside of a directory path + * @path: The directory to search for, including a slash at the end + * @data: Pointer to the the cpio archive or a header inside + * @len: Remaining length of the cpio based on data pointer + * @nextoff: When a matching file is found, this is the offset from the + * beginning of the cpio to the beginning of the next file, not the + * matching file itself. It can be used to iterate through the cpio + * to find all files inside of a directory path. * - * @return: struct cpio_data containing the address, length and - * filename (with the directory path cut off) of the found file. - * If you search for a filename and not for files in a directory, - * pass the absolute path of the filename in the cpio and make sure - * the match returned an empty filename string. + * @return: struct cpio_data containing the address, length and + * filename (with the directory path cut off) of the found file. + * If you search for a filename and not for files in a directory, + * pass the absolute path of the filename in the cpio and make sure + * the match returned an empty filename string. */ struct cpio_data find_cpio_data(const char *path, void *data, - size_t len, long *offset) + size_t len, long *nextoff) { const size_t cpio_header_len = 8*C_NFIELDS - 2; struct cpio_data cd = { NULL, 0, "" }; @@ -124,7 +125,7 @@ struct cpio_data find_cpio_data(const char *path, void *data, if ((ch[C_MODE] & 0170000) == 0100000 && ch[C_NAMESIZE] >= mypathsize && !memcmp(p, path, mypathsize)) { - *offset = (long)nptr - (long)data; + *nextoff = (long)nptr - (long)data; if (ch[C_NAMESIZE] - mypathsize >= MAX_CPIO_FILE_NAME) { pr_warn( "File %s exceeding MAX_CPIO_FILE_NAME [%d]\n", diff --git a/lib/kobject.c b/lib/kobject.c index 4a1f33d..9621751 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -545,8 +545,8 @@ static void kobject_cleanup(struct kobject *kobj) struct kobj_type *t = get_ktype(kobj); const char *name = kobj->name; - pr_debug("kobject: '%s' (%p): %s\n", - kobject_name(kobj), kobj, __func__); + pr_debug("kobject: '%s' (%p): %s, parent %p\n", + kobject_name(kobj), kobj, __func__, kobj->parent); if (t && !t->release) pr_debug("kobject: '%s' (%p): does not have a release() " @@ -580,9 +580,25 @@ static void kobject_cleanup(struct kobject *kobj) } } +#ifdef CONFIG_DEBUG_KOBJECT_RELEASE +static void kobject_delayed_cleanup(struct work_struct *work) +{ + kobject_cleanup(container_of(to_delayed_work(work), + struct kobject, release)); +} +#endif + static void kobject_release(struct kref *kref) { - kobject_cleanup(container_of(kref, struct kobject, kref)); + struct kobject *kobj = container_of(kref, struct kobject, kref); +#ifdef CONFIG_DEBUG_KOBJECT_RELEASE + pr_debug("kobject: '%s' (%p): %s, parent %p (delayed)\n", + kobject_name(kobj), kobj, __func__, kobj->parent); + INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup); + schedule_delayed_work(&kobj->release, HZ); +#else + kobject_cleanup(kobj); +#endif } /** @@ -915,6 +931,21 @@ const struct kobj_ns_type_operations *kobj_ns_ops(struct kobject *kobj) return kobj_child_ns_ops(kobj->parent); } +bool kobj_ns_current_may_mount(enum kobj_ns_type type) +{ + bool may_mount = false; + + if (type == KOBJ_NS_TYPE_NONE) + return true; + + spin_lock(&kobj_ns_type_lock); + if ((type > KOBJ_NS_TYPE_NONE) && (type < KOBJ_NS_TYPES) && + kobj_ns_ops_tbl[type]) + may_mount = kobj_ns_ops_tbl[type]->current_may_mount(); + spin_unlock(&kobj_ns_type_lock); + + return may_mount; +} void *kobj_ns_grab_current(enum kobj_ns_type type) { diff --git a/lib/lockref.c b/lib/lockref.c new file mode 100644 index 0000000..e2cd2c0 --- /dev/null +++ b/lib/lockref.c @@ -0,0 +1,166 @@ +#include <linux/export.h> +#include <linux/lockref.h> + +#ifdef CONFIG_CMPXCHG_LOCKREF + +/* + * Note that the "cmpxchg()" reloads the "old" value for the + * failure case. + */ +#define CMPXCHG_LOOP(CODE, SUCCESS) do { \ + struct lockref old; \ + BUILD_BUG_ON(sizeof(old) != 8); \ + old.lock_count = ACCESS_ONCE(lockref->lock_count); \ + while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \ + struct lockref new = old, prev = old; \ + CODE \ + old.lock_count = cmpxchg(&lockref->lock_count, \ + old.lock_count, new.lock_count); \ + if (likely(old.lock_count == prev.lock_count)) { \ + SUCCESS; \ + } \ + cpu_relax(); \ + } \ +} while (0) + +#else + +#define CMPXCHG_LOOP(CODE, SUCCESS) do { } while (0) + +#endif + +/** + * lockref_get - Increments reference count unconditionally + * @lockref: pointer to lockref structure + * + * This operation is only valid if you already hold a reference + * to the object, so you know the count cannot be zero. + */ +void lockref_get(struct lockref *lockref) +{ + CMPXCHG_LOOP( + new.count++; + , + return; + ); + + spin_lock(&lockref->lock); + lockref->count++; + spin_unlock(&lockref->lock); +} +EXPORT_SYMBOL(lockref_get); + +/** + * lockref_get_not_zero - Increments count unless the count is 0 + * @lockref: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count was zero + */ +int lockref_get_not_zero(struct lockref *lockref) +{ + int retval; + + CMPXCHG_LOOP( + new.count++; + if (!old.count) + return 0; + , + return 1; + ); + + spin_lock(&lockref->lock); + retval = 0; + if (lockref->count) { + lockref->count++; + retval = 1; + } + spin_unlock(&lockref->lock); + return retval; +} +EXPORT_SYMBOL(lockref_get_not_zero); + +/** + * lockref_get_or_lock - Increments count unless the count is 0 + * @lockref: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count was zero + * and we got the lock instead. + */ +int lockref_get_or_lock(struct lockref *lockref) +{ + CMPXCHG_LOOP( + new.count++; + if (!old.count) + break; + , + return 1; + ); + + spin_lock(&lockref->lock); + if (!lockref->count) + return 0; + lockref->count++; + spin_unlock(&lockref->lock); + return 1; +} +EXPORT_SYMBOL(lockref_get_or_lock); + +/** + * lockref_put_or_lock - decrements count unless count <= 1 before decrement + * @lockref: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken + */ +int lockref_put_or_lock(struct lockref *lockref) +{ + CMPXCHG_LOOP( + new.count--; + if (old.count <= 1) + break; + , + return 1; + ); + + spin_lock(&lockref->lock); + if (lockref->count <= 1) + return 0; + lockref->count--; + spin_unlock(&lockref->lock); + return 1; +} +EXPORT_SYMBOL(lockref_put_or_lock); + +/** + * lockref_mark_dead - mark lockref dead + * @lockref: pointer to lockref structure + */ +void lockref_mark_dead(struct lockref *lockref) +{ + assert_spin_locked(&lockref->lock); + lockref->count = -128; +} + +/** + * lockref_get_not_dead - Increments count unless the ref is dead + * @lockref: pointer to lockref structure + * Return: 1 if count updated successfully or 0 if lockref was dead + */ +int lockref_get_not_dead(struct lockref *lockref) +{ + int retval; + + CMPXCHG_LOOP( + new.count++; + if ((int)old.count < 0) + return 0; + , + return 1; + ); + + spin_lock(&lockref->lock); + retval = 0; + if ((int) lockref->count >= 0) { + lockref->count++; + retval = 1; + } + spin_unlock(&lockref->lock); + return retval; +} +EXPORT_SYMBOL(lockref_get_not_dead); diff --git a/lib/raid6/.gitignore b/lib/raid6/.gitignore index 162beca..0a7e494 100644 --- a/lib/raid6/.gitignore +++ b/lib/raid6/.gitignore @@ -2,3 +2,4 @@ mktables altivec*.c int*.c tables.c +neon?.c diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile index 9f7c184..b462578 100644 --- a/lib/raid6/Makefile +++ b/lib/raid6/Makefile @@ -5,6 +5,7 @@ raid6_pq-y += algos.o recov.o tables.o int1.o int2.o int4.o \ raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o +raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o hostprogs-y += mktables @@ -16,6 +17,21 @@ ifeq ($(CONFIG_ALTIVEC),y) altivec_flags := -maltivec -mabi=altivec endif +# The GCC option -ffreestanding is required in order to compile code containing +# ARM/NEON intrinsics in a non C99-compliant environment (such as the kernel) +ifeq ($(CONFIG_KERNEL_MODE_NEON),y) +NEON_FLAGS := -ffreestanding +ifeq ($(ARCH),arm) +NEON_FLAGS += -mfloat-abi=softfp -mfpu=neon +endif +ifeq ($(ARCH),arm64) +CFLAGS_REMOVE_neon1.o += -mgeneral-regs-only +CFLAGS_REMOVE_neon2.o += -mgeneral-regs-only +CFLAGS_REMOVE_neon4.o += -mgeneral-regs-only +CFLAGS_REMOVE_neon8.o += -mgeneral-regs-only +endif +endif + targets += int1.c $(obj)/int1.c: UNROLL := 1 $(obj)/int1.c: $(src)/int.uc $(src)/unroll.awk FORCE @@ -70,6 +86,30 @@ $(obj)/altivec8.c: UNROLL := 8 $(obj)/altivec8.c: $(src)/altivec.uc $(src)/unroll.awk FORCE $(call if_changed,unroll) +CFLAGS_neon1.o += $(NEON_FLAGS) +targets += neon1.c +$(obj)/neon1.c: UNROLL := 1 +$(obj)/neon1.c: $(src)/neon.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_neon2.o += $(NEON_FLAGS) +targets += neon2.c +$(obj)/neon2.c: UNROLL := 2 +$(obj)/neon2.c: $(src)/neon.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_neon4.o += $(NEON_FLAGS) +targets += neon4.c +$(obj)/neon4.c: UNROLL := 4 +$(obj)/neon4.c: $(src)/neon.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + +CFLAGS_neon8.o += $(NEON_FLAGS) +targets += neon8.c +$(obj)/neon8.c: UNROLL := 8 +$(obj)/neon8.c: $(src)/neon.uc $(src)/unroll.awk FORCE + $(call if_changed,unroll) + quiet_cmd_mktable = TABLE $@ cmd_mktable = $(obj)/mktables > $@ || ( rm -f $@ && exit 1 ) diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 6d7316f..74e6f56 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -70,6 +70,12 @@ const struct raid6_calls * const raid6_algos[] = { &raid6_intx2, &raid6_intx4, &raid6_intx8, +#ifdef CONFIG_KERNEL_MODE_NEON + &raid6_neonx1, + &raid6_neonx2, + &raid6_neonx4, + &raid6_neonx8, +#endif NULL }; diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c new file mode 100644 index 0000000..36ad470 --- /dev/null +++ b/lib/raid6/neon.c @@ -0,0 +1,58 @@ +/* + * linux/lib/raid6/neon.c - RAID6 syndrome calculation using ARM NEON intrinsics + * + * Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/raid/pq.h> + +#ifdef __KERNEL__ +#include <asm/neon.h> +#else +#define kernel_neon_begin() +#define kernel_neon_end() +#define cpu_has_neon() (1) +#endif + +/* + * There are 2 reasons these wrappers are kept in a separate compilation unit + * from the actual implementations in neonN.c (generated from neon.uc by + * unroll.awk): + * - the actual implementations use NEON intrinsics, and the GCC support header + * (arm_neon.h) is not fully compatible (type wise) with the kernel; + * - the neonN.c files are compiled with -mfpu=neon and optimization enabled, + * and we have to make sure that we never use *any* NEON/VFP instructions + * outside a kernel_neon_begin()/kernel_neon_end() pair. + */ + +#define RAID6_NEON_WRAPPER(_n) \ + static void raid6_neon ## _n ## _gen_syndrome(int disks, \ + size_t bytes, void **ptrs) \ + { \ + void raid6_neon ## _n ## _gen_syndrome_real(int, \ + unsigned long, void**); \ + kernel_neon_begin(); \ + raid6_neon ## _n ## _gen_syndrome_real(disks, \ + (unsigned long)bytes, ptrs); \ + kernel_neon_end(); \ + } \ + struct raid6_calls const raid6_neonx ## _n = { \ + raid6_neon ## _n ## _gen_syndrome, \ + raid6_have_neon, \ + "neonx" #_n, \ + 0 \ + } + +static int raid6_have_neon(void) +{ + return cpu_has_neon(); +} + +RAID6_NEON_WRAPPER(1); +RAID6_NEON_WRAPPER(2); +RAID6_NEON_WRAPPER(4); +RAID6_NEON_WRAPPER(8); diff --git a/lib/raid6/neon.uc b/lib/raid6/neon.uc new file mode 100644 index 0000000..1b9ed79 --- /dev/null +++ b/lib/raid6/neon.uc @@ -0,0 +1,80 @@ +/* ----------------------------------------------------------------------- + * + * neon.uc - RAID-6 syndrome calculation using ARM NEON instructions + * + * Copyright (C) 2012 Rob Herring + * + * Based on altivec.uc: + * Copyright 2002-2004 H. Peter Anvin - All Rights Reserved + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, Inc., 53 Temple Place Ste 330, + * Boston MA 02111-1307, USA; either version 2 of the License, or + * (at your option) any later version; incorporated herein by reference. + * + * ----------------------------------------------------------------------- */ + +/* + * neon$#.c + * + * $#-way unrolled NEON intrinsics math RAID-6 instruction set + * + * This file is postprocessed using unroll.awk + */ + +#include <arm_neon.h> + +typedef uint8x16_t unative_t; + +#define NBYTES(x) ((unative_t){x,x,x,x, x,x,x,x, x,x,x,x, x,x,x,x}) +#define NSIZE sizeof(unative_t) + +/* + * The SHLBYTE() operation shifts each byte left by 1, *not* + * rolling over into the next byte + */ +static inline unative_t SHLBYTE(unative_t v) +{ + return vshlq_n_u8(v, 1); +} + +/* + * The MASK() operation returns 0xFF in any byte for which the high + * bit is 1, 0x00 for any byte for which the high bit is 0. + */ +static inline unative_t MASK(unative_t v) +{ + const uint8x16_t temp = NBYTES(0); + return (unative_t)vcltq_s8((int8x16_t)v, (int8x16_t)temp); +} + +void raid6_neon$#_gen_syndrome_real(int disks, unsigned long bytes, void **ptrs) +{ + uint8_t **dptr = (uint8_t **)ptrs; + uint8_t *p, *q; + int d, z, z0; + + register unative_t wd$$, wq$$, wp$$, w1$$, w2$$; + const unative_t x1d = NBYTES(0x1d); + + z0 = disks - 3; /* Highest data disk */ + p = dptr[z0+1]; /* XOR parity */ + q = dptr[z0+2]; /* RS syndrome */ + + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { + wq$$ = wp$$ = vld1q_u8(&dptr[z0][d+$$*NSIZE]); + for ( z = z0-1 ; z >= 0 ; z-- ) { + wd$$ = vld1q_u8(&dptr[z][d+$$*NSIZE]); + wp$$ = veorq_u8(wp$$, wd$$); + w2$$ = MASK(wq$$); + w1$$ = SHLBYTE(wq$$); + + w2$$ = vandq_u8(w2$$, x1d); + w1$$ = veorq_u8(w1$$, w2$$); + wq$$ = veorq_u8(w1$$, wd$$); + } + vst1q_u8(&p[d+NSIZE*$$], wp$$); + vst1q_u8(&q[d+NSIZE*$$], wq$$); + } +} diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile index 087332d..28afa1a 100644 --- a/lib/raid6/test/Makefile +++ b/lib/raid6/test/Makefile @@ -22,11 +22,23 @@ ifeq ($(ARCH),x86_64) IS_X86 = yes endif +ifeq ($(ARCH),arm) + CFLAGS += -I../../../arch/arm/include -mfpu=neon + HAS_NEON = yes +endif +ifeq ($(ARCH),arm64) + CFLAGS += -I../../../arch/arm64/include + HAS_NEON = yes +endif + ifeq ($(IS_X86),yes) OBJS += mmx.o sse1.o sse2.o avx2.o recov_ssse3.o recov_avx2.o CFLAGS += $(shell echo "vpbroadcastb %xmm0, %ymm1" | \ gcc -c -x assembler - >&/dev/null && \ rm ./-.o && echo -DCONFIG_AS_AVX2=1) +else ifeq ($(HAS_NEON),yes) + OBJS += neon.o neon1.o neon2.o neon4.o neon8.o + CFLAGS += -DCONFIG_KERNEL_MODE_NEON=1 else HAS_ALTIVEC := $(shell echo -e '\#include <altivec.h>\nvector int a;' |\ gcc -c -x c - >&/dev/null && \ @@ -55,6 +67,18 @@ raid6.a: $(OBJS) raid6test: test.c raid6.a $(CC) $(CFLAGS) -o raid6test $^ +neon1.c: neon.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=1 < neon.uc > $@ + +neon2.c: neon.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=2 < neon.uc > $@ + +neon4.c: neon.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=4 < neon.uc > $@ + +neon8.c: neon.uc ../unroll.awk + $(AWK) ../unroll.awk -vN=8 < neon.uc > $@ + altivec1.c: altivec.uc ../unroll.awk $(AWK) ../unroll.awk -vN=1 < altivec.uc > $@ @@ -89,7 +113,7 @@ tables.c: mktables ./mktables > tables.c clean: - rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c tables.c raid6test + rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test spotless: clean rm -f *~ diff --git a/lib/swiotlb.c b/lib/swiotlb.c index d23762e..4e8686c 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -870,13 +870,13 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, swiotlb_full(hwdev, sg->length, dir, 0); swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, attrs); - sgl[0].dma_length = 0; + sg_dma_len(sgl) = 0; return 0; } sg->dma_address = phys_to_dma(hwdev, map); } else sg->dma_address = dev_addr; - sg->dma_length = sg->length; + sg_dma_len(sg) = sg->length; } return nelems; } @@ -904,7 +904,7 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, BUG_ON(dir == DMA_NONE); for_each_sg(sgl, sg, nelems, i) - unmap_single(hwdev, sg->dma_address, sg->dma_length, dir); + unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir); } EXPORT_SYMBOL(swiotlb_unmap_sg_attrs); @@ -934,7 +934,7 @@ swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, for_each_sg(sgl, sg, nelems, i) swiotlb_sync_single(hwdev, sg->dma_address, - sg->dma_length, dir, target); + sg_dma_len(sg), dir, target); } void diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 739a363..26559bd 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -26,6 +26,7 @@ #include <linux/math64.h> #include <linux/uaccess.h> #include <linux/ioport.h> +#include <linux/dcache.h> #include <net/addrconf.h> #include <asm/page.h> /* for PAGE_SIZE */ @@ -532,6 +533,81 @@ char *string(char *buf, char *end, const char *s, struct printf_spec spec) return buf; } +static void widen(char *buf, char *end, unsigned len, unsigned spaces) +{ + size_t size; + if (buf >= end) /* nowhere to put anything */ + return; + size = end - buf; + if (size <= spaces) { + memset(buf, ' ', size); + return; + } + if (len) { + if (len > size - spaces) + len = size - spaces; + memmove(buf + spaces, buf, len); + } + memset(buf, ' ', spaces); +} + +static noinline_for_stack +char *dentry_name(char *buf, char *end, const struct dentry *d, struct printf_spec spec, + const char *fmt) +{ + const char *array[4], *s; + const struct dentry *p; + int depth; + int i, n; + + switch (fmt[1]) { + case '2': case '3': case '4': + depth = fmt[1] - '0'; + break; + default: + depth = 1; + } + + rcu_read_lock(); + for (i = 0; i < depth; i++, d = p) { + p = ACCESS_ONCE(d->d_parent); + array[i] = ACCESS_ONCE(d->d_name.name); + if (p == d) { + if (i) + array[i] = ""; + i++; + break; + } + } + s = array[--i]; + for (n = 0; n != spec.precision; n++, buf++) { + char c = *s++; + if (!c) { + if (!i) + break; + c = '/'; + s = array[--i]; + } + if (buf < end) + *buf = c; + } + rcu_read_unlock(); + if (n < spec.field_width) { + /* we want to pad the sucker */ + unsigned spaces = spec.field_width - n; + if (!(spec.flags & LEFT)) { + widen(buf - n, end, n, spaces); + return buf + spaces; + } + while (spaces--) { + if (buf < end) + *buf = ' '; + ++buf; + } + } + return buf; +} + static noinline_for_stack char *symbol_string(char *buf, char *end, void *ptr, struct printf_spec spec, const char *fmt) @@ -1253,6 +1329,12 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, spec.base = 16; return number(buf, end, (unsigned long long) *((phys_addr_t *)ptr), spec); + case 'd': + return dentry_name(buf, end, ptr, spec, fmt); + case 'D': + return dentry_name(buf, end, + ((const struct file *)ptr)->f_path.dentry, + spec, fmt); } spec.flags |= SMALL; if (spec.field_width == -1) { |