From ef12496022d5917cfe0b04cf8fd685fc6bc08400 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 17 Dec 2012 15:59:58 -0800 Subject: lib/vsprintf.c: fix handling of %zd when using ssize_t Documentation/printk-formats.txt says to use %zd for a ssize_t argument and some drivers do. Unfortunately this prints a positive number for negative values eg: tpm_tis 70030000.tpm_tis: tpm_transmit: tpm_send: error 4294967234 Add a case to va_args a ssize_t type if the interpretation should be signed. Tested on PPC32. Signed-off-by: Jason Gunthorpe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 39c99fe..41da074 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1485,7 +1485,10 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) num = va_arg(args, long); break; case FORMAT_TYPE_SIZE_T: - num = va_arg(args, size_t); + if (spec.flags & SIGN) + num = va_arg(args, ssize_t); + else + num = va_arg(args, size_t); break; case FORMAT_TYPE_PTRDIFF: num = va_arg(args, ptrdiff_t); -- cgit v1.1 From 35367ab28d024ef026dbd797b4076c8f008ec08c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 17 Dec 2012 16:01:21 -0800 Subject: lib: dynamic_debug: use kbasename() Remove the custom implementation of the functionality similar to kbasename(). Signed-off-by: Andy Shevchenko Cc: Jason Baron Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/dynamic_debug.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'lib') diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index e7f7d99..1db1fc6 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -62,13 +62,6 @@ static LIST_HEAD(ddebug_tables); static int verbose = 0; module_param(verbose, int, 0644); -/* Return the last part of a pathname */ -static inline const char *basename(const char *path) -{ - const char *tail = strrchr(path, '/'); - return tail ? tail+1 : path; -} - /* Return the path relative to source root */ static inline const char *trim_prefix(const char *path) { @@ -154,7 +147,7 @@ static int ddebug_change(const struct ddebug_query *query, /* match against the source filename */ if (query->filename && strcmp(query->filename, dp->filename) && - strcmp(query->filename, basename(dp->filename)) && + strcmp(query->filename, kbasename(dp->filename)) && strcmp(query->filename, trim_prefix(dp->filename))) continue; -- cgit v1.1 From 53809751ac230a3611b5cdd375f3389f3207d471 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Mon, 17 Dec 2012 16:01:31 -0800 Subject: sscanf: don't ignore field widths for numeric conversions This is another step towards better standard conformance. Rather than adding a local buffer to store the specified portion of the string (with the need to enforce an arbitrary maximum supported width to limit the buffer size), do a maximum width conversion and then drop as much of it as is necessary to meet the caller's request. Also fail on negative field widths. Uses the deprecated simple_strto*() functions because kstrtoXX() fail on non-zero terminated strings. Signed-off-by: Jan Beulich Cc: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 96 ++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 53 insertions(+), 43 deletions(-) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 41da074..292fcb1 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -23,12 +23,12 @@ #include #include #include +#include #include #include #include #include /* for PAGE_SIZE */ -#include #include /* for dereference_function_descriptor() */ #include "kstrtox.h" @@ -2016,7 +2016,11 @@ int vsscanf(const char *buf, const char *fmt, va_list args) char digit; int num = 0; u8 qualifier; - u8 base; + unsigned int base; + union { + long long s; + unsigned long long u; + } val; s16 field_width; bool is_sign; @@ -2056,8 +2060,11 @@ int vsscanf(const char *buf, const char *fmt, va_list args) /* get field width */ field_width = -1; - if (isdigit(*fmt)) + if (isdigit(*fmt)) { field_width = skip_atoi(&fmt); + if (field_width <= 0) + break; + } /* get conversion qualifier */ qualifier = -1; @@ -2157,58 +2164,61 @@ int vsscanf(const char *buf, const char *fmt, va_list args) || (base == 0 && !isdigit(digit))) break; + if (is_sign) + val.s = qualifier != 'L' ? + simple_strtol(str, &next, base) : + simple_strtoll(str, &next, base); + else + val.u = qualifier != 'L' ? + simple_strtoul(str, &next, base) : + simple_strtoull(str, &next, base); + + if (field_width > 0 && next - str > field_width) { + if (base == 0) + _parse_integer_fixup_radix(str, &base); + while (next - str > field_width) { + if (is_sign) + val.s = div_s64(val.s, base); + else + val.u = div_u64(val.u, base); + --next; + } + } + switch (qualifier) { case 'H': /* that's 'hh' in format */ - if (is_sign) { - signed char *s = (signed char *)va_arg(args, signed char *); - *s = (signed char)simple_strtol(str, &next, base); - } else { - unsigned char *s = (unsigned char *)va_arg(args, unsigned char *); - *s = (unsigned char)simple_strtoul(str, &next, base); - } + if (is_sign) + *va_arg(args, signed char *) = val.s; + else + *va_arg(args, unsigned char *) = val.u; break; case 'h': - if (is_sign) { - short *s = (short *)va_arg(args, short *); - *s = (short)simple_strtol(str, &next, base); - } else { - unsigned short *s = (unsigned short *)va_arg(args, unsigned short *); - *s = (unsigned short)simple_strtoul(str, &next, base); - } + if (is_sign) + *va_arg(args, short *) = val.s; + else + *va_arg(args, unsigned short *) = val.u; break; case 'l': - if (is_sign) { - long *l = (long *)va_arg(args, long *); - *l = simple_strtol(str, &next, base); - } else { - unsigned long *l = (unsigned long *)va_arg(args, unsigned long *); - *l = simple_strtoul(str, &next, base); - } + if (is_sign) + *va_arg(args, long *) = val.s; + else + *va_arg(args, unsigned long *) = val.u; break; case 'L': - if (is_sign) { - long long *l = (long long *)va_arg(args, long long *); - *l = simple_strtoll(str, &next, base); - } else { - unsigned long long *l = (unsigned long long *)va_arg(args, unsigned long long *); - *l = simple_strtoull(str, &next, base); - } + if (is_sign) + *va_arg(args, long long *) = val.s; + else + *va_arg(args, unsigned long long *) = val.u; break; case 'Z': case 'z': - { - size_t *s = (size_t *)va_arg(args, size_t *); - *s = (size_t)simple_strtoul(str, &next, base); - } - break; + *va_arg(args, size_t *) = val.u; + break; default: - if (is_sign) { - int *i = (int *)va_arg(args, int *); - *i = (int)simple_strtol(str, &next, base); - } else { - unsigned int *i = (unsigned int *)va_arg(args, unsigned int*); - *i = (unsigned int)simple_strtoul(str, &next, base); - } + if (is_sign) + *va_arg(args, int *) = val.s; + else + *va_arg(args, unsigned int *) = val.u; break; } num++; -- cgit v1.1 From a1fd3e24d8a484b3265a6d485202afe093c058f3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 17 Dec 2012 16:01:32 -0800 Subject: percpu_rw_semaphore: reimplement to not block the readers unnecessarily Currently the writer does msleep() plus synchronize_sched() 3 times to acquire/release the semaphore, and during this time the readers are blocked completely. Even if the "write" section was not actually started or if it was already finished. With this patch down_write/up_write does synchronize_sched() twice and down_read/up_read are still possible during this time, just they use the slow path. percpu_down_write() first forces the readers to use rw_semaphore and increment the "slow" counter to take the lock for reading, then it takes that rw_semaphore for writing and blocks the readers. Also. With this patch the code relies on the documented behaviour of synchronize_sched(), it doesn't try to pair synchronize_sched() with barrier. Signed-off-by: Oleg Nesterov Reviewed-by: Paul E. McKenney Cc: Linus Torvalds Cc: Mikulas Patocka Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Srikar Dronamraju Cc: Ananth N Mavinakayanahalli Cc: Anton Arapov Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Makefile | 2 +- lib/percpu-rwsem.c | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 155 insertions(+), 1 deletion(-) create mode 100644 lib/percpu-rwsem.c (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile index e2152fa..e959c20 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -9,7 +9,7 @@ endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ - idr.o int_sqrt.o extable.o \ + idr.o int_sqrt.o extable.o percpu-rwsem.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c new file mode 100644 index 0000000..2e03bcf --- /dev/null +++ b/lib/percpu-rwsem.c @@ -0,0 +1,154 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +int percpu_init_rwsem(struct percpu_rw_semaphore *brw) +{ + brw->fast_read_ctr = alloc_percpu(int); + if (unlikely(!brw->fast_read_ctr)) + return -ENOMEM; + + mutex_init(&brw->writer_mutex); + init_rwsem(&brw->rw_sem); + atomic_set(&brw->slow_read_ctr, 0); + init_waitqueue_head(&brw->write_waitq); + return 0; +} + +void percpu_free_rwsem(struct percpu_rw_semaphore *brw) +{ + free_percpu(brw->fast_read_ctr); + brw->fast_read_ctr = NULL; /* catch use after free bugs */ +} + +/* + * This is the fast-path for down_read/up_read, it only needs to ensure + * there is no pending writer (!mutex_is_locked() check) and inc/dec the + * fast per-cpu counter. The writer uses synchronize_sched_expedited() to + * serialize with the preempt-disabled section below. + * + * The nontrivial part is that we should guarantee acquire/release semantics + * in case when + * + * R_W: down_write() comes after up_read(), the writer should see all + * changes done by the reader + * or + * W_R: down_read() comes after up_write(), the reader should see all + * changes done by the writer + * + * If this helper fails the callers rely on the normal rw_semaphore and + * atomic_dec_and_test(), so in this case we have the necessary barriers. + * + * But if it succeeds we do not have any barriers, mutex_is_locked() or + * __this_cpu_add() below can be reordered with any LOAD/STORE done by the + * reader inside the critical section. See the comments in down_write and + * up_write below. + */ +static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) +{ + bool success = false; + + preempt_disable(); + if (likely(!mutex_is_locked(&brw->writer_mutex))) { + __this_cpu_add(*brw->fast_read_ctr, val); + success = true; + } + preempt_enable(); + + return success; +} + +/* + * Like the normal down_read() this is not recursive, the writer can + * come after the first percpu_down_read() and create the deadlock. + */ +void percpu_down_read(struct percpu_rw_semaphore *brw) +{ + if (likely(update_fast_ctr(brw, +1))) + return; + + down_read(&brw->rw_sem); + atomic_inc(&brw->slow_read_ctr); + up_read(&brw->rw_sem); +} + +void percpu_up_read(struct percpu_rw_semaphore *brw) +{ + if (likely(update_fast_ctr(brw, -1))) + return; + + /* false-positive is possible but harmless */ + if (atomic_dec_and_test(&brw->slow_read_ctr)) + wake_up_all(&brw->write_waitq); +} + +static int clear_fast_ctr(struct percpu_rw_semaphore *brw) +{ + unsigned int sum = 0; + int cpu; + + for_each_possible_cpu(cpu) { + sum += per_cpu(*brw->fast_read_ctr, cpu); + per_cpu(*brw->fast_read_ctr, cpu) = 0; + } + + return sum; +} + +/* + * A writer takes ->writer_mutex to exclude other writers and to force the + * readers to switch to the slow mode, note the mutex_is_locked() check in + * update_fast_ctr(). + * + * After that the readers can only inc/dec the slow ->slow_read_ctr counter, + * ->fast_read_ctr is stable. Once the writer moves its sum into the slow + * counter it represents the number of active readers. + * + * Finally the writer takes ->rw_sem for writing and blocks the new readers, + * then waits until the slow counter becomes zero. + */ +void percpu_down_write(struct percpu_rw_semaphore *brw) +{ + /* also blocks update_fast_ctr() which checks mutex_is_locked() */ + mutex_lock(&brw->writer_mutex); + + /* + * 1. Ensures mutex_is_locked() is visible to any down_read/up_read + * so that update_fast_ctr() can't succeed. + * + * 2. Ensures we see the result of every previous this_cpu_add() in + * update_fast_ctr(). + * + * 3. Ensures that if any reader has exited its critical section via + * fast-path, it executes a full memory barrier before we return. + * See R_W case in the comment above update_fast_ctr(). + */ + synchronize_sched_expedited(); + + /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */ + atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr); + + /* block the new readers completely */ + down_write(&brw->rw_sem); + + /* wait for all readers to complete their percpu_up_read() */ + wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr)); +} + +void percpu_up_write(struct percpu_rw_semaphore *brw) +{ + /* allow the new readers, but only the slow-path */ + up_write(&brw->rw_sem); + + /* + * Insert the barrier before the next fast-path in down_read, + * see W_R case in the comment above update_fast_ctr(). + */ + synchronize_sched_expedited(); + mutex_unlock(&brw->writer_mutex); +} -- cgit v1.1 From 9390ef0c85fd065f01045fef708b046c98cda04c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 17 Dec 2012 16:01:36 -0800 Subject: percpu_rw_semaphore: kill ->writer_mutex, add ->write_ctr percpu_rw_semaphore->writer_mutex was only added to simplify the initial rewrite, the only thing it protects is clear_fast_ctr() which otherwise could be called by multiple writers. ->rw_sem is enough to serialize the writers. Kill this mutex and add "atomic_t write_ctr" instead. The writers increment/decrement this counter, the readers check it is zero instead of mutex_is_locked(). Move atomic_add(clear_fast_ctr(), slow_read_ctr) under down_write() to avoid the race with other writers. This is a bit sub-optimal, only the first writer needs this and we do not need to exclude the readers at this stage. But this is simple, we do not want another internal lock until we add more features. And this speeds up the write-contended case. Before this patch the racing writers sleep in synchronize_sched_expedited() sequentially, with this patch multiple synchronize_sched_expedited's can "overlap" with each other. Note: we can do more optimizations, this is only the first step. Signed-off-by: Oleg Nesterov Cc: Anton Arapov Cc: Ingo Molnar Cc: Linus Torvalds Cc: Michal Marek Cc: Mikulas Patocka Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Srikar Dronamraju Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/percpu-rwsem.c | 34 ++++++++++++++++------------------ 1 file changed, 16 insertions(+), 18 deletions(-) (limited to 'lib') diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c index 2e03bcf..ce92ab5 100644 --- a/lib/percpu-rwsem.c +++ b/lib/percpu-rwsem.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include @@ -13,8 +13,8 @@ int percpu_init_rwsem(struct percpu_rw_semaphore *brw) if (unlikely(!brw->fast_read_ctr)) return -ENOMEM; - mutex_init(&brw->writer_mutex); init_rwsem(&brw->rw_sem); + atomic_set(&brw->write_ctr, 0); atomic_set(&brw->slow_read_ctr, 0); init_waitqueue_head(&brw->write_waitq); return 0; @@ -28,7 +28,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw) /* * This is the fast-path for down_read/up_read, it only needs to ensure - * there is no pending writer (!mutex_is_locked() check) and inc/dec the + * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the * fast per-cpu counter. The writer uses synchronize_sched_expedited() to * serialize with the preempt-disabled section below. * @@ -44,7 +44,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw) * If this helper fails the callers rely on the normal rw_semaphore and * atomic_dec_and_test(), so in this case we have the necessary barriers. * - * But if it succeeds we do not have any barriers, mutex_is_locked() or + * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or * __this_cpu_add() below can be reordered with any LOAD/STORE done by the * reader inside the critical section. See the comments in down_write and * up_write below. @@ -54,7 +54,7 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) bool success = false; preempt_disable(); - if (likely(!mutex_is_locked(&brw->writer_mutex))) { + if (likely(!atomic_read(&brw->write_ctr))) { __this_cpu_add(*brw->fast_read_ctr, val); success = true; } @@ -101,9 +101,8 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw) } /* - * A writer takes ->writer_mutex to exclude other writers and to force the - * readers to switch to the slow mode, note the mutex_is_locked() check in - * update_fast_ctr(). + * A writer increments ->write_ctr to force the readers to switch to the + * slow mode, note the atomic_read() check in update_fast_ctr(). * * After that the readers can only inc/dec the slow ->slow_read_ctr counter, * ->fast_read_ctr is stable. Once the writer moves its sum into the slow @@ -114,11 +113,10 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw) */ void percpu_down_write(struct percpu_rw_semaphore *brw) { - /* also blocks update_fast_ctr() which checks mutex_is_locked() */ - mutex_lock(&brw->writer_mutex); - + /* tell update_fast_ctr() there is a pending writer */ + atomic_inc(&brw->write_ctr); /* - * 1. Ensures mutex_is_locked() is visible to any down_read/up_read + * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read * so that update_fast_ctr() can't succeed. * * 2. Ensures we see the result of every previous this_cpu_add() in @@ -130,25 +128,25 @@ void percpu_down_write(struct percpu_rw_semaphore *brw) */ synchronize_sched_expedited(); + /* exclude other writers, and block the new readers completely */ + down_write(&brw->rw_sem); + /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */ atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr); - /* block the new readers completely */ - down_write(&brw->rw_sem); - /* wait for all readers to complete their percpu_up_read() */ wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr)); } void percpu_up_write(struct percpu_rw_semaphore *brw) { - /* allow the new readers, but only the slow-path */ + /* release the lock, but the readers can't use the fast-path */ up_write(&brw->rw_sem); - /* * Insert the barrier before the next fast-path in down_read, * see W_R case in the comment above update_fast_ctr(). */ synchronize_sched_expedited(); - mutex_unlock(&brw->writer_mutex); + /* the last writer unblocks update_fast_ctr() */ + atomic_dec(&brw->write_ctr); } -- cgit v1.1 From 8ebe34731a0d1a9d89b536430afd98d0fadec99b Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 17 Dec 2012 16:01:38 -0800 Subject: percpu_rw_semaphore: add lockdep annotations Add lockdep annotations. Not only this can help to find the potential problems, we do not want the false warnings if, say, the task takes two different percpu_rw_semaphore's for reading. IOW, at least ->rw_sem should not use a single class. This patch exposes this internal lock to lockdep so that it represents the whole percpu_rw_semaphore. This way we do not need to add another "fake" ->lockdep_map and lock_class_key. More importantly, this also makes the output from lockdep much more understandable if it finds the problem. In short, with this patch from lockdep pov percpu_down_read() and percpu_up_read() acquire/release ->rw_sem for reading, this matches the actual semantics. This abuses __up_read() but I hope this is fine and in fact I'd like to have down_read_no_lockdep() as well, percpu_down_read_recursive_readers() will need it. Signed-off-by: Oleg Nesterov Cc: Anton Arapov Cc: Ingo Molnar Cc: Linus Torvalds Cc: Michal Marek Cc: Mikulas Patocka Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Srikar Dronamraju Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/percpu-rwsem.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c index ce92ab5..652a8ee 100644 --- a/lib/percpu-rwsem.c +++ b/lib/percpu-rwsem.c @@ -2,18 +2,21 @@ #include #include #include +#include #include #include #include #include -int percpu_init_rwsem(struct percpu_rw_semaphore *brw) +int __percpu_init_rwsem(struct percpu_rw_semaphore *brw, + const char *name, struct lock_class_key *rwsem_key) { brw->fast_read_ctr = alloc_percpu(int); if (unlikely(!brw->fast_read_ctr)) return -ENOMEM; - init_rwsem(&brw->rw_sem); + /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */ + __init_rwsem(&brw->rw_sem, name, rwsem_key); atomic_set(&brw->write_ctr, 0); atomic_set(&brw->slow_read_ctr, 0); init_waitqueue_head(&brw->write_waitq); @@ -66,19 +69,29 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val) /* * Like the normal down_read() this is not recursive, the writer can * come after the first percpu_down_read() and create the deadlock. + * + * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep, + * percpu_up_read() does rwsem_release(). This pairs with the usage + * of ->rw_sem in percpu_down/up_write(). */ void percpu_down_read(struct percpu_rw_semaphore *brw) { - if (likely(update_fast_ctr(brw, +1))) + might_sleep(); + if (likely(update_fast_ctr(brw, +1))) { + rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_); return; + } down_read(&brw->rw_sem); atomic_inc(&brw->slow_read_ctr); - up_read(&brw->rw_sem); + /* avoid up_read()->rwsem_release() */ + __up_read(&brw->rw_sem); } void percpu_up_read(struct percpu_rw_semaphore *brw) { + rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_); + if (likely(update_fast_ctr(brw, -1))) return; -- cgit v1.1 From 22b361d1df7bc25b558e52f22e779286a3d323e4 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 17 Dec 2012 16:01:39 -0800 Subject: percpu_rw_semaphore: introduce CONFIG_PERCPU_RWSEM Currently only block_dev and uprobes use percpu_rw_semaphore, add the config option selected by BLOCK || UPROBES. Signed-off-by: Oleg Nesterov Cc: Anton Arapov Cc: Ingo Molnar Cc: Linus Torvalds Cc: Michal Marek Cc: Mikulas Patocka Cc: "Paul E. McKenney" Cc: Peter Zijlstra Cc: Srikar Dronamraju Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig | 3 +++ lib/Makefile | 3 ++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index 4b31a46..75cdb77 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -42,6 +42,9 @@ config GENERIC_IO config STMP_DEVICE bool +config PERCPU_RWSEM + boolean + config CRC_CCITT tristate "CRC-CCITT functions" help diff --git a/lib/Makefile b/lib/Makefile index e959c20..5558e35 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -9,7 +9,7 @@ endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ rbtree.o radix-tree.o dump_stack.o timerqueue.o\ - idr.o int_sqrt.o extable.o percpu-rwsem.o \ + idr.o int_sqrt.o extable.o \ sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \ proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ @@ -40,6 +40,7 @@ obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o +lib-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o -- cgit v1.1 From 918854a65e856574523d94763ef2a2b48ad55a25 Mon Sep 17 00:00:00 2001 From: Cong Ding Date: Mon, 17 Dec 2012 16:01:43 -0800 Subject: lib/rbtree_test.c: fix uninitialized variable warning Fix this warning: lib/rbtree_test.c: In function `check': lib/rbtree_test.c:121: warning: `blacks' may be used uninitialized in this function Signed-off-by: Cong Ding Cc: Michel Lespinasse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/rbtree_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 268b239..d7f491a 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -118,7 +118,7 @@ static void check(int nr_nodes) { struct rb_node *rb; int count = 0; - int blacks; + int blacks = 0; u32 prev_key = 0; for (rb = rb_first(&root); rb; rb = rb_next(rb)) { -- cgit v1.1 From 4c925d6031f719fad6ea8b1c94a636f4c0fea39b Mon Sep 17 00:00:00 2001 From: Eldad Zack Date: Mon, 17 Dec 2012 16:03:04 -0800 Subject: kstrto*: add documentation As Bruce Fields pointed out, kstrto* is currently lacking kerneldoc comments. This patch adds kerneldoc comments to common variants of kstrto*: kstrto(u)l, kstrto(u)ll and kstrto(u)int. Signed-off-by: Eldad Zack Cc: J. Bruce Fields Cc: Joe Perches Cc: Randy Dunlap Cc: Alexey Dobriyan Cc: Rob Landley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/kstrtox.c | 64 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) (limited to 'lib') diff --git a/lib/kstrtox.c b/lib/kstrtox.c index c3615ea..f78ae0c 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -104,6 +104,22 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) return 0; } +/** + * kstrtoull - convert a string to an unsigned long long + * @s: The start of the string. The string must be null-terminated, and may also + * include a single newline before its terminating null. The first character + * may also be a plus sign, but not a minus sign. + * @base: The number base to use. The maximum supported base is 16. If base is + * given as 0, then the base of the string is automatically detected with the + * conventional semantics - If it begins with 0x the number will be parsed as a + * hexadecimal (case insensitive), if it otherwise begins with 0, it will be + * parsed as an octal number. Otherwise it will be parsed as a decimal. + * @res: Where to write the result of the conversion on success. + * + * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. + * Used as a replacement for the obsolete simple_strtoull. Return code must + * be checked. + */ int kstrtoull(const char *s, unsigned int base, unsigned long long *res) { if (s[0] == '+') @@ -112,6 +128,22 @@ int kstrtoull(const char *s, unsigned int base, unsigned long long *res) } EXPORT_SYMBOL(kstrtoull); +/** + * kstrtoll - convert a string to a long long + * @s: The start of the string. The string must be null-terminated, and may also + * include a single newline before its terminating null. The first character + * may also be a plus sign or a minus sign. + * @base: The number base to use. The maximum supported base is 16. If base is + * given as 0, then the base of the string is automatically detected with the + * conventional semantics - If it begins with 0x the number will be parsed as a + * hexadecimal (case insensitive), if it otherwise begins with 0, it will be + * parsed as an octal number. Otherwise it will be parsed as a decimal. + * @res: Where to write the result of the conversion on success. + * + * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. + * Used as a replacement for the obsolete simple_strtoull. Return code must + * be checked. + */ int kstrtoll(const char *s, unsigned int base, long long *res) { unsigned long long tmp; @@ -168,6 +200,22 @@ int _kstrtol(const char *s, unsigned int base, long *res) } EXPORT_SYMBOL(_kstrtol); +/** + * kstrtouint - convert a string to an unsigned int + * @s: The start of the string. The string must be null-terminated, and may also + * include a single newline before its terminating null. The first character + * may also be a plus sign, but not a minus sign. + * @base: The number base to use. The maximum supported base is 16. If base is + * given as 0, then the base of the string is automatically detected with the + * conventional semantics - If it begins with 0x the number will be parsed as a + * hexadecimal (case insensitive), if it otherwise begins with 0, it will be + * parsed as an octal number. Otherwise it will be parsed as a decimal. + * @res: Where to write the result of the conversion on success. + * + * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. + * Used as a replacement for the obsolete simple_strtoull. Return code must + * be checked. + */ int kstrtouint(const char *s, unsigned int base, unsigned int *res) { unsigned long long tmp; @@ -183,6 +231,22 @@ int kstrtouint(const char *s, unsigned int base, unsigned int *res) } EXPORT_SYMBOL(kstrtouint); +/** + * kstrtoint - convert a string to an int + * @s: The start of the string. The string must be null-terminated, and may also + * include a single newline before its terminating null. The first character + * may also be a plus sign or a minus sign. + * @base: The number base to use. The maximum supported base is 16. If base is + * given as 0, then the base of the string is automatically detected with the + * conventional semantics - If it begins with 0x the number will be parsed as a + * hexadecimal (case insensitive), if it otherwise begins with 0, it will be + * parsed as an octal number. Otherwise it will be parsed as a decimal. + * @res: Where to write the result of the conversion on success. + * + * Returns 0 on success, -ERANGE on overflow and -EINVAL on parsing error. + * Used as a replacement for the obsolete simple_strtoull. Return code must + * be checked. + */ int kstrtoint(const char *s, unsigned int base, int *res) { long long tmp; -- cgit v1.1 From 462e471107624fe9bd8b6353ac13e06305c3f3fd Mon Sep 17 00:00:00 2001 From: Eldad Zack Date: Mon, 17 Dec 2012 16:03:05 -0800 Subject: simple_strto*: annotate function as obsolete Update the documentation for simple_strto* to reflect that it has been obsoleted and advise the usage of kstrto*. Signed-off-by: Eldad Zack Cc: J. Bruce Fields Cc: Joe Perches Cc: Randy Dunlap Cc: Alexey Dobriyan Cc: Rob Landley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'lib') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 292fcb1..fab33a9 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -38,6 +38,8 @@ * @cp: The start of the string * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use + * + * This function is obsolete. Please use kstrtoull instead. */ unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base) { @@ -61,6 +63,8 @@ EXPORT_SYMBOL(simple_strtoull); * @cp: The start of the string * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use + * + * This function is obsolete. Please use kstrtoul instead. */ unsigned long simple_strtoul(const char *cp, char **endp, unsigned int base) { @@ -73,6 +77,8 @@ EXPORT_SYMBOL(simple_strtoul); * @cp: The start of the string * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use + * + * This function is obsolete. Please use kstrtol instead. */ long simple_strtol(const char *cp, char **endp, unsigned int base) { @@ -88,6 +94,8 @@ EXPORT_SYMBOL(simple_strtol); * @cp: The start of the string * @endp: A pointer to the end of the parsed string will be placed here * @base: The number base to use + * + * This function is obsolete. Please use kstrtoll instead. */ long long simple_strtoll(const char *cp, char **endp, unsigned int base) { -- cgit v1.1 From 496f2f93b1cc286f5a4f4f9acdc1e5314978683f Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 17 Dec 2012 16:04:23 -0800 Subject: random32: rename random32 to prandom This renames all random32 functions to have 'prandom_' prefix as follows: void prandom_seed(u32 seed); /* rename from srandom32() */ u32 prandom_u32(void); /* rename from random32() */ void prandom_seed_state(struct rnd_state *state, u64 seed); /* rename from prandom32_seed() */ u32 prandom_u32_state(struct rnd_state *state); /* rename from prandom32() */ The purpose of this renaming is to prevent some kernel developers from assuming that prandom32() and random32() might imply that only prandom32() was the one using a pseudo-random number generator by prandom32's "p", and the result may be a very embarassing security exposure. This concern was expressed by Theodore Ts'o. And furthermore, I'm going to introduce new functions for getting the requested number of pseudo-random bytes. If I continue to use both prandom32 and random32 prefixes for these functions, the confusion is getting worse. As a result of this renaming, "prandom_" is the common prefix for pseudo-random number library. Currently, srandom32() and random32() are preserved because it is difficult to rename too many users at once. Signed-off-by: Akinobu Mita Cc: "Theodore Ts'o" Cc: Robert Love Cc: Michel Lespinasse Cc: Valdis Kletnieks Cc: David Laight Cc: Adrian Hunter Cc: Artem Bityutskiy Cc: David Woodhouse Cc: Eilon Greenstein Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/interval_tree_test_main.c | 7 ++++--- lib/random32.c | 48 +++++++++++++++++++++---------------------- lib/rbtree_test.c | 6 +++--- 3 files changed, 31 insertions(+), 30 deletions(-) (limited to 'lib') diff --git a/lib/interval_tree_test_main.c b/lib/interval_tree_test_main.c index b259039..245900b 100644 --- a/lib/interval_tree_test_main.c +++ b/lib/interval_tree_test_main.c @@ -30,7 +30,8 @@ static void init(void) { int i; for (i = 0; i < NODES; i++) { - u32 a = prandom32(&rnd), b = prandom32(&rnd); + u32 a = prandom_u32_state(&rnd); + u32 b = prandom_u32_state(&rnd); if (a <= b) { nodes[i].start = a; nodes[i].last = b; @@ -40,7 +41,7 @@ static void init(void) } } for (i = 0; i < SEARCHES; i++) - queries[i] = prandom32(&rnd); + queries[i] = prandom_u32_state(&rnd); } static int interval_tree_test_init(void) @@ -51,7 +52,7 @@ static int interval_tree_test_init(void) printk(KERN_ALERT "interval tree insert/remove"); - prandom32_seed(&rnd, 3141592653589793238ULL); + prandom_seed_state(&rnd, 3141592653589793238ULL); init(); time1 = get_cycles(); diff --git a/lib/random32.c b/lib/random32.c index 938bde5..d1830fa 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -42,13 +42,13 @@ static DEFINE_PER_CPU(struct rnd_state, net_rand_state); /** - * prandom32 - seeded pseudo-random number generator. + * prandom_u32_state - seeded pseudo-random number generator. * @state: pointer to state structure holding seeded state. * * This is used for pseudo-randomness with no outside seeding. - * For more random results, use random32(). + * For more random results, use prandom_u32(). */ -u32 prandom32(struct rnd_state *state) +u32 prandom_u32_state(struct rnd_state *state) { #define TAUSWORTHE(s,a,b,c,d) ((s&c)<>b) @@ -58,32 +58,32 @@ u32 prandom32(struct rnd_state *state) return (state->s1 ^ state->s2 ^ state->s3); } -EXPORT_SYMBOL(prandom32); +EXPORT_SYMBOL(prandom_u32_state); /** - * random32 - pseudo random number generator + * prandom_u32 - pseudo random number generator * * A 32 bit pseudo-random number is generated using a fast * algorithm suitable for simulation. This algorithm is NOT * considered safe for cryptographic use. */ -u32 random32(void) +u32 prandom_u32(void) { unsigned long r; struct rnd_state *state = &get_cpu_var(net_rand_state); - r = prandom32(state); + r = prandom_u32_state(state); put_cpu_var(state); return r; } -EXPORT_SYMBOL(random32); +EXPORT_SYMBOL(prandom_u32); /** - * srandom32 - add entropy to pseudo random number generator + * prandom_seed - add entropy to pseudo random number generator * @seed: seed value * - * Add some additional seeding to the random32() pool. + * Add some additional seeding to the prandom pool. */ -void srandom32(u32 entropy) +void prandom_seed(u32 entropy) { int i; /* @@ -95,13 +95,13 @@ void srandom32(u32 entropy) state->s1 = __seed(state->s1 ^ entropy, 1); } } -EXPORT_SYMBOL(srandom32); +EXPORT_SYMBOL(prandom_seed); /* * Generate some initially weak seeding values to allow - * to start the random32() engine. + * to start the prandom_u32() engine. */ -static int __init random32_init(void) +static int __init prandom_init(void) { int i; @@ -114,22 +114,22 @@ static int __init random32_init(void) state->s3 = __seed(LCG(state->s2), 15); /* "warm it up" */ - prandom32(state); - prandom32(state); - prandom32(state); - prandom32(state); - prandom32(state); - prandom32(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); + prandom_u32_state(state); } return 0; } -core_initcall(random32_init); +core_initcall(prandom_init); /* * Generate better values after random number generator * is fully initialized. */ -static int __init random32_reseed(void) +static int __init prandom_reseed(void) { int i; @@ -143,8 +143,8 @@ static int __init random32_reseed(void) state->s3 = __seed(seeds[2], 15); /* mix it in */ - prandom32(state); + prandom_u32_state(state); } return 0; } -late_initcall(random32_reseed); +late_initcall(prandom_reseed); diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index d7f491a..af38aed 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -96,8 +96,8 @@ static void init(void) { int i; for (i = 0; i < NODES; i++) { - nodes[i].key = prandom32(&rnd); - nodes[i].val = prandom32(&rnd); + nodes[i].key = prandom_u32_state(&rnd); + nodes[i].val = prandom_u32_state(&rnd); } } @@ -155,7 +155,7 @@ static int rbtree_test_init(void) printk(KERN_ALERT "rbtree testing"); - prandom32_seed(&rnd, 3141592653589793238ULL); + prandom_seed_state(&rnd, 3141592653589793238ULL); init(); time1 = get_cycles(); -- cgit v1.1 From 6582c665d6b882dad8329e05749fbcf119f1ab88 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Mon, 17 Dec 2012 16:04:25 -0800 Subject: prandom: introduce prandom_bytes() and prandom_bytes_state() Add functions to get the requested number of pseudo-random bytes. The difference from get_random_bytes() is that it generates pseudo-random numbers by prandom_u32(). It doesn't consume the entropy pool, and the sequence is reproducible if the same rnd_state is used. So it is suitable for generating random bytes for testing. Signed-off-by: Akinobu Mita Cc: "Theodore Ts'o" Cc: Artem Bityutskiy Cc: Adrian Hunter Cc: David Woodhouse Cc: Eilon Greenstein Cc: David Laight Cc: Michel Lespinasse Cc: Robert Love Cc: Valdis Kletnieks Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/random32.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) (limited to 'lib') diff --git a/lib/random32.c b/lib/random32.c index d1830fa..52280d5 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -77,6 +77,55 @@ u32 prandom_u32(void) } EXPORT_SYMBOL(prandom_u32); +/* + * prandom_bytes_state - get the requested number of pseudo-random bytes + * + * @state: pointer to state structure holding seeded state. + * @buf: where to copy the pseudo-random bytes to + * @bytes: the requested number of bytes + * + * This is used for pseudo-randomness with no outside seeding. + * For more random results, use prandom_bytes(). + */ +void prandom_bytes_state(struct rnd_state *state, void *buf, int bytes) +{ + unsigned char *p = buf; + int i; + + for (i = 0; i < round_down(bytes, sizeof(u32)); i += sizeof(u32)) { + u32 random = prandom_u32_state(state); + int j; + + for (j = 0; j < sizeof(u32); j++) { + p[i + j] = random; + random >>= BITS_PER_BYTE; + } + } + if (i < bytes) { + u32 random = prandom_u32_state(state); + + for (; i < bytes; i++) { + p[i] = random; + random >>= BITS_PER_BYTE; + } + } +} +EXPORT_SYMBOL(prandom_bytes_state); + +/** + * prandom_bytes - get the requested number of pseudo-random bytes + * @buf: where to copy the pseudo-random bytes to + * @bytes: the requested number of bytes + */ +void prandom_bytes(void *buf, int bytes) +{ + struct rnd_state *state = &get_cpu_var(net_rand_state); + + prandom_bytes_state(state, buf, bytes); + put_cpu_var(state); +} +EXPORT_SYMBOL(prandom_bytes); + /** * prandom_seed - add entropy to pseudo random number generator * @seed: seed value -- cgit v1.1 From 6fd59a83b9261fa53eaf98fb5514abba504a3ea3 Mon Sep 17 00:00:00 2001 From: Nick Bowler Date: Mon, 17 Dec 2012 16:05:20 -0800 Subject: scatterlist: don't BUG when we can trivially return a proper error. There is absolutely no reason to crash the kernel when we have a perfectly good return value already available to use for conveying failure status. Let's return an error code instead of crashing the kernel: that sounds like a much better plan. [akpm@linux-foundation.org: s/E2BIG/EINVAL/] Signed-off-by: Nick Bowler Cc: Maxim Levitsky Cc: Tejun Heo Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/scatterlist.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 3675452b..7874b01 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -248,7 +248,8 @@ int __sg_alloc_table(struct sg_table *table, unsigned int nents, unsigned int left; #ifndef ARCH_HAS_SG_CHAIN - BUG_ON(nents > max_ents); + if (WARN_ON_ONCE(nents > max_ents)) + return -EINVAL; #endif memset(table, 0, sizeof(*table)); -- cgit v1.1