Diffstat (limited to 'kernel')
-rw-r--r--   kernel/futex.c                  | 100
-rw-r--r--   kernel/locking/Makefile         |   1
-rw-r--r--   kernel/locking/qrwlock.c        |  23
-rw-r--r--   kernel/locking/rtmutex-tester.c | 420
-rw-r--r--   kernel/locking/rtmutex.c        |   2
-rw-r--r--   kernel/locking/rtmutex_common.h |  22
-rw-r--r--   kernel/resource.c               |   6
-rw-r--r--   kernel/trace/ftrace.c           |  52
8 files changed, 146 insertions(+), 480 deletions(-)
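
The largest functional addition below is fault injection for futexes (CONFIG_FAIL_FUTEX) in kernel/futex.c: a fail_futex fault_attr plus an "ignore-private" debugfs switch, wired into the user-copy and atomic-cmpxchg paths so that -EFAULT/-EDEADLK failures can be provoked on demand. As a rough illustration only — not part of the patch — the snippet that follows shows how such knobs are typically driven from user space, assuming the standard fault-injection debugfs layout under /sys/kernel/debug/fail_futex/ and a kernel built with CONFIG_FAIL_FUTEX and CONFIG_FAULT_INJECTION_DEBUG_FS (the fail_futex= boot parameter is expected to take the usual <interval>,<probability>,<space>,<times> form).

/*
 * Hypothetical user-space helper (not part of the patch) for exercising
 * the new fail_futex knobs.  Assumes debugfs is mounted at
 * /sys/kernel/debug.
 */
#include <stdio.h>

static int write_knob(const char *name, const char *val)
{
	char path[128];
	FILE *f;

	snprintf(path, sizeof(path), "/sys/kernel/debug/fail_futex/%s", name);
	f = fopen(path, "w");
	if (!f) {
		perror(path);
		return -1;
	}
	fputs(val, f);
	return fclose(f);
}

int main(void)
{
	write_knob("probability", "10");   /* fail ~10% of eligible ops  */
	write_knob("interval", "1");
	write_knob("times", "-1");         /* never stop injecting       */
	write_knob("ignore-private", "1"); /* only hit shared futexes    */
	return 0;
}
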
diff --git a/kernel/futex.c b/kernel/futex.c index c4a182f..6e443ef 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -64,6 +64,7 @@ #include <linux/hugetlb.h> #include <linux/freezer.h> #include <linux/bootmem.h> +#include <linux/fault-inject.h> #include <asm/futex.h> @@ -258,6 +259,66 @@ static unsigned long __read_mostly futex_hashsize; static struct futex_hash_bucket *futex_queues; +/* + * Fault injections for futexes. + */ +#ifdef CONFIG_FAIL_FUTEX + +static struct { + struct fault_attr attr; + + u32 ignore_private; +} fail_futex = { + .attr = FAULT_ATTR_INITIALIZER, + .ignore_private = 0, +}; + +static int __init setup_fail_futex(char *str) +{ + return setup_fault_attr(&fail_futex.attr, str); +} +__setup("fail_futex=", setup_fail_futex); + +static bool should_fail_futex(bool fshared) +{ + if (fail_futex.ignore_private && !fshared) + return false; + + return should_fail(&fail_futex.attr, 1); +} + +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS + +static int __init fail_futex_debugfs(void) +{ + umode_t mode = S_IFREG | S_IRUSR | S_IWUSR; + struct dentry *dir; + + dir = fault_create_debugfs_attr("fail_futex", NULL, + &fail_futex.attr); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + if (!debugfs_create_bool("ignore-private", mode, dir, + &fail_futex.ignore_private)) { + debugfs_remove_recursive(dir); + return -ENOMEM; + } + + return 0; +} + +late_initcall(fail_futex_debugfs); + +#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ + +#else +static inline bool should_fail_futex(bool fshared) +{ + return false; +} +#endif /* CONFIG_FAIL_FUTEX */ + static inline void futex_get_mm(union futex_key *key) { atomic_inc(&key->private.mm->mm_count); @@ -413,6 +474,9 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) if (unlikely(!access_ok(rw, uaddr, sizeof(u32)))) return -EFAULT; + if (unlikely(should_fail_futex(fshared))) + return -EFAULT; + /* * PROCESS_PRIVATE futexes are fast. * As the mm cannot disappear under us and the 'key' only needs @@ -428,6 +492,10 @@ get_futex_key(u32 __user *uaddr, int fshared, union futex_key *key, int rw) } again: + /* Ignore any VERIFY_READ mapping (futex common case) */ + if (unlikely(should_fail_futex(fshared))) + return -EFAULT; + err = get_user_pages_fast(address, 1, 1, &page); /* * If write access is not required (eg. FUTEX_WAIT), try @@ -516,7 +584,7 @@ again: * A RO anonymous page will never change and thus doesn't make * sense for futex operations. */ - if (ro) { + if (unlikely(should_fail_futex(fshared)) || ro) { err = -EFAULT; goto out; } @@ -974,6 +1042,9 @@ static int lock_pi_update_atomic(u32 __user *uaddr, u32 uval, u32 newval) { u32 uninitialized_var(curval); + if (unlikely(should_fail_futex(true))) + return -EFAULT; + if (unlikely(cmpxchg_futex_value_locked(&curval, uaddr, uval, newval))) return -EFAULT; @@ -1015,12 +1086,18 @@ static int futex_lock_pi_atomic(u32 __user *uaddr, struct futex_hash_bucket *hb, if (get_futex_value_locked(&uval, uaddr)) return -EFAULT; + if (unlikely(should_fail_futex(true))) + return -EFAULT; + /* * Detect deadlocks. */ if ((unlikely((uval & FUTEX_TID_MASK) == vpid))) return -EDEADLK; + if ((unlikely(should_fail_futex(true)))) + return -EDEADLK; + /* * Lookup existing state first. If it exists, try to attach to * its pi_state. 
@@ -1155,6 +1232,9 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, */ newval = FUTEX_WAITERS | task_pid_vnr(new_owner); + if (unlikely(should_fail_futex(true))) + ret = -EFAULT; + if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) ret = -EFAULT; else if (curval != uval) @@ -1457,6 +1537,9 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex, if (get_futex_value_locked(&curval, pifutex)) return -EFAULT; + if (unlikely(should_fail_futex(true))) + return -EFAULT; + /* * Find the top_waiter and determine if there are additional waiters. * If the caller intends to requeue more than 1 waiter to pifutex, @@ -2268,8 +2351,11 @@ static long futex_wait_restart(struct restart_block *restart) /* * Userspace tried a 0 -> TID atomic transition of the futex value * and failed. The kernel side here does the whole locking operation: - * if there are waiters then it will block, it does PI, etc. (Due to - * races the kernel might see a 0 value of the futex too.) + * if there are waiters then it will block as a consequence of relying + * on rt-mutexes, it does PI, etc. (Due to races the kernel might see + * a 0 value of the futex too.). + * + * Also serves as futex trylock_pi()'ing, and due semantics. */ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags, ktime_t *time, int trylock) @@ -2300,6 +2386,10 @@ retry_private: ret = futex_lock_pi_atomic(uaddr, hb, &q.key, &q.pi_state, current, 0); if (unlikely(ret)) { + /* + * Atomic work succeeded and we got the lock, + * or failed. Either way, we do _not_ block. + */ switch (ret) { case 1: /* We got the lock. */ @@ -2530,7 +2620,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb, * futex_wait_requeue_pi() - Wait on uaddr and take uaddr2 * @uaddr: the futex we initially wait on (non-pi) * @flags: futex flags (FLAGS_SHARED, FLAGS_CLOCKRT, etc.), they must be - * the same type, no requeueing from private to shared, etc. + * the same type, no requeueing from private to shared, etc. 
* @val: the expected value of uaddr * @abs_time: absolute timeout * @bitset: 32 bit wakeup bitset set by userspace, defaults to all @@ -3005,6 +3095,8 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val, if (utime && (cmd == FUTEX_WAIT || cmd == FUTEX_LOCK_PI || cmd == FUTEX_WAIT_BITSET || cmd == FUTEX_WAIT_REQUEUE_PI)) { + if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG)))) + return -EFAULT; if (copy_from_user(&ts, utime, sizeof(ts)) != 0) return -EFAULT; if (!timespec_valid(&ts)) diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile index 7dd5c99..3694204 100644 --- a/kernel/locking/Makefile +++ b/kernel/locking/Makefile @@ -20,7 +20,6 @@ obj-$(CONFIG_PROVE_LOCKING) += spinlock.o obj-$(CONFIG_QUEUED_SPINLOCKS) += qspinlock.o obj-$(CONFIG_RT_MUTEXES) += rtmutex.o obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o -obj-$(CONFIG_RT_MUTEX_TESTER) += rtmutex-tester.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c index 6c5da483..d9c36c5 100644 --- a/kernel/locking/qrwlock.c +++ b/kernel/locking/qrwlock.c @@ -60,22 +60,23 @@ rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts) } /** - * queue_read_lock_slowpath - acquire read lock of a queue rwlock + * queued_read_lock_slowpath - acquire read lock of a queue rwlock * @lock: Pointer to queue rwlock structure + * @cnts: Current qrwlock lock value */ -void queue_read_lock_slowpath(struct qrwlock *lock) +void queued_read_lock_slowpath(struct qrwlock *lock, u32 cnts) { - u32 cnts; - /* * Readers come here when they cannot get the lock without waiting */ if (unlikely(in_interrupt())) { /* - * Readers in interrupt context will spin until the lock is - * available without waiting in the queue. + * Readers in interrupt context will get the lock immediately + * if the writer is just waiting (not holding the lock yet). + * The rspin_until_writer_unlock() function returns immediately + * in this case. Otherwise, they will spin until the lock + * is available without waiting in the queue. 
*/ - cnts = smp_load_acquire((u32 *)&lock->cnts); rspin_until_writer_unlock(lock, cnts); return; } @@ -104,13 +105,13 @@ void queue_read_lock_slowpath(struct qrwlock *lock) */ arch_spin_unlock(&lock->lock); } -EXPORT_SYMBOL(queue_read_lock_slowpath); +EXPORT_SYMBOL(queued_read_lock_slowpath); /** - * queue_write_lock_slowpath - acquire write lock of a queue rwlock + * queued_write_lock_slowpath - acquire write lock of a queue rwlock * @lock : Pointer to queue rwlock structure */ -void queue_write_lock_slowpath(struct qrwlock *lock) +void queued_write_lock_slowpath(struct qrwlock *lock) { u32 cnts; @@ -149,4 +150,4 @@ void queue_write_lock_slowpath(struct qrwlock *lock) unlock: arch_spin_unlock(&lock->lock); } -EXPORT_SYMBOL(queue_write_lock_slowpath); +EXPORT_SYMBOL(queued_write_lock_slowpath); diff --git a/kernel/locking/rtmutex-tester.c b/kernel/locking/rtmutex-tester.c deleted file mode 100644 index 1d96dd0..0000000 --- a/kernel/locking/rtmutex-tester.c +++ /dev/null @@ -1,420 +0,0 @@ -/* - * RT-Mutex-tester: scriptable tester for rt mutexes - * - * started by Thomas Gleixner: - * - * Copyright (C) 2006, Timesys Corp., Thomas Gleixner <tglx@timesys.com> - * - */ -#include <linux/device.h> -#include <linux/kthread.h> -#include <linux/export.h> -#include <linux/sched.h> -#include <linux/sched/rt.h> -#include <linux/spinlock.h> -#include <linux/timer.h> -#include <linux/freezer.h> -#include <linux/stat.h> - -#include "rtmutex.h" - -#define MAX_RT_TEST_THREADS 8 -#define MAX_RT_TEST_MUTEXES 8 - -static spinlock_t rttest_lock; -static atomic_t rttest_event; - -struct test_thread_data { - int opcode; - int opdata; - int mutexes[MAX_RT_TEST_MUTEXES]; - int event; - struct device dev; -}; - -static struct test_thread_data thread_data[MAX_RT_TEST_THREADS]; -static struct task_struct *threads[MAX_RT_TEST_THREADS]; -static struct rt_mutex mutexes[MAX_RT_TEST_MUTEXES]; - -enum test_opcodes { - RTTEST_NOP = 0, - RTTEST_SCHEDOT, /* 1 Sched other, data = nice */ - RTTEST_SCHEDRT, /* 2 Sched fifo, data = prio */ - RTTEST_LOCK, /* 3 Lock uninterruptible, data = lockindex */ - RTTEST_LOCKNOWAIT, /* 4 Lock uninterruptible no wait in wakeup, data = lockindex */ - RTTEST_LOCKINT, /* 5 Lock interruptible, data = lockindex */ - RTTEST_LOCKINTNOWAIT, /* 6 Lock interruptible no wait in wakeup, data = lockindex */ - RTTEST_LOCKCONT, /* 7 Continue locking after the wakeup delay */ - RTTEST_UNLOCK, /* 8 Unlock, data = lockindex */ - /* 9, 10 - reserved for BKL commemoration */ - RTTEST_SIGNAL = 11, /* 11 Signal other test thread, data = thread id */ - RTTEST_RESETEVENT = 98, /* 98 Reset event counter */ - RTTEST_RESET = 99, /* 99 Reset all pending operations */ -}; - -static int handle_op(struct test_thread_data *td, int lockwakeup) -{ - int i, id, ret = -EINVAL; - - switch(td->opcode) { - - case RTTEST_NOP: - return 0; - - case RTTEST_LOCKCONT: - td->mutexes[td->opdata] = 1; - td->event = atomic_add_return(1, &rttest_event); - return 0; - - case RTTEST_RESET: - for (i = 0; i < MAX_RT_TEST_MUTEXES; i++) { - if (td->mutexes[i] == 4) { - rt_mutex_unlock(&mutexes[i]); - td->mutexes[i] = 0; - } - } - return 0; - - case RTTEST_RESETEVENT: - atomic_set(&rttest_event, 0); - return 0; - - default: - if (lockwakeup) - return ret; - } - - switch(td->opcode) { - - case RTTEST_LOCK: - case RTTEST_LOCKNOWAIT: - id = td->opdata; - if (id < 0 || id >= MAX_RT_TEST_MUTEXES) - return ret; - - td->mutexes[id] = 1; - td->event = atomic_add_return(1, &rttest_event); - rt_mutex_lock(&mutexes[id]); - td->event = atomic_add_return(1, 
&rttest_event); - td->mutexes[id] = 4; - return 0; - - case RTTEST_LOCKINT: - case RTTEST_LOCKINTNOWAIT: - id = td->opdata; - if (id < 0 || id >= MAX_RT_TEST_MUTEXES) - return ret; - - td->mutexes[id] = 1; - td->event = atomic_add_return(1, &rttest_event); - ret = rt_mutex_lock_interruptible(&mutexes[id], 0); - td->event = atomic_add_return(1, &rttest_event); - td->mutexes[id] = ret ? 0 : 4; - return ret ? -EINTR : 0; - - case RTTEST_UNLOCK: - id = td->opdata; - if (id < 0 || id >= MAX_RT_TEST_MUTEXES || td->mutexes[id] != 4) - return ret; - - td->event = atomic_add_return(1, &rttest_event); - rt_mutex_unlock(&mutexes[id]); - td->event = atomic_add_return(1, &rttest_event); - td->mutexes[id] = 0; - return 0; - - default: - break; - } - return ret; -} - -/* - * Schedule replacement for rtsem_down(). Only called for threads with - * PF_MUTEX_TESTER set. - * - * This allows us to have finegrained control over the event flow. - * - */ -void schedule_rt_mutex_test(struct rt_mutex *mutex) -{ - int tid, op, dat; - struct test_thread_data *td; - - /* We have to lookup the task */ - for (tid = 0; tid < MAX_RT_TEST_THREADS; tid++) { - if (threads[tid] == current) - break; - } - - BUG_ON(tid == MAX_RT_TEST_THREADS); - - td = &thread_data[tid]; - - op = td->opcode; - dat = td->opdata; - - switch (op) { - case RTTEST_LOCK: - case RTTEST_LOCKINT: - case RTTEST_LOCKNOWAIT: - case RTTEST_LOCKINTNOWAIT: - if (mutex != &mutexes[dat]) - break; - - if (td->mutexes[dat] != 1) - break; - - td->mutexes[dat] = 2; - td->event = atomic_add_return(1, &rttest_event); - break; - - default: - break; - } - - schedule(); - - - switch (op) { - case RTTEST_LOCK: - case RTTEST_LOCKINT: - if (mutex != &mutexes[dat]) - return; - - if (td->mutexes[dat] != 2) - return; - - td->mutexes[dat] = 3; - td->event = atomic_add_return(1, &rttest_event); - break; - - case RTTEST_LOCKNOWAIT: - case RTTEST_LOCKINTNOWAIT: - if (mutex != &mutexes[dat]) - return; - - if (td->mutexes[dat] != 2) - return; - - td->mutexes[dat] = 1; - td->event = atomic_add_return(1, &rttest_event); - return; - - default: - return; - } - - td->opcode = 0; - - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - - if (td->opcode > 0) { - int ret; - - set_current_state(TASK_RUNNING); - ret = handle_op(td, 1); - set_current_state(TASK_INTERRUPTIBLE); - if (td->opcode == RTTEST_LOCKCONT) - break; - td->opcode = ret; - } - - /* Wait for the next command to be executed */ - schedule(); - } - - /* Restore previous command and data */ - td->opcode = op; - td->opdata = dat; -} - -static int test_func(void *data) -{ - struct test_thread_data *td = data; - int ret; - - current->flags |= PF_MUTEX_TESTER; - set_freezable(); - allow_signal(SIGHUP); - - for(;;) { - - set_current_state(TASK_INTERRUPTIBLE); - - if (td->opcode > 0) { - set_current_state(TASK_RUNNING); - ret = handle_op(td, 0); - set_current_state(TASK_INTERRUPTIBLE); - td->opcode = ret; - } - - /* Wait for the next command to be executed */ - schedule(); - try_to_freeze(); - - if (signal_pending(current)) - flush_signals(current); - - if(kthread_should_stop()) - break; - } - return 0; -} - -/** - * sysfs_test_command - interface for test commands - * @dev: thread reference - * @buf: command for actual step - * @count: length of buffer - * - * command syntax: - * - * opcode:data - */ -static ssize_t sysfs_test_command(struct device *dev, struct device_attribute *attr, - const char *buf, size_t count) -{ - struct sched_param schedpar; - struct test_thread_data *td; - char cmdbuf[32]; - int op, dat, tid, ret; - - td 
= container_of(dev, struct test_thread_data, dev); - tid = td->dev.id; - - /* strings from sysfs write are not 0 terminated! */ - if (count >= sizeof(cmdbuf)) - return -EINVAL; - - /* strip of \n: */ - if (buf[count-1] == '\n') - count--; - if (count < 1) - return -EINVAL; - - memcpy(cmdbuf, buf, count); - cmdbuf[count] = 0; - - if (sscanf(cmdbuf, "%d:%d", &op, &dat) != 2) - return -EINVAL; - - switch (op) { - case RTTEST_SCHEDOT: - schedpar.sched_priority = 0; - ret = sched_setscheduler(threads[tid], SCHED_NORMAL, &schedpar); - if (ret) - return ret; - set_user_nice(current, 0); - break; - - case RTTEST_SCHEDRT: - schedpar.sched_priority = dat; - ret = sched_setscheduler(threads[tid], SCHED_FIFO, &schedpar); - if (ret) - return ret; - break; - - case RTTEST_SIGNAL: - send_sig(SIGHUP, threads[tid], 0); - break; - - default: - if (td->opcode > 0) - return -EBUSY; - td->opdata = dat; - td->opcode = op; - wake_up_process(threads[tid]); - } - - return count; -} - -/** - * sysfs_test_status - sysfs interface for rt tester - * @dev: thread to query - * @buf: char buffer to be filled with thread status info - */ -static ssize_t sysfs_test_status(struct device *dev, struct device_attribute *attr, - char *buf) -{ - struct test_thread_data *td; - struct task_struct *tsk; - char *curr = buf; - int i; - - td = container_of(dev, struct test_thread_data, dev); - tsk = threads[td->dev.id]; - - spin_lock(&rttest_lock); - - curr += sprintf(curr, - "O: %4d, E:%8d, S: 0x%08lx, P: %4d, N: %4d, B: %p, M:", - td->opcode, td->event, tsk->state, - (MAX_RT_PRIO - 1) - tsk->prio, - (MAX_RT_PRIO - 1) - tsk->normal_prio, - tsk->pi_blocked_on); - - for (i = MAX_RT_TEST_MUTEXES - 1; i >=0 ; i--) - curr += sprintf(curr, "%d", td->mutexes[i]); - - spin_unlock(&rttest_lock); - - curr += sprintf(curr, ", T: %p, R: %p\n", tsk, - mutexes[td->dev.id].owner); - - return curr - buf; -} - -static DEVICE_ATTR(status, S_IRUSR, sysfs_test_status, NULL); -static DEVICE_ATTR(command, S_IWUSR, NULL, sysfs_test_command); - -static struct bus_type rttest_subsys = { - .name = "rttest", - .dev_name = "rttest", -}; - -static int init_test_thread(int id) -{ - thread_data[id].dev.bus = &rttest_subsys; - thread_data[id].dev.id = id; - - threads[id] = kthread_run(test_func, &thread_data[id], "rt-test-%d", id); - if (IS_ERR(threads[id])) - return PTR_ERR(threads[id]); - - return device_register(&thread_data[id].dev); -} - -static int init_rttest(void) -{ - int ret, i; - - spin_lock_init(&rttest_lock); - - for (i = 0; i < MAX_RT_TEST_MUTEXES; i++) - rt_mutex_init(&mutexes[i]); - - ret = subsys_system_register(&rttest_subsys, NULL); - if (ret) - return ret; - - for (i = 0; i < MAX_RT_TEST_THREADS; i++) { - ret = init_test_thread(i); - if (ret) - break; - ret = device_create_file(&thread_data[i].dev, &dev_attr_status); - if (ret) - break; - ret = device_create_file(&thread_data[i].dev, &dev_attr_command); - if (ret) - break; - } - - printk("Initializing RT-Tester: %s\n", ret ? 
"Failed" : "OK" ); - - return ret; -} - -device_initcall(init_rttest); diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 5674b07..7781d80 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1120,7 +1120,7 @@ __rt_mutex_slowlock(struct rt_mutex *lock, int state, debug_rt_mutex_print_deadlock(waiter); - schedule_rt_mutex(lock); + schedule(); raw_spin_lock(&lock->wait_lock); set_current_state(state); diff --git a/kernel/locking/rtmutex_common.h b/kernel/locking/rtmutex_common.h index 7844f8f..4f5f83c 100644 --- a/kernel/locking/rtmutex_common.h +++ b/kernel/locking/rtmutex_common.h @@ -15,28 +15,6 @@ #include <linux/rtmutex.h> /* - * The rtmutex in kernel tester is independent of rtmutex debugging. We - * call schedule_rt_mutex_test() instead of schedule() for the tasks which - * belong to the tester. That way we can delay the wakeup path of those - * threads to provoke lock stealing and testing of complex boosting scenarios. - */ -#ifdef CONFIG_RT_MUTEX_TESTER - -extern void schedule_rt_mutex_test(struct rt_mutex *lock); - -#define schedule_rt_mutex(_lock) \ - do { \ - if (!(current->flags & PF_MUTEX_TESTER)) \ - schedule(); \ - else \ - schedule_rt_mutex_test(_lock); \ - } while (0) - -#else -# define schedule_rt_mutex(_lock) schedule() -#endif - -/* * This is the control structure for tasks blocked on a rt_mutex, * which is allocated on the kernel stack on of the blocked task. * diff --git a/kernel/resource.c b/kernel/resource.c index 90552aa..fed052a 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -504,13 +504,13 @@ int region_is_ram(resource_size_t start, unsigned long size) { struct resource *p; resource_size_t end = start + size - 1; - int flags = IORESOURCE_MEM | IORESOURCE_BUSY; + unsigned long flags = IORESOURCE_MEM | IORESOURCE_BUSY; const char *name = "System RAM"; int ret = -1; read_lock(&resource_lock); for (p = iomem_resource.child; p ; p = p->sibling) { - if (end < p->start) + if (p->end < start) continue; if (p->start <= start && end <= p->end) { @@ -521,7 +521,7 @@ int region_is_ram(resource_size_t start, unsigned long size) ret = 1; break; } - if (p->end < start) + if (end < p->start) break; /* not found */ } read_unlock(&resource_lock); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 02bece4..eb11011 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -98,6 +98,13 @@ struct ftrace_pid { struct pid *pid; }; +static bool ftrace_pids_enabled(void) +{ + return !list_empty(&ftrace_pids); +} + +static void ftrace_update_trampoline(struct ftrace_ops *ops); + /* * ftrace_disabled is set when an anomaly is discovered. * ftrace_disabled is much stronger than ftrace_enabled. @@ -109,7 +116,6 @@ static DEFINE_MUTEX(ftrace_lock); static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end; static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end; ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub; -ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub; static struct ftrace_ops global_ops; static struct ftrace_ops control_ops; @@ -183,14 +189,7 @@ static void ftrace_pid_func(unsigned long ip, unsigned long parent_ip, if (!test_tsk_trace_trace(current)) return; - ftrace_pid_function(ip, parent_ip, op, regs); -} - -static void set_ftrace_pid_function(ftrace_func_t func) -{ - /* do not set ftrace_pid_function to itself! 
*/ - if (func != ftrace_pid_func) - ftrace_pid_function = func; + op->saved_func(ip, parent_ip, op, regs); } /** @@ -202,7 +201,6 @@ static void set_ftrace_pid_function(ftrace_func_t func) void clear_ftrace_function(void) { ftrace_trace_function = ftrace_stub; - ftrace_pid_function = ftrace_stub; } static void control_ops_disable_all(struct ftrace_ops *ops) @@ -436,6 +434,12 @@ static int __register_ftrace_function(struct ftrace_ops *ops) } else add_ftrace_ops(&ftrace_ops_list, ops); + /* Always save the function, and reset at unregistering */ + ops->saved_func = ops->func; + + if (ops->flags & FTRACE_OPS_FL_PID && ftrace_pids_enabled()) + ops->func = ftrace_pid_func; + ftrace_update_trampoline(ops); if (ftrace_enabled) @@ -463,15 +467,28 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops) if (ftrace_enabled) update_ftrace_function(); + ops->func = ops->saved_func; + return 0; } static void ftrace_update_pid_func(void) { + bool enabled = ftrace_pids_enabled(); + struct ftrace_ops *op; + /* Only do something if we are tracing something */ if (ftrace_trace_function == ftrace_stub) return; + do_for_each_ftrace_op(op, ftrace_ops_list) { + if (op->flags & FTRACE_OPS_FL_PID) { + op->func = enabled ? ftrace_pid_func : + op->saved_func; + ftrace_update_trampoline(op); + } + } while_for_each_ftrace_op(op); + update_ftrace_function(); } @@ -1133,7 +1150,8 @@ static struct ftrace_ops global_ops = { .local_hash.filter_hash = EMPTY_HASH, INIT_OPS_HASH(global_ops) .flags = FTRACE_OPS_FL_RECURSION_SAFE | - FTRACE_OPS_FL_INITIALIZED, + FTRACE_OPS_FL_INITIALIZED | + FTRACE_OPS_FL_PID, }; /* @@ -5023,7 +5041,9 @@ static void ftrace_update_trampoline(struct ftrace_ops *ops) static struct ftrace_ops global_ops = { .func = ftrace_stub, - .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED, + .flags = FTRACE_OPS_FL_RECURSION_SAFE | + FTRACE_OPS_FL_INITIALIZED | + FTRACE_OPS_FL_PID, }; static int __init ftrace_nodyn_init(void) @@ -5080,11 +5100,6 @@ void ftrace_init_array_ops(struct trace_array *tr, ftrace_func_t func) if (WARN_ON(tr->ops->func != ftrace_stub)) printk("ftrace ops had %pS for function\n", tr->ops->func); - /* Only the top level instance does pid tracing */ - if (!list_empty(&ftrace_pids)) { - set_ftrace_pid_function(func); - func = ftrace_pid_func; - } } tr->ops->func = func; tr->ops->private = tr; @@ -5371,7 +5386,7 @@ static void *fpid_start(struct seq_file *m, loff_t *pos) { mutex_lock(&ftrace_lock); - if (list_empty(&ftrace_pids) && (!*pos)) + if (!ftrace_pids_enabled() && (!*pos)) return (void *) 1; return seq_list_start(&ftrace_pids, *pos); @@ -5610,6 +5625,7 @@ static struct ftrace_ops graph_ops = { .func = ftrace_stub, .flags = FTRACE_OPS_FL_RECURSION_SAFE | FTRACE_OPS_FL_INITIALIZED | + FTRACE_OPS_FL_PID | FTRACE_OPS_FL_STUB, #ifdef FTRACE_GRAPH_TRAMP_ADDR .trampoline = FTRACE_GRAPH_TRAMP_ADDR, |
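
For reference, the kernel/resource.c hunk above is a pure logic fix: the walk over the address-sorted iomem_resource siblings had its two boundary tests swapped, so it skipped entries it should have stopped at and bailed out on entries it should have skipped, and the flags variable is widened to unsigned long to match struct resource. A stand-alone toy model of the corrected walk (illustrative types and data only, not the kernel's):

/*
 * Minimal sketch of the corrected region_is_ram() walk over a toy
 * resource list sorted by address, as the iomem_resource siblings are.
 * Returns 1 if [start, start+size) sits inside one RAM entry, 0 if it
 * sits inside a non-RAM entry, -1 if nothing covers it.
 */
#include <stdbool.h>
#include <stdio.h>

struct toy_res {
	unsigned long start, end;
	bool is_ram;
};

static int toy_region_is_ram(const struct toy_res *res, int n,
			     unsigned long start, unsigned long size)
{
	unsigned long end = start + size - 1;
	int i;

	for (i = 0; i < n; i++) {
		if (res[i].end < start)		/* entirely below the range: skip */
			continue;
		if (res[i].start <= start && end <= res[i].end)
			return res[i].is_ram ? 1 : 0;
		if (end < res[i].start)		/* already past the range: not found */
			break;
	}
	return -1;
}

int main(void)
{
	const struct toy_res map[] = {
		{ 0x00000,  0x9ffff,     true  },  /* low RAM         */
		{ 0xa0000,  0xbffff,     false },  /* legacy VGA hole */
		{ 0x100000, 0x7fffffff,  true  },  /* high RAM        */
	};

	printf("%d\n", toy_region_is_ram(map, 3, 0x1000, 0x1000));    /* 1  */
	printf("%d\n", toy_region_is_ram(map, 3, 0xa0000, 0x1000));   /* 0  */
	printf("%d\n", toy_region_is_ram(map, 3, 0x80000000, 0x10));  /* -1 */
	return 0;
}
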
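The kernel/trace/ftrace.c hunks replace the single global ftrace_pid_function with a per-ops saved_func: each ops keeps the callback it was registered with, and ops->func is redirected to ftrace_pid_func only while PID filtering is active and the ops carries FTRACE_OPS_FL_PID. A compilable toy model of that save/swap pattern (names and types here are illustrative, not the kernel's ftrace API):

/*
 * Toy model of the callback save/swap pattern: saved_func remembers the
 * registered callback, and func is swapped to a PID-filtering wrapper
 * while filtering is enabled, then restored afterwards.
 */
#include <stdio.h>

struct toy_ops;
typedef void (*toy_func_t)(unsigned long ip, struct toy_ops *op);

#define TOY_FL_PID 0x1

struct toy_ops {
	toy_func_t func;	/* what the tracing core invokes     */
	toy_func_t saved_func;	/* callback supplied at registration */
	unsigned int flags;
};

static int pid_filter_enabled;
static int current_task_traced;	/* stand-in for test_tsk_trace_trace() */

static void real_callback(unsigned long ip, struct toy_ops *op)
{
	printf("traced ip=%#lx\n", ip);
}

static void toy_pid_func(unsigned long ip, struct toy_ops *op)
{
	if (!current_task_traced)
		return;
	op->saved_func(ip, op);
}

static void toy_register(struct toy_ops *op, toy_func_t func)
{
	op->saved_func = func;	/* always saved, restored at unregister */
	op->func = (pid_filter_enabled && (op->flags & TOY_FL_PID)) ?
		   toy_pid_func : func;
}

static void toy_update_pid_filter(struct toy_ops *op, int enabled)
{
	pid_filter_enabled = enabled;
	if (op->flags & TOY_FL_PID)
		op->func = enabled ? toy_pid_func : op->saved_func;
}

int main(void)
{
	struct toy_ops op = { .flags = TOY_FL_PID };

	toy_register(&op, real_callback);
	op.func(0x1000, &op);		/* no filter: traced        */

	toy_update_pid_filter(&op, 1);
	op.func(0x2000, &op);		/* filtered out             */

	current_task_traced = 1;
	op.func(0x3000, &op);		/* traced via the wrapper   */
	return 0;
}

The extra saved_func field is what lets unregistering, or disabling the PID filter, restore the original callback without the core having to remember it anywhere else.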