22 files changed, 408 insertions, 156 deletions
diff --git a/kernel/audit.c b/kernel/audit.c
index d417ca1..0a36091e 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -690,9 +690,7 @@ static const struct inotify_operations audit_inotify_ops = {
 /* Initialize audit support at boot time. */
 static int __init audit_init(void)
 {
-#ifdef CONFIG_AUDITSYSCALL
 	int i;
-#endif
 
 	printk(KERN_INFO "audit: initializing netlink socket (%s)\n",
 	       audit_default ? "enabled" : "disabled");
@@ -717,10 +715,10 @@ static int __init audit_init(void)
 	audit_ih = inotify_init(&audit_inotify_ops);
 	if (IS_ERR(audit_ih))
 		audit_panic("cannot initialize inotify handle");
+#endif
 
 	for (i = 0; i < AUDIT_INODE_BUCKETS; i++)
 		INIT_LIST_HEAD(&audit_inode_hash[i]);
-#endif
 
 	return 0;
 }
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index 5b4e162..6a9a5c5a 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -442,6 +442,7 @@ static struct audit_entry *audit_rule_to_entry(struct audit_rule *rule)
 		case AUDIT_EQUAL:
 			break;
 		default:
+			err = -EINVAL;
 			goto exit_free;
 		}
 	}
@@ -579,6 +580,7 @@ static struct audit_entry *audit_data_to_entry(struct audit_rule_data *data,
 		case AUDIT_EQUAL:
 			break;
 		default:
+			err = -EINVAL;
 			goto exit_free;
 		}
 	}
@@ -1134,6 +1136,14 @@ static inline int audit_add_rule(struct audit_entry *entry,
 	struct audit_watch *watch = entry->rule.watch;
 	struct nameidata *ndp, *ndw;
 	int h, err, putnd_needed = 0;
+#ifdef CONFIG_AUDITSYSCALL
+	int dont_count = 0;
+
+	/* If either of these, don't count towards total */
+	if (entry->rule.listnr == AUDIT_FILTER_USER ||
+		entry->rule.listnr == AUDIT_FILTER_TYPE)
+		dont_count = 1;
+#endif
 
 	if (inode_f) {
 		h = audit_hash_ino(inode_f->val);
@@ -1174,6 +1184,10 @@ static inline int audit_add_rule(struct audit_entry *entry,
 	} else {
 		list_add_tail_rcu(&entry->list, list);
 	}
+#ifdef CONFIG_AUDITSYSCALL
+	if (!dont_count)
+		audit_n_rules++;
+#endif
 	mutex_unlock(&audit_filter_mutex);
 
 	if (putnd_needed)
@@ -1198,6 +1212,14 @@ static inline int audit_del_rule(struct audit_entry *entry,
 	struct audit_watch *watch, *tmp_watch = entry->rule.watch;
 	LIST_HEAD(inotify_list);
 	int h, ret = 0;
+#ifdef CONFIG_AUDITSYSCALL
+	int dont_count = 0;
+
+	/* If either of these, don't count towards total */
+	if (entry->rule.listnr == AUDIT_FILTER_USER ||
+		entry->rule.listnr == AUDIT_FILTER_TYPE)
+		dont_count = 1;
+#endif
 
 	if (inode_f) {
 		h = audit_hash_ino(inode_f->val);
@@ -1235,6 +1257,10 @@ static inline int audit_del_rule(struct audit_entry *entry,
 	list_del_rcu(&e->list);
 	call_rcu(&e->rcu, audit_free_rule_rcu);
 
+#ifdef CONFIG_AUDITSYSCALL
+	if (!dont_count)
+		audit_n_rules--;
+#endif
 	mutex_unlock(&audit_filter_mutex);
 
 	if (!list_empty(&inotify_list))
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index ae40ac8..efc1b74 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -85,6 +85,9 @@ extern int audit_enabled;
 /* Indicates that audit should log the full pathname. */
 #define AUDIT_NAME_FULL -1
 
+/* number of audit rules */
+int audit_n_rules;
+
 /* When fs/namei.c:getname() is called, we store the pointer in name and
  * we don't let putname() free it (instead we free all of the saved
  * pointers at syscall exit time).
@@ -174,6 +177,7 @@ struct audit_aux_data_path {
 
 /* The per-task audit context. */
 struct audit_context {
+	int		    dummy;	/* must be the first element */
 	int		    in_syscall;	/* 1 if task is in a syscall */
 	enum audit_state    state;
 	unsigned int	    serial;     /* serial number for record */
@@ -514,7 +518,7 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk,
 	context->return_valid = return_valid;
 	context->return_code  = return_code;
 
-	if (context->in_syscall && !context->auditable) {
+	if (context->in_syscall && !context->dummy && !context->auditable) {
 		enum audit_state state;
 
 		state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_EXIT]);
@@ -530,17 +534,7 @@ static inline struct audit_context *audit_get_context(struct task_struct *tsk,
 	}
 
 get_context:
-	context->pid = tsk->pid;
-	context->ppid = sys_getppid();	/* sic.  tsk == current in all cases */
-	context->uid = tsk->uid;
-	context->gid = tsk->gid;
-	context->euid = tsk->euid;
-	context->suid = tsk->suid;
-	context->fsuid = tsk->fsuid;
-	context->egid = tsk->egid;
-	context->sgid = tsk->sgid;
-	context->fsgid = tsk->fsgid;
-	context->personality = tsk->personality;
+
 	tsk->audit_context = NULL;
 	return context;
 }
@@ -749,6 +743,17 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts
 	const char *tty;
 
 	/* tsk == current */
+	context->pid = tsk->pid;
+	context->ppid = sys_getppid();	/* sic.  tsk == current in all cases */
+	context->uid = tsk->uid;
+	context->gid = tsk->gid;
+	context->euid = tsk->euid;
+	context->suid = tsk->suid;
+	context->fsuid = tsk->fsuid;
+	context->egid = tsk->egid;
+	context->sgid = tsk->sgid;
+	context->fsgid = tsk->fsgid;
+	context->personality = tsk->personality;
 
 	ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL);
 	if (!ab)
@@ -1066,7 +1071,8 @@ void audit_syscall_entry(int arch, int major,
 	context->argv[3]    = a4;
 
 	state = context->state;
-	if (state == AUDIT_SETUP_CONTEXT || state == AUDIT_BUILD_CONTEXT)
+	context->dummy = !audit_n_rules;
+	if (!context->dummy && (state == AUDIT_SETUP_CONTEXT || state == AUDIT_BUILD_CONTEXT))
 		state = audit_filter_syscall(tsk, context, &audit_filter_list[AUDIT_FILTER_ENTRY]);
 	if (likely(state == AUDIT_DISABLED))
 		return;
@@ -1199,14 +1205,18 @@ void audit_putname(const char *name)
 #endif
 }
 
-static void audit_inode_context(int idx, const struct inode *inode)
+/* Copy inode data into an audit_names. */
+static void audit_copy_inode(struct audit_names *name, const struct inode *inode)
 {
-	struct audit_context *context = current->audit_context;
-
-	selinux_get_inode_sid(inode, &context->names[idx].osid);
+	name->ino   = inode->i_ino;
+	name->dev   = inode->i_sb->s_dev;
+	name->mode  = inode->i_mode;
+	name->uid   = inode->i_uid;
+	name->gid   = inode->i_gid;
+	name->rdev  = inode->i_rdev;
+	selinux_get_inode_sid(inode, &name->osid);
 }
 
-
 /**
  * audit_inode - store the inode and device from a lookup
  * @name: name being audited
@@ -1240,20 +1250,14 @@ void __audit_inode(const char *name, const struct inode *inode)
 		++context->ino_count;
 #endif
 	}
-	context->names[idx].ino   = inode->i_ino;
-	context->names[idx].dev	  = inode->i_sb->s_dev;
-	context->names[idx].mode  = inode->i_mode;
-	context->names[idx].uid   = inode->i_uid;
-	context->names[idx].gid   = inode->i_gid;
-	context->names[idx].rdev  = inode->i_rdev;
-	audit_inode_context(idx, inode);
+	audit_copy_inode(&context->names[idx], inode);
 }
 
 /**
  * audit_inode_child - collect inode info for created/removed objects
  * @dname: inode's dentry name
  * @inode: inode being audited
- * @pino: inode number of dentry parent
+ * @parent: inode of dentry parent
  *
  * For syscalls that create or remove filesystem objects, audit_inode
  * can only collect information for the filesystem object's parent.
@@ -1264,7 +1268,7 @@ void __audit_inode(const char *name, const struct inode *inode)
  * unsuccessful attempts.
  */
 void __audit_inode_child(const char *dname, const struct inode *inode,
-			 unsigned long pino)
+			 const struct inode *parent)
 {
 	int idx;
 	struct audit_context *context = current->audit_context;
@@ -1278,7 +1282,7 @@ void __audit_inode_child(const char *dname, const struct inode *inode,
 	if (!dname)
 		goto update_context;
 	for (idx = 0; idx < context->name_count; idx++)
-		if (context->names[idx].ino == pino) {
+		if (context->names[idx].ino == parent->i_ino) {
 			const char *name = context->names[idx].name;
 
 			if (!name)
@@ -1302,16 +1306,47 @@ update_context:
 	context->names[idx].name_len = AUDIT_NAME_FULL;
 	context->names[idx].name_put = 0;	/* don't call __putname() */
 
-	if (inode) {
-		context->names[idx].ino   = inode->i_ino;
-		context->names[idx].dev	  = inode->i_sb->s_dev;
-		context->names[idx].mode  = inode->i_mode;
-		context->names[idx].uid   = inode->i_uid;
-		context->names[idx].gid   = inode->i_gid;
-		context->names[idx].rdev  = inode->i_rdev;
-		audit_inode_context(idx, inode);
-	} else
-		context->names[idx].ino   = (unsigned long)-1;
+	if (!inode)
+		context->names[idx].ino = (unsigned long)-1;
+	else
+		audit_copy_inode(&context->names[idx], inode);
+
+	/* A parent was not found in audit_names, so copy the inode data for the
+	 * provided parent. */
+	if (!found_name) {
+		idx = context->name_count++;
+#if AUDIT_DEBUG
+		context->ino_count++;
+#endif
+		audit_copy_inode(&context->names[idx], parent);
+	}
+}
+
+/**
+ * audit_inode_update - update inode info for last collected name
+ * @inode: inode being audited
+ *
+ * When open() is called on an existing object with the O_CREAT flag, the inode
+ * data audit initially collects is incorrect.  This additional hook ensures
+ * audit has the inode data for the actual object to be opened.
+ */
+void __audit_inode_update(const struct inode *inode)
+{
+	struct audit_context *context = current->audit_context;
+	int idx;
+
+	if (!context->in_syscall || !inode)
+		return;
+
+	if (context->name_count == 0) {
+		context->name_count++;
+#if AUDIT_DEBUG
+		context->ino_count++;
+#endif
+	}
+	idx = context->name_count - 1;
+
+	audit_copy_inode(&context->names[idx], inode);
 }
 
 /**
@@ -1642,7 +1677,7 @@ int audit_bprm(struct linux_binprm *bprm)
 	unsigned long p, next;
 	void *to;
 
-	if (likely(!audit_enabled || !context))
+	if (likely(!audit_enabled || !context || context->dummy))
 		return 0;
 
 	ax = kmalloc(sizeof(*ax) + PAGE_SIZE * MAX_ARG_PAGES - bprm->p,
@@ -1680,7 +1715,7 @@ int audit_socketcall(int nargs, unsigned long *args)
 	struct audit_aux_data_socketcall *ax;
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!context))
+	if (likely(!context || context->dummy))
 		return 0;
 
 	ax = kmalloc(sizeof(*ax) + nargs * sizeof(unsigned long), GFP_KERNEL);
@@ -1708,7 +1743,7 @@ int audit_sockaddr(int len, void *a)
 	struct audit_aux_data_sockaddr *ax;
 	struct audit_context *context = current->audit_context;
 
-	if (likely(!context))
+	if (likely(!context || context->dummy))
 		return 0;
 
 	ax = kmalloc(sizeof(*ax) + len, GFP_KERNEL);
diff --git a/kernel/delayacct.c b/kernel/delayacct.c
index f05392d..57ca373 100644
--- a/kernel/delayacct.c
+++ b/kernel/delayacct.c
@@ -19,15 +19,15 @@
 #include <linux/sysctl.h>
 #include <linux/delayacct.h>
 
-int delayacct_on __read_mostly;	/* Delay accounting turned on/off */
+int delayacct_on __read_mostly = 1;	/* Delay accounting turned on/off */
 kmem_cache_t *delayacct_cache;
 
-static int __init delayacct_setup_enable(char *str)
+static int __init delayacct_setup_disable(char *str)
 {
-	delayacct_on = 1;
+	delayacct_on = 0;
 	return 1;
 }
-__setup("delayacct", delayacct_setup_enable);
+__setup("nodelayacct", delayacct_setup_disable);
 
 void delayacct_init(void)
 {
diff --git a/kernel/fork.c b/kernel/fork.c
index 1b0f7b1..aa36c43 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1387,8 +1387,10 @@ long do_fork(unsigned long clone_flags,
 
 		if (clone_flags & CLONE_VFORK) {
 			wait_for_completion(&vfork);
-			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE))
+			if (unlikely (current->ptrace & PT_TRACE_VFORK_DONE)) {
+				current->ptrace_message = nr;
 				ptrace_notify ((PTRACE_EVENT_VFORK_DONE << 8) | SIGTRAP);
+			}
 		}
 	} else {
 		free_pid(pid);
diff --git a/kernel/futex.c b/kernel/futex.c
index cf0c8e2..c2b2e0b 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -415,15 +415,15 @@ out_unlock:
  */
 void exit_pi_state_list(struct task_struct *curr)
 {
-	struct futex_hash_bucket *hb;
 	struct list_head *next, *head = &curr->pi_state_list;
 	struct futex_pi_state *pi_state;
+	struct futex_hash_bucket *hb;
 	union futex_key key;
 
 	/*
 	 * We are a ZOMBIE and nobody can enqueue itself on
 	 * pi_state_list anymore, but we have to be careful
-	 * versus waiters unqueueing themselfs
+	 * versus waiters unqueueing themselves:
 	 */
 	spin_lock_irq(&curr->pi_lock);
 	while (!list_empty(head)) {
@@ -431,21 +431,24 @@ void exit_pi_state_list(struct task_struct *curr)
 		next = head->next;
 		pi_state = list_entry(next, struct futex_pi_state, list);
 		key = pi_state->key;
+		hb = hash_futex(&key);
 		spin_unlock_irq(&curr->pi_lock);
 
-		hb = hash_futex(&key);
 		spin_lock(&hb->lock);
 
 		spin_lock_irq(&curr->pi_lock);
+		/*
+		 * We dropped the pi-lock, so re-check whether this
+		 * task still owns the PI-state:
+		 */
 		if (head->next != next) {
 			spin_unlock(&hb->lock);
 			continue;
 		}
 
-		list_del_init(&pi_state->list);
-
 		WARN_ON(pi_state->owner != curr);
-
+		WARN_ON(list_empty(&pi_state->list));
+		list_del_init(&pi_state->list);
 		pi_state->owner = NULL;
 		spin_unlock_irq(&curr->pi_lock);
 
@@ -470,7 +473,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 	head = &hb->chain;
 
 	list_for_each_entry_safe(this, next, head, list) {
-		if (match_futex (&this->key, &me->key)) {
+		if (match_futex(&this->key, &me->key)) {
 			/*
 			 * Another waiter already exists - bump up
 			 * the refcount and return its pi_state:
@@ -482,6 +485,8 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 			if (unlikely(!pi_state))
 				return -EINVAL;
 
+			WARN_ON(!atomic_read(&pi_state->refcount));
+
 			atomic_inc(&pi_state->refcount);
 			me->pi_state = pi_state;
 
@@ -490,10 +495,13 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 	}
 
 	/*
-	 * We are the first waiter - try to look up the real owner and
-	 * attach the new pi_state to it:
+	 * We are the first waiter - try to look up the real owner and attach
+	 * the new pi_state to it, but bail out when the owner died bit is set
+	 * and TID = 0:
 	 */
 	pid = uval & FUTEX_TID_MASK;
+	if (!pid && (uval & FUTEX_OWNER_DIED))
+		return -ESRCH;
 	p = futex_find_get_task(pid);
 	if (!p)
 		return -ESRCH;
@@ -510,6 +518,7 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb, struct futex_q *me)
 	pi_state->key = me->key;
 
 	spin_lock_irq(&p->pi_lock);
+	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &p->pi_state_list);
 	pi_state->owner = p;
 	spin_unlock_irq(&p->pi_lock);
@@ -573,20 +582,29 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this)
 	 * kept enabled while there is PI state around. We must also
 	 * preserve the owner died bit.)
 	 */
-	newval = (uval & FUTEX_OWNER_DIED) | FUTEX_WAITERS | new_owner->pid;
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		newval = FUTEX_WAITERS | new_owner->pid;
 
-	inc_preempt_count();
-	curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
-	dec_preempt_count();
+		inc_preempt_count();
+		curval = futex_atomic_cmpxchg_inatomic(uaddr, uval, newval);
+		dec_preempt_count();
+		if (curval == -EFAULT)
+			return -EFAULT;
+		if (curval != uval)
+			return -EINVAL;
+	}
 
-	if (curval == -EFAULT)
-		return -EFAULT;
-	if (curval != uval)
-		return -EINVAL;
+	spin_lock_irq(&pi_state->owner->pi_lock);
+	WARN_ON(list_empty(&pi_state->list));
+	list_del_init(&pi_state->list);
+	spin_unlock_irq(&pi_state->owner->pi_lock);
 
-	list_del_init(&pi_state->owner->pi_state_list);
+	spin_lock_irq(&new_owner->pi_lock);
+	WARN_ON(!list_empty(&pi_state->list));
 	list_add(&pi_state->list, &new_owner->pi_state_list);
 	pi_state->owner = new_owner;
+	spin_unlock_irq(&new_owner->pi_lock);
+
 	rt_mutex_unlock(&pi_state->pi_mutex);
 
 	return 0;
@@ -930,6 +948,7 @@ static int unqueue_me(struct futex_q *q)
 	/* In the common case we don't take the spinlock, which is nice. */
  retry:
 	lock_ptr = q->lock_ptr;
+	barrier();
 	if (lock_ptr != 0) {
 		spin_lock(lock_ptr);
 		/*
@@ -1236,6 +1255,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
 		/* Owner died? */
 		if (q.pi_state->owner != NULL) {
 			spin_lock_irq(&q.pi_state->owner->pi_lock);
+			WARN_ON(list_empty(&q.pi_state->list));
 			list_del_init(&q.pi_state->list);
 			spin_unlock_irq(&q.pi_state->owner->pi_lock);
 		} else
@@ -1244,6 +1264,7 @@ static int do_futex_lock_pi(u32 __user *uaddr, int detect, int trylock,
 		q.pi_state->owner = current;
 
 		spin_lock_irq(&current->pi_lock);
+		WARN_ON(!list_empty(&q.pi_state->list));
 		list_add(&q.pi_state->list, &current->pi_state_list);
 		spin_unlock_irq(&current->pi_lock);
 
@@ -1427,9 +1448,11 @@ retry_locked:
 	 * again. If it succeeds then we can return without waking
 	 * anyone else up:
 	 */
-	inc_preempt_count();
-	uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
-	dec_preempt_count();
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		inc_preempt_count();
+		uval = futex_atomic_cmpxchg_inatomic(uaddr, current->pid, 0);
+		dec_preempt_count();
+	}
 
 	if (unlikely(uval == -EFAULT))
 		goto pi_faulted;
@@ -1462,9 +1485,11 @@ retry_locked:
 	/*
 	 * No waiters - kernel unlocks the futex:
 	 */
-	ret = unlock_futex_pi(uaddr, uval);
-	if (ret == -EFAULT)
-		goto pi_faulted;
+	if (!(uval & FUTEX_OWNER_DIED)) {
+		ret = unlock_futex_pi(uaddr, uval);
+		if (ret == -EFAULT)
+			goto pi_faulted;
+	}
 
 out_unlock:
 	spin_unlock(&hb->lock);
@@ -1683,9 +1708,9 @@ err_unlock:
  * Process a futex-list entry, check whether it's owned by the
  * dying task, and do notification if so:
  */
-int handle_futex_death(u32 __user *uaddr, struct task_struct *curr)
+int handle_futex_death(u32 __user *uaddr, struct task_struct *curr, int pi)
 {
-	u32 uval, nval;
+	u32 uval, nval, mval;
 
 retry:
 	if (get_user(uval, uaddr))
@@ -1702,21 +1727,45 @@ retry:
 		 * thread-death.) The rest of the cleanup is done in
 		 * userspace.
 		 */
-		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval,
-						     uval | FUTEX_OWNER_DIED);
+		mval = (uval & FUTEX_WAITERS) | FUTEX_OWNER_DIED;
+		nval = futex_atomic_cmpxchg_inatomic(uaddr, uval, mval);
+
 		if (nval == -EFAULT)
 			return -1;
 
 		if (nval != uval)
 			goto retry;
 
-		if (uval & FUTEX_WAITERS)
-			futex_wake(uaddr, 1);
+		/*
+		 * Wake robust non-PI futexes here. The wakeup of
+		 * PI futexes happens in exit_pi_state():
+		 */
+		if (!pi) {
+			if (uval & FUTEX_WAITERS)
+				futex_wake(uaddr, 1);
+		}
 	}
 	return 0;
 }
 
 /*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int fetch_robust_entry(struct robust_list __user **entry,
+				     struct robust_list __user **head, int *pi)
+{
+	unsigned long uentry;
+
+	if (get_user(uentry, (unsigned long *)head))
+		return -EFAULT;
+
+	*entry = (void *)(uentry & ~1UL);
+	*pi = uentry & 1;
+
+	return 0;
+}
+
+/*
  * Walk curr->robust_list (very carefully, it's a userspace list!)
  * and mark any locks found there dead, and notify any waiters.
  *
@@ -1726,14 +1775,14 @@ void exit_robust_list(struct task_struct *curr)
 {
 	struct robust_list_head __user *head = curr->robust_list;
 	struct robust_list __user *entry, *pending;
-	unsigned int limit = ROBUST_LIST_LIMIT;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
 	unsigned long futex_offset;
 
 	/*
 	 * Fetch the list head (which was registered earlier, via
 	 * sys_set_robust_list()):
 	 */
-	if (get_user(entry, &head->list.next))
+	if (fetch_robust_entry(&entry, &head->list.next, &pi))
 		return;
 	/*
 	 * Fetch the relative futex offset:
@@ -1744,10 +1793,11 @@ void exit_robust_list(struct task_struct *curr)
 	 * Fetch any possibly pending lock-add first, and handle it
 	 * if it exists:
 	 */
-	if (get_user(pending, &head->list_op_pending))
+	if (fetch_robust_entry(&pending, &head->list_op_pending, &pip))
 		return;
+
 	if (pending)
-		handle_futex_death((void *)pending + futex_offset, curr);
+		handle_futex_death((void *)pending + futex_offset, curr, pip);
 
 	while (entry != &head->list) {
 		/*
@@ -1756,12 +1806,12 @@ void exit_robust_list(struct task_struct *curr)
 		 */
 		if (entry != pending)
 			if (handle_futex_death((void *)entry + futex_offset,
-						curr))
+						curr, pi))
 				return;
 		/*
 		 * Fetch the next entry in the list:
 		 */
-		if (get_user(entry, &entry->next))
+		if (fetch_robust_entry(&entry, &entry->next, &pi))
 			return;
 		/*
 		 * Avoid excessively long or circular lists:
diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c
index d1d92b4..c5cca3f 100644
--- a/kernel/futex_compat.c
+++ b/kernel/futex_compat.c
@@ -12,6 +12,23 @@
 
 #include <asm/uaccess.h>
 
+
+/*
+ * Fetch a robust-list pointer. Bit 0 signals PI futexes:
+ */
+static inline int
+fetch_robust_entry(compat_uptr_t *uentry, struct robust_list __user **entry,
+		   compat_uptr_t *head, int *pi)
+{
+	if (get_user(*uentry, head))
+		return -EFAULT;
+
+	*entry = compat_ptr((*uentry) & ~1);
+	*pi = (unsigned int)(*uentry) & 1;
+
+	return 0;
+}
+
 /*
  * Walk curr->robust_list (very carefully, it's a userspace list!)
  * and mark any locks found there dead, and notify any waiters.
@@ -22,17 +39,16 @@ void compat_exit_robust_list(struct task_struct *curr)
 {
 	struct compat_robust_list_head __user *head = curr->compat_robust_list;
 	struct robust_list __user *entry, *pending;
+	unsigned int limit = ROBUST_LIST_LIMIT, pi, pip;
 	compat_uptr_t uentry, upending;
-	unsigned int limit = ROBUST_LIST_LIMIT;
 	compat_long_t futex_offset;
 
 	/*
 	 * Fetch the list head (which was registered earlier, via
 	 * sys_set_robust_list()):
 	 */
-	if (get_user(uentry, &head->list.next))
+	if (fetch_robust_entry(&uentry, &entry, &head->list.next, &pi))
 		return;
-	entry = compat_ptr(uentry);
 	/*
 	 * Fetch the relative futex offset:
 	 */
@@ -42,11 +58,11 @@ void compat_exit_robust_list(struct task_struct *curr)
 	 * Fetch any possibly pending lock-add first, and handle it
 	 * if it exists:
 	 */
-	if (get_user(upending, &head->list_op_pending))
+	if (fetch_robust_entry(&upending, &pending,
+			       &head->list_op_pending, &pip))
 		return;
-	pending = compat_ptr(upending);
 	if (upending)
-		handle_futex_death((void *)pending + futex_offset, curr);
+		handle_futex_death((void *)pending + futex_offset, curr, pip);
 
 	while (compat_ptr(uentry) != &head->list) {
 		/*
@@ -55,15 +71,15 @@ void compat_exit_robust_list(struct task_struct *curr)
 		 */
 		if (entry != pending)
 			if (handle_futex_death((void *)entry + futex_offset,
-						curr))
+						curr, pi))
 				return;
 
 		/*
 		 * Fetch the next entry in the list:
 		 */
-		if (get_user(uentry, (compat_uptr_t *)&entry->next))
+		if (fetch_robust_entry(&uentry, &entry,
+				       (compat_uptr_t *)&entry->next, &pi))
 			return;
-		entry = compat_ptr(uentry);
 		/*
 		 * Avoid excessively long or circular lists:
 		 */
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index d17766d..be989ef 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -835,7 +835,7 @@ static void migrate_hrtimers(int cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static int __devinit hrtimer_cpu_notify(struct notifier_block *self,
+static int __cpuinit hrtimer_cpu_notify(struct notifier_block *self,
 					unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
@@ -859,7 +859,7 @@ static int __devinit hrtimer_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __devinitdata hrtimers_nb = {
+static struct notifier_block __cpuinitdata hrtimers_nb = {
 	.notifier_call = hrtimer_cpu_notify,
 };
 
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 4e46143..92be519 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -137,16 +137,40 @@ EXPORT_SYMBOL(enable_irq);
  *	@irq:	interrupt to control
  *	@on:	enable/disable power management wakeup
  *
- *	Enable/disable power management wakeup mode
+ *	Enable/disable power management wakeup mode, which is
+ *	disabled by default.  Enables and disables must match,
+ *	just as they match for non-wakeup mode support.
+ *
+ *	Wakeup mode lets this IRQ wake the system from sleep
+ *	states like "suspend to RAM".
  */
 int set_irq_wake(unsigned int irq, unsigned int on)
 {
 	struct irq_desc *desc = irq_desc + irq;
 	unsigned long flags;
 	int ret = -ENXIO;
+	int (*set_wake)(unsigned, unsigned) = desc->chip->set_wake;
 
+	/* wakeup-capable irqs can be shared between drivers that
+	 * don't need to have the same sleep mode behaviors.
+	 */
 	spin_lock_irqsave(&desc->lock, flags);
-	if (desc->chip->set_wake)
+	if (on) {
+		if (desc->wake_depth++ == 0)
+			desc->status |= IRQ_WAKEUP;
+		else
+			set_wake = NULL;
+	} else {
+		if (desc->wake_depth == 0) {
+			printk(KERN_WARNING "Unbalanced IRQ %d "
+					"wake disable\n", irq);
+			WARN_ON(1);
+		} else if (--desc->wake_depth == 0)
+			desc->status &= ~IRQ_WAKEUP;
+		else
+			set_wake = NULL;
+	}
+	if (set_wake)
 		ret = desc->chip->set_wake(irq, on);
 	spin_unlock_irqrestore(&desc->lock, flags);
 	return ret;
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 64aab08..3f57dfd 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -393,6 +393,7 @@ static int __kprobes add_new_kprobe(struct kprobe *old_p, struct kprobe *p)
 static inline void add_aggr_kprobe(struct kprobe *ap, struct kprobe *p)
 {
 	copy_kprobe(p, ap);
+	flush_insn_slot(ap);
 	ap->addr = p->addr;
 	ap->pre_handler = aggr_pre_handler;
 	ap->fault_handler = aggr_fault_handler;
diff --git a/kernel/power/process.c b/kernel/power/process.c
index b2a5f67..72e72d2 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -66,13 +66,25 @@ static inline void freeze_process(struct task_struct *p)
 	}
 }
 
+static void cancel_freezing(struct task_struct *p)
+{
+	unsigned long flags;
+
+	if (freezing(p)) {
+		pr_debug("  clean up: %s\n", p->comm);
+		do_not_freeze(p);
+		spin_lock_irqsave(&p->sighand->siglock, flags);
+		recalc_sigpending_tsk(p);
+		spin_unlock_irqrestore(&p->sighand->siglock, flags);
+	}
+}
+
 /* 0 = success, else # of processes that we failed to stop */
 int freeze_processes(void)
 {
 	int todo, nr_user, user_frozen;
 	unsigned long start_time;
 	struct task_struct *g, *p;
-	unsigned long flags;
 
 	printk( "Stopping tasks: " );
 	start_time = jiffies;
@@ -85,6 +97,10 @@ int freeze_processes(void)
 				continue;
 			if (frozen(p))
 				continue;
+			if (p->state == TASK_TRACED && frozen(p->parent)) {
+				cancel_freezing(p);
+				continue;
+			}
 			if (p->mm && !(p->flags & PF_BORROWED_MM)) {
 				/* The task is a user-space one.
 				 * Freeze it unless there's a vfork completion
@@ -126,13 +142,7 @@ int freeze_processes(void)
 		do_each_thread(g, p) {
 			if (freezeable(p) && !frozen(p))
 				printk(KERN_ERR "  %s\n", p->comm);
-			if (freezing(p)) {
-				pr_debug("  clean up: %s\n", p->comm);
-				p->flags &= ~PF_FREEZE;
-				spin_lock_irqsave(&p->sighand->siglock, flags);
-				recalc_sigpending_tsk(p);
-				spin_unlock_irqrestore(&p->sighand->siglock, flags);
-			}
+			cancel_freezing(p);
 		} while_each_thread(g, p);
 		read_unlock(&tasklist_lock);
 		return todo;
diff --git a/kernel/printk.c b/kernel/printk.c
index 65ca068..1149365 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -799,6 +799,9 @@ void release_console_sem(void)
 		up(&secondary_console_sem);
 		return;
 	}
+
+	console_may_schedule = 0;
+
 	for ( ; ; ) {
 		spin_lock_irqsave(&logbuf_lock, flags);
 		wake_klogd |= log_start - log_end;
@@ -812,7 +815,6 @@ void release_console_sem(void)
 		local_irq_restore(flags);
 	}
 	console_locked = 0;
-	console_may_schedule = 0;
 	up(&console_sem);
 	spin_unlock_irqrestore(&logbuf_lock, flags);
 	if (wake_klogd && !oops_in_progress && waitqueue_active(&log_wait)) {
diff --git a/kernel/rcupdate.c b/kernel/rcupdate.c
index 759805c..436ab35 100644
--- a/kernel/rcupdate.c
+++ b/kernel/rcupdate.c
@@ -548,7 +548,7 @@ static void __devinit rcu_online_cpu(int cpu)
 	tasklet_init(&per_cpu(rcu_tasklet, cpu), rcu_process_callbacks, 0UL);
 }
 
-static int __devinit rcu_cpu_notify(struct notifier_block *self,
+static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 				unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
@@ -565,7 +565,7 @@ static int __devinit rcu_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __devinitdata rcu_nb = {
+static struct notifier_block __cpuinitdata rcu_nb = {
 	.notifier_call	= rcu_cpu_notify,
 };
 
diff --git a/kernel/resource.c b/kernel/resource.c
index 0dd3a85..4628643 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -244,6 +244,7 @@ int find_next_system_ram(struct resource *res)
 
 	start = res->start;
 	end = res->end;
+	BUG_ON(start >= end);
 
 	read_lock(&resource_lock);
 	for (p = iomem_resource.child; p ; p = p->sibling) {
@@ -254,15 +255,17 @@ int find_next_system_ram(struct resource *res)
 			p = NULL;
 			break;
 		}
-		if (p->start >= start)
+		if ((p->end >= start) && (p->start < end))
 			break;
 	}
 	read_unlock(&resource_lock);
 	if (!p)
 		return -1;
 	/* copy data */
-	res->start = p->start;
-	res->end = p->end;
+	if (res->start < p->start)
+		res->start = p->start;
+	if (res->end > p->end)
+		res->end = p->end;
 	return 0;
 }
 #endif
diff --git a/kernel/rtmutex.c b/kernel/rtmutex.c
index d2ef13b..3e13a1e 100644
--- a/kernel/rtmutex.c
+++ b/kernel/rtmutex.c
@@ -7,6 +7,8 @@
  *  Copyright (C) 2005-2006 Timesys Corp., Thomas Gleixner <tglx@timesys.com>
  *  Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt
  *  Copyright (C) 2006 Esben Nielsen
+ *
+ *  See Documentation/rt-mutex-design.txt for details.
  */
 #include <linux/spinlock.h>
 #include <linux/module.h>
diff --git a/kernel/sched.c b/kernel/sched.c
index b44b9a4..a2be2d0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -4456,9 +4456,9 @@ asmlinkage long sys_sched_yield(void)
 	return 0;
 }
 
-static inline int __resched_legal(void)
+static inline int __resched_legal(int expected_preempt_count)
 {
-	if (unlikely(preempt_count()))
+	if (unlikely(preempt_count() != expected_preempt_count))
 		return 0;
 	if (unlikely(system_state != SYSTEM_RUNNING))
 		return 0;
@@ -4484,7 +4484,7 @@ static void __cond_resched(void)
 
 int __sched cond_resched(void)
 {
-	if (need_resched() && __resched_legal()) {
+	if (need_resched() && __resched_legal(0)) {
 		__cond_resched();
 		return 1;
 	}
@@ -4510,7 +4510,7 @@ int cond_resched_lock(spinlock_t *lock)
 		ret = 1;
 		spin_lock(lock);
 	}
-	if (need_resched() && __resched_legal()) {
+	if (need_resched() && __resched_legal(1)) {
 		spin_release(&lock->dep_map, 1, _THIS_IP_);
 		_raw_spin_unlock(lock);
 		preempt_enable_no_resched();
@@ -4526,7 +4526,7 @@ int __sched cond_resched_softirq(void)
 {
 	BUG_ON(!in_softirq());
 
-	if (need_resched() && __resched_legal()) {
+	if (need_resched() && __resched_legal(0)) {
 		raw_local_irq_disable();
 		_local_bh_enable();
 		raw_local_irq_enable();
@@ -6494,7 +6494,12 @@ static int build_sched_domains(const cpumask_t *cpu_map)
 	for (i = 0; i < MAX_NUMNODES; i++)
 		init_numa_sched_groups_power(sched_group_nodes[i]);
 
-	init_numa_sched_groups_power(sched_group_allnodes);
+	if (sched_group_allnodes) {
+		int group = cpu_to_allnodes_group(first_cpu(*cpu_map));
+		struct sched_group *sg = &sched_group_allnodes[group];
+
+		init_numa_sched_groups_power(sg);
+	}
 #endif
 
 	/* Attach the domains */
@@ -6761,6 +6766,11 @@ void __init sched_init(void)
 	}
 
 	set_load_weight(&init_task);
+
+#ifdef CONFIG_RT_MUTEXES
+	plist_head_init(&init_task.pi_waiters, &init_task.pi_lock);
+#endif
+
 	/*
 	 * The boot idle thread does lazy MMU switching as well:
 	 */
diff --git a/kernel/signal.c b/kernel/signal.c
index 7fe874d..bfdb5686 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -791,22 +791,31 @@ out:
 /*
  * Force a signal that the process can't ignore: if necessary
  * we unblock the signal and change any SIG_IGN to SIG_DFL.
+ *
+ * Note: If we unblock the signal, we always reset it to SIG_DFL,
+ * since we do not want to have a signal handler that was blocked
+ * be invoked when user space had explicitly blocked it.
+ *
+ * We don't want to have recursive SIGSEGV's etc, for example.
  */
-
 int
 force_sig_info(int sig, struct siginfo *info, struct task_struct *t)
 {
 	unsigned long int flags;
-	int ret;
+	int ret, blocked, ignored;
+	struct k_sigaction *action;
 
 	spin_lock_irqsave(&t->sighand->siglock, flags);
-	if (t->sighand->action[sig-1].sa.sa_handler == SIG_IGN) {
-		t->sighand->action[sig-1].sa.sa_handler = SIG_DFL;
-	}
-	if (sigismember(&t->blocked, sig)) {
-		sigdelset(&t->blocked, sig);
+	action = &t->sighand->action[sig-1];
+	ignored = action->sa.sa_handler == SIG_IGN;
+	blocked = sigismember(&t->blocked, sig);
+	if (blocked || ignored) {
+		action->sa.sa_handler = SIG_DFL;
+		if (blocked) {
+			sigdelset(&t->blocked, sig);
+			recalc_sigpending_tsk(t);
+		}
 	}
-	recalc_sigpending_tsk(t);
 	ret = specific_send_sig_info(sig, info, t);
 	spin_unlock_irqrestore(&t->sighand->siglock, flags);
 
diff --git a/kernel/softirq.c b/kernel/softirq.c
index 0f08a84..3789ca9 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -65,6 +65,7 @@ static inline void wakeup_softirqd(void)
  * This one is for softirq.c-internal use,
  * where hardirqs are disabled legitimately:
  */
+#ifdef CONFIG_TRACE_IRQFLAGS
 static void __local_bh_disable(unsigned long ip)
 {
 	unsigned long flags;
@@ -80,6 +81,13 @@ static void __local_bh_disable(unsigned long ip)
 		trace_softirqs_off(ip);
 	raw_local_irq_restore(flags);
 }
+#else /* !CONFIG_TRACE_IRQFLAGS */
+static inline void __local_bh_disable(unsigned long ip)
+{
+	add_preempt_count(SOFTIRQ_OFFSET);
+	barrier();
+}
+#endif /* CONFIG_TRACE_IRQFLAGS */
 
 void local_bh_disable(void)
 {
@@ -121,12 +129,16 @@ EXPORT_SYMBOL(_local_bh_enable);
 
 void local_bh_enable(void)
 {
+#ifdef CONFIG_TRACE_IRQFLAGS
 	unsigned long flags;
 
 	WARN_ON_ONCE(in_irq());
+#endif
 	WARN_ON_ONCE(irqs_disabled());
 
+#ifdef CONFIG_TRACE_IRQFLAGS
 	local_irq_save(flags);
+#endif
 	/*
 	 * Are softirqs going to be turned on now:
 	 */
@@ -142,18 +154,22 @@ void local_bh_enable(void)
 		do_softirq();
 
 	dec_preempt_count();
+#ifdef CONFIG_TRACE_IRQFLAGS
 	local_irq_restore(flags);
+#endif
 	preempt_check_resched();
 }
 EXPORT_SYMBOL(local_bh_enable);
 
 void local_bh_enable_ip(unsigned long ip)
 {
+#ifdef CONFIG_TRACE_IRQFLAGS
 	unsigned long flags;
 
 	WARN_ON_ONCE(in_irq());
 
 	local_irq_save(flags);
+#endif
 	/*
 	 * Are softirqs going to be turned on now:
 	 */
@@ -169,7 +185,9 @@ void local_bh_enable_ip(unsigned long ip)
 		do_softirq();
 
 	dec_preempt_count();
+#ifdef CONFIG_TRACE_IRQFLAGS
 	local_irq_restore(flags);
+#endif
 	preempt_check_resched();
 }
 EXPORT_SYMBOL(local_bh_enable_ip);
@@ -547,7 +565,7 @@ static void takeover_tasklets(unsigned int cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static int __devinit cpu_callback(struct notifier_block *nfb,
+static int __cpuinit cpu_callback(struct notifier_block *nfb,
 				  unsigned long action,
 				  void *hcpu)
 {
@@ -587,7 +605,7 @@ static int __devinit cpu_callback(struct notifier_block *nfb,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __devinitdata cpu_nfb = {
+static struct notifier_block __cpuinitdata cpu_nfb = {
 	.notifier_call = cpu_callback
 };
 
diff --git a/kernel/softlockup.c b/kernel/softlockup.c
index 6b76caa..03e6a2b 100644
--- a/kernel/softlockup.c
+++ b/kernel/softlockup.c
@@ -104,7 +104,7 @@ static int watchdog(void * __bind_cpu)
 /*
  * Create/destroy watchdog threads as CPUs come and go:
  */
-static int __devinit
+static int __cpuinit
 cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 {
 	int hotcpu = (unsigned long)hcpu;
@@ -142,7 +142,7 @@ cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __devinitdata cpu_nfb = {
+static struct notifier_block __cpuinitdata cpu_nfb = {
 	.notifier_call = cpu_callback
 };
 
diff --git a/kernel/taskstats.c b/kernel/taskstats.c
index f45179c..e781876 100644
--- a/kernel/taskstats.c
+++ b/kernel/taskstats.c
@@ -121,46 +121,45 @@ static int send_reply(struct sk_buff *skb, pid_t pid)
 /*
  * Send taskstats data in @skb to listeners registered for @cpu's exit data
  */
-static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
+static void send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
 {
 	struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data);
 	struct listener_list *listeners;
 	struct listener *s, *tmp;
 	struct sk_buff *skb_next, *skb_cur = skb;
 	void *reply = genlmsg_data(genlhdr);
-	int rc, ret, delcount = 0;
+	int rc, delcount = 0;
 
 	rc = genlmsg_end(skb, reply);
 	if (rc < 0) {
 		nlmsg_free(skb);
-		return rc;
+		return;
 	}
 
 	rc = 0;
 	listeners = &per_cpu(listener_array, cpu);
 	down_read(&listeners->sem);
-	list_for_each_entry_safe(s, tmp, &listeners->list, list) {
+	list_for_each_entry(s, &listeners->list, list) {
 		skb_next = NULL;
 		if (!list_is_last(&s->list, &listeners->list)) {
 			skb_next = skb_clone(skb_cur, GFP_KERNEL);
-			if (!skb_next) {
-				nlmsg_free(skb_cur);
-				rc = -ENOMEM;
+			if (!skb_next)
 				break;
-			}
 		}
-		ret = genlmsg_unicast(skb_cur, s->pid);
-		if (ret == -ECONNREFUSED) {
+		rc = genlmsg_unicast(skb_cur, s->pid);
+		if (rc == -ECONNREFUSED) {
 			s->valid = 0;
 			delcount++;
-			rc = ret;
 		}
 		skb_cur = skb_next;
 	}
 	up_read(&listeners->sem);
 
+	if (skb_cur)
+		nlmsg_free(skb_cur);
+
 	if (!delcount)
-		return rc;
+		return;
 
 	/* Delete invalidated entries */
 	down_write(&listeners->sem);
@@ -171,13 +170,12 @@ static int send_cpu_listeners(struct sk_buff *skb, unsigned int cpu)
 		}
 	}
 	up_write(&listeners->sem);
-	return rc;
 }
 
 static int fill_pid(pid_t pid, struct task_struct *pidtsk,
 		struct taskstats *stats)
 {
-	int rc;
+	int rc = 0;
 	struct task_struct *tsk = pidtsk;
 
 	if (!pidtsk) {
@@ -196,12 +194,10 @@ static int fill_pid(pid_t pid, struct task_struct *pidtsk,
 	 * Each accounting subsystem adds calls to its functions to
 	 * fill in relevant parts of struct taskstsats as follows
 	 *
-	 *	rc = per-task-foo(stats, tsk);
-	 *	if (rc)
-	 *		goto err;
+	 *	per-task-foo(stats, tsk);
 	 */
 
-	rc = delayacct_add_tsk(stats, tsk);
+	delayacct_add_tsk(stats, tsk);
 	stats->version = TASKSTATS_VERSION;
 
 	/* Define err: label here if needed */
diff --git a/kernel/timer.c b/kernel/timer.c
index 05809c2..b650f04 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -84,7 +84,7 @@ typedef struct tvec_t_base_s tvec_base_t;
 
 tvec_base_t boot_tvec_bases;
 EXPORT_SYMBOL(boot_tvec_bases);
-static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = { &boot_tvec_bases };
+static DEFINE_PER_CPU(tvec_base_t *, tvec_bases) = &boot_tvec_bases;
 
 static inline void set_running_timer(tvec_base_t *base,
 					struct timer_list *timer)
@@ -408,7 +408,7 @@ static int cascade(tvec_base_t *base, tvec_t *tv, int index)
  * This function cascades all vectors and executes all expired timer
  * vectors.
  */
-#define INDEX(N) (base->timer_jiffies >> (TVR_BITS + N * TVN_BITS)) & TVN_MASK
+#define INDEX(N) ((base->timer_jiffies >> (TVR_BITS + (N) * TVN_BITS)) & TVN_MASK)
 
 static inline void __run_timers(tvec_base_t *base)
 {
@@ -1688,7 +1688,7 @@ static void __devinit migrate_timers(int cpu)
 }
 #endif /* CONFIG_HOTPLUG_CPU */
 
-static int __devinit timer_cpu_notify(struct notifier_block *self,
+static int __cpuinit timer_cpu_notify(struct notifier_block *self,
 				unsigned long action, void *hcpu)
 {
 	long cpu = (long)hcpu;
@@ -1708,7 +1708,7 @@ static int __devinit timer_cpu_notify(struct notifier_block *self,
 	return NOTIFY_OK;
 }
 
-static struct notifier_block __devinitdata timers_nb = {
+static struct notifier_block __cpuinitdata timers_nb = {
 	.notifier_call	= timer_cpu_notify,
 };
 
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index eebb1d8..448e8f7 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -93,9 +93,12 @@ static void __queue_work(struct cpu_workqueue_struct *cwq,
 	spin_unlock_irqrestore(&cwq->lock, flags);
 }
 
-/*
- * Queue work on a workqueue. Return non-zero if it was successfully
- * added.
+/**
+ * queue_work - queue work on a workqueue
+ * @wq: workqueue to use
+ * @work: work to queue
+ *
+ * Returns non-zero if it was successfully added.
  *
  * We queue the work to the CPU it was submitted, but there is no
  * guarantee that it will be processed by that CPU.
@@ -128,6 +131,14 @@ static void delayed_work_timer_fn(unsigned long __data)
 	__queue_work(per_cpu_ptr(wq->cpu_wq, cpu), work);
 }
 
+/**
+ * queue_delayed_work - queue work on a workqueue after delay
+ * @wq: workqueue to use
+ * @work: work to queue
+ * @delay: number of jiffies to wait before queueing
+ *
+ * Returns non-zero if it was successfully added.
+ */
 int fastcall queue_delayed_work(struct workqueue_struct *wq,
 			struct work_struct *work, unsigned long delay)
 {
@@ -150,6 +161,15 @@ int fastcall queue_delayed_work(struct workqueue_struct *wq,
 }
 EXPORT_SYMBOL_GPL(queue_delayed_work);
 
+/**
+ * queue_delayed_work_on - queue work on specific CPU after delay
+ * @cpu: CPU number to execute work on
+ * @wq: workqueue to use
+ * @work: work to queue
+ * @delay: number of jiffies to wait before queueing
+ *
+ * Returns non-zero if it was successfully added.
+ */
 int queue_delayed_work_on(int cpu, struct workqueue_struct *wq,
 			struct work_struct *work, unsigned long delay)
 {
@@ -275,8 +295,9 @@ static void flush_cpu_workqueue(struct cpu_workqueue_struct *cwq)
 	}
 }
 
-/*
+/**
  * flush_workqueue - ensure that any scheduled work has run to completion.
+ * @wq: workqueue to flush
  *
  * Forces execution of the workqueue and blocks until its completion.
  * This is typically used in driver shutdown handlers.
@@ -400,6 +421,12 @@ static void cleanup_workqueue_thread(struct workqueue_struct *wq, int cpu)
 		kthread_stop(p);
 }
 
+/**
+ * destroy_workqueue - safely terminate a workqueue
+ * @wq: target workqueue
+ *
+ * Safely destroy a workqueue. All work currently pending will be done first.
+ */
 void destroy_workqueue(struct workqueue_struct *wq)
 {
 	int cpu;
@@ -425,18 +452,41 @@ EXPORT_SYMBOL_GPL(destroy_workqueue);
 
 static struct workqueue_struct *keventd_wq;
 
+/**
+ * schedule_work - put work task in global workqueue
+ * @work: job to be done
+ *
+ * This puts a job in the kernel-global workqueue.
+ */
 int fastcall schedule_work(struct work_struct *work)
 {
 	return queue_work(keventd_wq, work);
 }
 EXPORT_SYMBOL(schedule_work);
 
+/**
+ * schedule_delayed_work - put work task in global workqueue after delay
+ * @work: job to be done
+ * @delay: number of jiffies to wait
+ *
+ * After waiting for a given time this puts a job in the kernel-global
+ * workqueue.
+ */
 int fastcall schedule_delayed_work(struct work_struct *work, unsigned long delay)
 {
 	return queue_delayed_work(keventd_wq, work, delay);
 }
 EXPORT_SYMBOL(schedule_delayed_work);
 
+/**
+ * schedule_delayed_work_on - queue work in global workqueue on CPU after delay
+ * @cpu: cpu to use
+ * @work: job to be done
+ * @delay: number of jiffies to wait
+ *
+ * After waiting for a given time this puts a job in the kernel-global
+ * workqueue on the specified CPU.
+ */
 int schedule_delayed_work_on(int cpu,
 			struct work_struct *work, unsigned long delay)
 {