From a2000572ad511f5f43091ed7bd2cc3b913104a1e Mon Sep 17 00:00:00 2001
From: Nick Piggin <npiggin@suse.de>
Date: Fri, 10 Feb 2006 01:51:02 -0800
Subject: [PATCH] sched: remove smpnice

I don't think the code is quite ready, which is why I asked for Peter's
additions to also be merged before I acked it (although it turned out that
it still isn't quite ready with his additions either).

Basically I have had similar observations to Suresh in that it does not
play nicely with the rest of the balancing infrastructure (and raised
similar concerns in my review).

The samples (group of 4) I got for "maximum recorded imbalance" on a 2x2
SMP+HT Xeon are as follows:

            | Following boot | hackbench 20        | hackbench 40
 -----------+----------------+---------------------+---------------------
 2.6.16-rc2 | 30,37,100,112  | 5600,5530,6020,6090 | 6390,7090,8760,8470
 +nosmpnice |  3, 2,  4,  2  |   28, 150, 294, 132 |  348, 348, 294, 347

Hackbench raw performance is down around 15% with smpnice (but that in
itself isn't a huge deal because it is just a benchmark).  However, the
samples show that the imbalance passed into move_tasks is increased by
about a factor of 10-30.  I think this would also go some way to explaining
latency blips turning up in the balancing code (though I haven't actually
measured that).

We'll probably have to revert this in the SUSE kernel.

Cc: "Siddha, Suresh B" <suresh.b.siddha@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Peter Williams <pwil3058@bigpond.net.au>
Cc: "Martin J. Bligh" <mbligh@aracnet.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 129 ++++++++-------------------------------------------------
 1 file changed, 18 insertions(+), 111 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index bc38804..87d93be 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -215,7 +215,6 @@ struct runqueue {
 	 */
 	unsigned long nr_running;
 #ifdef CONFIG_SMP
-	unsigned long prio_bias;
 	unsigned long cpu_load[3];
 #endif
 	unsigned long long nr_switches;
@@ -669,68 +668,13 @@ static int effective_prio(task_t *p)
 	return prio;
 }
 
-#ifdef CONFIG_SMP
-static inline void inc_prio_bias(runqueue_t *rq, int prio)
-{
-	rq->prio_bias += MAX_PRIO - prio;
-}
-
-static inline void dec_prio_bias(runqueue_t *rq, int prio)
-{
-	rq->prio_bias -= MAX_PRIO - prio;
-}
-
-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running++;
-	if (rt_task(p)) {
-		if (p != rq->migration_thread)
-			/*
-			 * The migration thread does the actual balancing. Do
-			 * not bias by its priority as the ultra high priority
-			 * will skew balancing adversely.
-			 */
-			inc_prio_bias(rq, p->prio);
-	} else
-		inc_prio_bias(rq, p->static_prio);
-}
-
-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running--;
-	if (rt_task(p)) {
-		if (p != rq->migration_thread)
-			dec_prio_bias(rq, p->prio);
-	} else
-		dec_prio_bias(rq, p->static_prio);
-}
-#else
-static inline void inc_prio_bias(runqueue_t *rq, int prio)
-{
-}
-
-static inline void dec_prio_bias(runqueue_t *rq, int prio)
-{
-}
-
-static inline void inc_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running++;
-}
-
-static inline void dec_nr_running(task_t *p, runqueue_t *rq)
-{
-	rq->nr_running--;
-}
-#endif
-
 /*
  * __activate_task - move a task to the runqueue.
  */
 static inline void __activate_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task(p, rq->active);
-	inc_nr_running(p, rq);
+	rq->nr_running++;
 }
 
 /*
@@ -739,7 +683,7 @@ static inline void __activate_task(task_t *p, runqueue_t *rq)
 static inline void __activate_idle_task(task_t *p, runqueue_t *rq)
 {
 	enqueue_task_head(p, rq->active);
-	inc_nr_running(p, rq);
+	rq->nr_running++;
 }
 
 static int recalc_task_prio(task_t *p, unsigned long long now)
@@ -863,7 +807,7 @@ static void activate_task(task_t *p, runqueue_t *rq, int local)
  */
 static void deactivate_task(struct task_struct *p, runqueue_t *rq)
 {
-	dec_nr_running(p, rq);
+	rq->nr_running--;
 	dequeue_task(p, p->array);
 	p->array = NULL;
 }
@@ -1007,61 +951,27 @@ void kick_process(task_t *p)
  * We want to under-estimate the load of migration sources, to
  * balance conservatively.
  */
-static unsigned long __source_load(int cpu, int type, enum idle_type idle)
+static inline unsigned long source_load(int cpu, int type)
 {
 	runqueue_t *rq = cpu_rq(cpu);
-	unsigned long running = rq->nr_running;
-	unsigned long source_load, cpu_load = rq->cpu_load[type-1],
-		load_now = running * SCHED_LOAD_SCALE;
-
+	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
 	if (type == 0)
-		source_load = load_now;
-	else
-		source_load = min(cpu_load, load_now);
-
-	if (running > 1 || (idle == NOT_IDLE && running))
-		/*
-		 * If we are busy rebalancing the load is biased by
-		 * priority to create 'nice' support across cpus. When
-		 * idle rebalancing we should only bias the source_load if
-		 * there is more than one task running on that queue to
-		 * prevent idle rebalance from trying to pull tasks from a
-		 * queue with only one running task.
-		 */
-		source_load = source_load * rq->prio_bias / running;
+		return load_now;
 
-	return source_load;
-}
-
-static inline unsigned long source_load(int cpu, int type)
-{
-	return __source_load(cpu, type, NOT_IDLE);
+	return min(rq->cpu_load[type-1], load_now);
 }
 
 /*
  * Return a high guess at the load of a migration-target cpu
  */
-static inline unsigned long __target_load(int cpu, int type, enum idle_type idle)
+static inline unsigned long target_load(int cpu, int type)
 {
 	runqueue_t *rq = cpu_rq(cpu);
-	unsigned long running = rq->nr_running;
-	unsigned long target_load, cpu_load = rq->cpu_load[type-1],
-		load_now = running * SCHED_LOAD_SCALE;
-
+	unsigned long load_now = rq->nr_running * SCHED_LOAD_SCALE;
 	if (type == 0)
-		target_load = load_now;
-	else
-		target_load = max(cpu_load, load_now);
+		return load_now;
 
-	if (running > 1 || (idle == NOT_IDLE && running))
-		target_load = target_load * rq->prio_bias / running;
-
-	return target_load;
-}
-
-static inline unsigned long target_load(int cpu, int type)
-{
-	return __target_load(cpu, type, NOT_IDLE);
+	return max(rq->cpu_load[type-1], load_now);
 }
 
 /*
@@ -1530,7 +1440,7 @@ void fastcall wake_up_new_task(task_t *p, unsigned long clone_flags)
 				list_add_tail(&p->run_list, &current->run_list);
 				p->array = current->array;
 				p->array->nr_active++;
-				inc_nr_running(p, rq);
+				rq->nr_running++;
 			}
 			set_need_resched();
 		} else
@@ -1875,9 +1785,9 @@ void pull_task(runqueue_t *src_rq, prio_array_t *src_array, task_t *p,
 	       runqueue_t *this_rq, prio_array_t *this_array, int this_cpu)
 {
 	dequeue_task(p, src_array);
-	dec_nr_running(p, src_rq);
+	src_rq->nr_running--;
 	set_task_cpu(p, this_cpu);
-	inc_nr_running(p, this_rq);
+	this_rq->nr_running++;
 	enqueue_task(p, this_array);
 	p->timestamp = (p->timestamp - src_rq->timestamp_last_tick)
 				+ this_rq->timestamp_last_tick;
@@ -2056,9 +1966,9 @@ find_busiest_group(struct sched_domain *sd, int this_cpu,
 
 			/* Bias balancing toward cpus of our domain */
 			if (local_group)
-				load = __target_load(i, load_idx, idle);
+				load = target_load(i, load_idx);
 			else
-				load = __source_load(i, load_idx, idle);
+				load = source_load(i, load_idx);
 
 			avg_load += load;
 		}
@@ -2171,7 +2081,7 @@ static runqueue_t *find_busiest_queue(struct sched_group *group,
 	int i;
 
 	for_each_cpu_mask(i, group->cpumask) {
-		load = __source_load(i, 0, idle);
+		load = source_load(i, 0);
 
 		if (load > max_load) {
 			max_load = load;
@@ -3571,10 +3481,8 @@ void set_user_nice(task_t *p, long nice)
 		goto out_unlock;
 	}
 	array = p->array;
-	if (array) {
+	if (array)
 		dequeue_task(p, array);
-		dec_prio_bias(rq, p->static_prio);
-	}
 
 	old_prio = p->prio;
 	new_prio = NICE_TO_PRIO(nice);
@@ -3584,7 +3492,6 @@ void set_user_nice(task_t *p, long nice)
 
 	if (array) {
 		enqueue_task(p, array);
-		inc_prio_bias(rq, p->static_prio);
 		/*
 		 * If the task increased its priority or is running and
 		 * lowered its priority, then reschedule its CPU:
-- 
cgit v1.1


From c22db9412736204b25aeba19d18e5ea922f7d632 Mon Sep 17 00:00:00 2001
From: Jan Beulich <jbeulich@novell.com>
Date: Fri, 10 Feb 2006 01:51:11 -0800
Subject: [PATCH] prevent recursive panic from softlockup watchdog

When panic_timeout is zero, suppress triggering a nested panic due to soft
lockup detection.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/panic.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'kernel')

diff --git a/kernel/panic.c b/kernel/panic.c
index c5c4ab2..126dc43 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -130,6 +130,7 @@ NORET_TYPE void panic(const char * fmt, ...)
 #endif
 	local_irq_enable();
 	for (i = 0;;) {
+		touch_softlockup_watchdog();
 		i += panic_blink(i);
 		mdelay(1);
 		i++;
-- 
cgit v1.1


From 16bf134840da3920ded1290973c56ec214636f12 Mon Sep 17 00:00:00 2001
From: Hugh Dickins <hugh@veritas.com>
Date: Tue, 14 Feb 2006 13:52:59 -0800
Subject: [PATCH] compound page: no access_process_vm check

The PageCompound check before access_process_vm's set_page_dirty_lock is no
longer necessary, so remove it.  But leave the PageCompound checks in
bio_set_pages_dirty, dio_bio_complete and nfs_free_user_pages: at least some
of those were introduced as a little optimization on hugetlb pages.

Signed-off-by: Hugh Dickins <hugh@veritas.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/ptrace.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 5f33cdb..d2cf144 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -242,8 +242,7 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in
 		if (write) {
 			copy_to_user_page(vma, page, addr,
 					  maddr + offset, buf, bytes);
-			if (!PageCompound(page))
-				set_page_dirty_lock(page);
+			set_page_dirty_lock(page);
 		} else {
 			copy_from_user_page(vma, page, addr,
 					    buf, maddr + offset, bytes);
-- 
cgit v1.1


From d6077cb80cde4506720f9165eba99ee07438513f Mon Sep 17 00:00:00 2001
From: "Chen, Kenneth W" <kenneth.w.chen@intel.com>
Date: Tue, 14 Feb 2006 13:53:10 -0800
Subject: [PATCH] sched: revert "filter affine wakeups"

Revert commit d7102e95b7b9c00277562c29aad421d2d521c5f6:

    [PATCH] sched: filter affine wakeups

Apparently caused more than 10% performance regression for aim7 benchmark.
The setup in use is 16-cpu HP rx8620, 64Gb of memory and 12 MSA1000s with 144
disks.  Each disk is 72Gb with a single ext3 filesystem (courtesy of HP, who
supplied benchmark results).

The problem is, for aim7, the wake-up pattern is random, but it still needs
load balancing action in the wake-up path to achieve best performance.  With
the above commit, lack of load balancing hurts that workload.

However, for workloads like database transaction processing, the requirement
is exactly opposite.  In the wake up path, best performance is achieved with
absolutely zero load balancing.  We simply wake up the process on the CPU that
it was previously run.  Worst performance is obtained when we do load
balancing at wake up.

There isn't an easy way to auto detect the workload characteristics.  Ingo's
earlier patch that detects idle CPU and decide whether to load balance or not
doesn't perform with aim7 either since all CPUs are busy (it causes even
bigger perf.  regression).

Revert commit d7102e95b7b9c00277562c29aad421d2d521c5f6, which causes more
than 10% performance regression with aim7.

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Acked-by: Ingo Molnar <mingo@elte.hu>
Cc: Nick Piggin <nickpiggin@yahoo.com.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 87d93be..66d9572 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -1204,9 +1204,6 @@ static int try_to_wake_up(task_t *p, unsigned int state, int sync)
 		}
 	}
 
-	if (p->last_waker_cpu != this_cpu)
-		goto out_set_cpu;
-
 	if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
 		goto out_set_cpu;
 
@@ -1277,8 +1274,6 @@ out_set_cpu:
 		cpu = task_cpu(p);
 	}
 
-	p->last_waker_cpu = this_cpu;
-
 out_activate:
 #endif /* CONFIG_SMP */
 	if (old_state == TASK_UNINTERRUPTIBLE) {
@@ -1360,12 +1355,9 @@ void fastcall sched_fork(task_t *p, int clone_flags)
 #ifdef CONFIG_SCHEDSTATS
 	memset(&p->sched_info, 0, sizeof(p->sched_info));
 #endif
-#if defined(CONFIG_SMP)
-	p->last_waker_cpu = cpu;
-#if defined(__ARCH_WANT_UNLOCKED_CTXSW)
+#if defined(CONFIG_SMP) && defined(__ARCH_WANT_UNLOCKED_CTXSW)
 	p->oncpu = 0;
 #endif
-#endif
 #ifdef CONFIG_PREEMPT
 	/* Want to start with kernel preemption disabled. */
 	task_thread_info(p)->preempt_count = 1;
-- 
cgit v1.1


From 06027bdd278a32a84b273e41db68a5db8ffd2bb6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Tue, 14 Feb 2006 13:53:15 -0800
Subject: [PATCH] hrtimer: round up relative start time on low-res arches

CONFIG_TIME_LOW_RES is a temporary way for architectures to signal that
they simply return xtime in do_gettimeoffset().  In this corner-case we
want to round up by resolution when starting a relative timer, to avoid
short timeouts.  This will go away with the GTOD framework.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Cc: Roman Zippel <zippel@linux-m68k.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/hrtimer.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 2b6e175..5ae51f1 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -418,8 +418,19 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 	/* Switch the timer base, if necessary: */
 	new_base = switch_hrtimer_base(timer, base);
 
-	if (mode == HRTIMER_REL)
+	if (mode == HRTIMER_REL) {
 		tim = ktime_add(tim, new_base->get_time());
+		/*
+		 * CONFIG_TIME_LOW_RES is a temporary way for architectures
+		 * to signal that they simply return xtime in
+		 * do_gettimeoffset(). In this case we want to round up by
+		 * resolution when starting a relative timer, to avoid short
+		 * timeouts. This will go away with the GTOD framework.
+		 */
+#ifdef CONFIG_TIME_LOW_RES
+		tim = ktime_add(tim, base->resolution);
+#endif
+	}
 	timer->expires = tim;
 
 	enqueue_hrtimer(timer, new_base);
-- 
cgit v1.1


From 3f17da699431ec48540beabc55c54d4b5e66c8e7 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Wed, 15 Feb 2006 22:13:24 +0300
Subject: [PATCH] fix kill_proc_info() vs CLONE_THREAD race

There is a window after copy_process() unlocks ->sighand.siglock
and before it adds the new thread to the thread list.

In that window __group_complete_signal(SIGKILL) will not see the
new thread yet, so this thread will start running while the whole
thread group was supposed to exit.

I beleive we have another good reason to place attach_pid(PID/TGID)
under ->sighand.siglock. We can do the same for

	release_task()->__unhash_process()

	de_thread()->switch_exec_pids()

After that we don't need tasklist_lock to iterate over the thread
list, and we can simplify things, see for example do_sigaction()
or sys_times().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/fork.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index 8e88b37..3683ce1 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1123,8 +1123,8 @@ static task_t *copy_process(unsigned long clone_flags,
 		p->real_parent = current;
 	p->parent = p->real_parent;
 
+	spin_lock(&current->sighand->siglock);
 	if (clone_flags & CLONE_THREAD) {
-		spin_lock(&current->sighand->siglock);
 		/*
 		 * Important: if an exit-all has been started then
 		 * do not create this new thread - the whole thread
@@ -1162,8 +1162,6 @@ static task_t *copy_process(unsigned long clone_flags,
 			 */
 			p->it_prof_expires = jiffies_to_cputime(1);
 		}
-
-		spin_unlock(&current->sighand->siglock);
 	}
 
 	/*
@@ -1189,6 +1187,7 @@ static task_t *copy_process(unsigned long clone_flags,
 
 	nr_threads++;
 	total_forks++;
+	spin_unlock(&current->sighand->siglock);
 	write_unlock_irq(&tasklist_lock);
 	proc_fork_connector(p);
 	return p;
-- 
cgit v1.1


From dadac81b1b86196fcc48fb87620403c4a7174f06 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Wed, 15 Feb 2006 22:13:26 +0300
Subject: [PATCH] fix kill_proc_info() vs fork() theoretical race

copy_process:

	attach_pid(p, PIDTYPE_PID, p->pid);
	attach_pid(p, PIDTYPE_TGID, p->tgid);

What if kill_proc_info(p->pid) happens in between?

copy_process() holds current->sighand.siglock, so we are safe
in CLONE_THREAD case, because current->sighand == p->sighand.

Otherwise, p->sighand is unlocked, the new process is already
visible to the find_task_by_pid(), but have a copy of parent's
'struct pid' in ->pids[PIDTYPE_TGID].

This means that __group_complete_signal() may hang while doing

	do ... while (next_thread() != p)

We can solve this problem if we reverse these 2 attach_pid()s:

	attach_pid() does wmb()

	group_send_sig_info() calls spin_lock(), which
	provides a read barrier. // Yes ?

I don't think we can hit this race in practice, but still.

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/fork.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index 3683ce1..fbea12d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1173,8 +1173,6 @@ static task_t *copy_process(unsigned long clone_flags,
 	if (unlikely(p->ptrace & PT_PTRACED))
 		__ptrace_link(p, current->parent);
 
-	attach_pid(p, PIDTYPE_PID, p->pid);
-	attach_pid(p, PIDTYPE_TGID, p->tgid);
 	if (thread_group_leader(p)) {
 		p->signal->tty = current->signal->tty;
 		p->signal->pgrp = process_group(current);
@@ -1184,6 +1182,8 @@ static task_t *copy_process(unsigned long clone_flags,
 		if (p->pid)
 			__get_cpu_var(process_counts)++;
 	}
+	attach_pid(p, PIDTYPE_TGID, p->tgid);
+	attach_pid(p, PIDTYPE_PID, p->pid);
 
 	nr_threads++;
 	total_forks++;
-- 
cgit v1.1


From 5ecfbae093f0c37311e89b29bfc0c9d586eace87 Mon Sep 17 00:00:00 2001
From: Oleg Nesterov <oleg@tv-sign.ru>
Date: Wed, 15 Feb 2006 22:50:10 +0300
Subject: [PATCH] fix zap_thread's ptrace related problems

1. The tracee can go from ptrace_stop() to do_signal_stop()
   after __ptrace_unlink(p).

2. It is unsafe to __ptrace_unlink(p) while p->parent may wait
   for tasklist_lock in ptrace_detach().

Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru>
Cc: Roland McGrath <roland@redhat.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Eric W. Biederman <ebiederm@xmission.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/ptrace.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

(limited to 'kernel')

diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index d2cf144..d95a72c 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -72,8 +72,8 @@ void ptrace_untrace(task_t *child)
  */
 void __ptrace_unlink(task_t *child)
 {
-	if (!child->ptrace)
-		BUG();
+	BUG_ON(!child->ptrace);
+
 	child->ptrace = 0;
 	if (!list_empty(&child->ptrace_list)) {
 		list_del_init(&child->ptrace_list);
@@ -184,22 +184,27 @@ bad:
 	return retval;
 }
 
+void __ptrace_detach(struct task_struct *child, unsigned int data)
+{
+	child->exit_code = data;
+	/* .. re-parent .. */
+	__ptrace_unlink(child);
+	/* .. and wake it up. */
+	if (child->exit_state != EXIT_ZOMBIE)
+		wake_up_process(child);
+}
+
 int ptrace_detach(struct task_struct *child, unsigned int data)
 {
 	if (!valid_signal(data))
-		return	-EIO;
+		return -EIO;
 
 	/* Architecture-specific hardware disable .. */
 	ptrace_disable(child);
 
-	/* .. re-parent .. */
-	child->exit_code = data;
-
 	write_lock_irq(&tasklist_lock);
-	__ptrace_unlink(child);
-	/* .. and wake it up. */
-	if (child->exit_state != EXIT_ZOMBIE)
-		wake_up_process(child);
+	if (child->ptrace)
+		__ptrace_detach(child, data);
 	write_unlock_irq(&tasklist_lock);
 
 	return 0;
-- 
cgit v1.1


From 06fed33849c13af637c4d09e9ba27828fac9edd5 Mon Sep 17 00:00:00 2001
From: Paul Jackson <pj@sgi.com>
Date: Wed, 15 Feb 2006 15:17:38 -0800
Subject: [PATCH] cpuset: oops in exit on null cpuset fix

Fix a latent bug in cpuset_exit() handling.  If a task tried to allocate
memory after calling cpuset_exit(), it oops'd in
cpuset_update_task_memory_state() on a NULL cpuset pointer.

So set the exiting tasks cpuset to the root cpuset instead of to NULL.

A distro kernel hit this with an added kernel package that had just such a
hook (allocating memory) in the exit code path.

Signed-off-by: Paul Jackson <pj@sgi.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/cpuset.c | 35 ++++++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index ba42b0a..12815d3 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -1977,6 +1977,39 @@ void cpuset_fork(struct task_struct *child)
  * We don't need to task_lock() this reference to tsk->cpuset,
  * because tsk is already marked PF_EXITING, so attach_task() won't
  * mess with it, or task is a failed fork, never visible to attach_task.
+ *
+ * Hack:
+ *
+ *    Set the exiting tasks cpuset to the root cpuset (top_cpuset).
+ *
+ *    Don't leave a task unable to allocate memory, as that is an
+ *    accident waiting to happen should someone add a callout in
+ *    do_exit() after the cpuset_exit() call that might allocate.
+ *    If a task tries to allocate memory with an invalid cpuset,
+ *    it will oops in cpuset_update_task_memory_state().
+ *
+ *    We call cpuset_exit() while the task is still competent to
+ *    handle notify_on_release(), then leave the task attached to
+ *    the root cpuset (top_cpuset) for the remainder of its exit.
+ *
+ *    To do this properly, we would increment the reference count on
+ *    top_cpuset, and near the very end of the kernel/exit.c do_exit()
+ *    code we would add a second cpuset function call, to drop that
+ *    reference.  This would just create an unnecessary hot spot on
+ *    the top_cpuset reference count, to no avail.
+ *
+ *    Normally, holding a reference to a cpuset without bumping its
+ *    count is unsafe.   The cpuset could go away, or someone could
+ *    attach us to a different cpuset, decrementing the count on
+ *    the first cpuset that we never incremented.  But in this case,
+ *    top_cpuset isn't going away, and either task has PF_EXITING set,
+ *    which wards off any attach_task() attempts, or task is a failed
+ *    fork, never visible to attach_task.
+ *
+ *    Another way to do this would be to set the cpuset pointer
+ *    to NULL here, and check in cpuset_update_task_memory_state()
+ *    for a NULL pointer.  This hack avoids that NULL check, for no
+ *    cost (other than this way too long comment ;).
  **/
 
 void cpuset_exit(struct task_struct *tsk)
@@ -1984,7 +2017,7 @@ void cpuset_exit(struct task_struct *tsk)
 	struct cpuset *cs;
 
 	cs = tsk->cpuset;
-	tsk->cpuset = NULL;
+	tsk->cpuset = &top_cpuset;	/* Hack - see comment above */
 
 	if (notify_on_release(cs)) {
 		char *pathbuf = NULL;
-- 
cgit v1.1


From c8adb494a6df6b2be8e50a8dafd5bab231df3505 Mon Sep 17 00:00:00 2001
From: Andrew Morton <akpm@osdl.org>
Date: Wed, 15 Feb 2006 15:17:42 -0800
Subject: [PATCH] swsusp: nuke noisy message

I get about 88 squillion of these when suspending an old ad450nx server.

Cc: Pavel Roskin <proski@gnu.org>
Cc: "Rafael J. Wysocki" <rjw@sisk.pl>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/snapshot.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c
index 41f6636..8d5a598 100644
--- a/kernel/power/snapshot.c
+++ b/kernel/power/snapshot.c
@@ -91,10 +91,8 @@ static int save_highmem_zone(struct zone *zone)
 		 * corrected eventually when the cases giving rise to this
 		 * are better understood.
 		 */
-		if (PageReserved(page)) {
-			printk("highmem reserved page?!\n");
+		if (PageReserved(page))
 			continue;
-		}
 		BUG_ON(PageNosave(page));
 		if (PageNosaveFree(page))
 			continue;
-- 
cgit v1.1


From a62eaf151d9cb478d127cfbc2e93c498869785b0 Mon Sep 17 00:00:00 2001
From: Andi Kleen <ak@suse.de>
Date: Thu, 16 Feb 2006 23:41:58 +0100
Subject: [PATCH] x86_64: Add boot option to disable randomized mappings and
 cleanup

AMD SimNow!'s JIT doesn't like them at all in the guest. For distribution
installation it's easiest if it's a boot time option.

Also I moved the variable to a more appropiate place and make
it independent from sysctl

And marked __read_mostly which it is.

Signed-off-by: Andi Kleen <ak@suse.de>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sysctl.c | 2 --
 1 file changed, 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 71dd6f6..7654d55 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -126,8 +126,6 @@ extern int sysctl_hz_timer;
 extern int acct_parm[];
 #endif
 
-int randomize_va_space = 1;
-
 static int parse_table(int __user *, int, void __user *, size_t __user *, void __user *, size_t,
 		       ctl_table *, void **);
 static int proc_doutsstring(ctl_table *table, int write, struct file *filp,
-- 
cgit v1.1


From 726c14bf499e91e7ede4f1728830aba05c675061 Mon Sep 17 00:00:00 2001
From: Paul Mackerras <paulus@samba.org>
Date: Fri, 17 Feb 2006 10:30:23 +1100
Subject: [PATCH] Provide an interface for getting the current tick length

This provides an interface for arch code to find out how many
nanoseconds are going to be added on to xtime by the next call to
do_timer.  The value returned is a fixed-point number in 52.12 format
in nanoseconds.  The reason for this format is that it gives the
full precision that the timekeeping code is using internally.

The motivation for this is to fix a problem that has arisen on 32-bit
powerpc in that the value returned by do_gettimeofday drifts apart
from xtime if NTP is being used.  PowerPC is now using a lockless
do_gettimeofday based on reading the timebase register and performing
some simple arithmetic.  (This method of getting the time is also
exported to userspace via the VDSO.)  However, the factor and offset
it uses were calculated based on the nominal tick length and weren't
being adjusted when NTP varied the tick length.

Note that 64-bit powerpc has had the lockless do_gettimeofday for a
long time now.  It also had an extremely hairy routine that got called
from the 32-bit compat routine for adjtimex, which adjusted the
factor and offset according to what it thought the timekeeping code
was going to do.  Not only was this only called if a 32-bit task did
adjtimex (i.e. not if a 64-bit task did adjtimex), it was also
duplicating computations from kernel/timer.c and it wasn't clear that
it was (still) correct.

The simple solution is to ask the timekeeping code how long the
current jiffy will be on each timer interrupt, after calling
do_timer.  If this jiffy will be a different length from the last one,
we then need to compute new values for the factor and offset used in
the lockless do_gettimeofday.  In this way we can keep xtime and
do_gettimeofday in sync, even when NTP is varying the tick length.

Note that when adjtimex varies the tick length, it almost always
introduces the variation from the next tick on.  The only case I could
see where adjtimex would vary the length of the current tick is when
an old-style adjtime adjustment is being cancelled.  (It's not clear
to me why the adjustment has to be cancelled immediately rather than
from the next tick on.)  Thus I don't see any real need for a hook in
adjtimex; the rare case of an old-style adjustment being cancelled can
be fixed up at the next tick.

Signed-off-by: Paul Mackerras <paulus@samba.org>
Acked-by: john stultz <johnstul@us.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/timer.c | 39 ++++++++++++++++++++++++++++++++++-----
 1 file changed, 34 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/timer.c b/kernel/timer.c
index b9dad39..fe3a9a9 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -717,12 +717,16 @@ static void second_overflow(void)
 #endif
 }
 
-/* in the NTP reference this is called "hardclock()" */
-static void update_wall_time_one_tick(void)
+/*
+ * Returns how many microseconds we need to add to xtime this tick
+ * in doing an adjustment requested with adjtime.
+ */
+static long adjtime_adjustment(void)
 {
-	long time_adjust_step, delta_nsec;
+	long time_adjust_step;
 
-	if ((time_adjust_step = time_adjust) != 0 ) {
+	time_adjust_step = time_adjust;
+	if (time_adjust_step) {
 		/*
 		 * We are doing an adjtime thing.  Prepare time_adjust_step to
 		 * be within bounds.  Note that a positive time_adjust means we
@@ -733,10 +737,19 @@ static void update_wall_time_one_tick(void)
 		 */
 		time_adjust_step = min(time_adjust_step, (long)tickadj);
 		time_adjust_step = max(time_adjust_step, (long)-tickadj);
+	}
+	return time_adjust_step;
+}
 
+/* in the NTP reference this is called "hardclock()" */
+static void update_wall_time_one_tick(void)
+{
+	long time_adjust_step, delta_nsec;
+
+	time_adjust_step = adjtime_adjustment();
+	if (time_adjust_step)
 		/* Reduce by this step the amount of time left  */
 		time_adjust -= time_adjust_step;
-	}
 	delta_nsec = tick_nsec + time_adjust_step * 1000;
 	/*
 	 * Advance the phase, once it gets to one microsecond, then
@@ -759,6 +772,22 @@ static void update_wall_time_one_tick(void)
 }
 
 /*
+ * Return how long ticks are at the moment, that is, how much time
+ * update_wall_time_one_tick will add to xtime next time we call it
+ * (assuming no calls to do_adjtimex in the meantime).
+ * The return value is in fixed-point nanoseconds with SHIFT_SCALE-10
+ * bits to the right of the binary point.
+ * This function has no side-effects.
+ */
+u64 current_tick_length(void)
+{
+	long delta_nsec;
+
+	delta_nsec = tick_nsec + adjtime_adjustment() * 1000;
+	return ((u64) delta_nsec << (SHIFT_SCALE - 10)) + time_adj;
+}
+
+/*
  * Using a loop looks inefficient, but "ticks" is
  * usually just one (we shouldn't be losing ticks,
  * we're doing this this way mainly for interrupt
-- 
cgit v1.1


From 4bbf39c29bc3409d6454faf0dfa1b3b0aa2ac2af Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 17 Feb 2006 13:52:44 -0800
Subject: [PATCH] Introduce CONFIG_DEFAULT_MIGRATION_COST

Heiko Carstens <heiko.carstens@de.ibm.com> wrote:

  The boot sequence on s390 sometimes takes ages and we spend a very long
  time (up to one or two minutes) in calibrate_migration_costs.  The time
  spent there differs from boot to boot.  Also the calculated costs differ
  a lot.  I've seen differences by up to a factor of 15 (yes, factor not
  percent).  Also I doubt that making these measurements make much sense on
  a completely virtualized architecture where you cannot tell how much cpu
  time you will get anyway.

So introduce the CONFIG_DEFAULT_MIGRATION_COST method for an architecture
to set the scheduler migration costs.  This turns off automatic detection
of migration costs.  Makes sense on virtual platforms, where migration
costs are hard to measure accurately.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sched.c | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

(limited to 'kernel')

diff --git a/kernel/sched.c b/kernel/sched.c
index 66d9572..12d291b 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5058,7 +5058,18 @@ static void init_sched_build_groups(struct sched_group groups[], cpumask_t span,
 #define MAX_DOMAIN_DISTANCE 32
 
 static unsigned long long migration_cost[MAX_DOMAIN_DISTANCE] =
-		{ [ 0 ... MAX_DOMAIN_DISTANCE-1 ] = -1LL };
+		{ [ 0 ... MAX_DOMAIN_DISTANCE-1 ] =
+/*
+ * Architectures may override the migration cost and thus avoid
+ * boot-time calibration. Unit is nanoseconds. Mostly useful for
+ * virtualized hardware:
+ */
+#ifdef CONFIG_DEFAULT_MIGRATION_COST
+			CONFIG_DEFAULT_MIGRATION_COST
+#else
+			-1LL
+#endif
+};
 
 /*
  * Allow override of migration cost - in units of microseconds.
-- 
cgit v1.1


From a8534adb74e23374889b84b3d97eb18da542a1b5 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rjw@sisk.pl>
Date: Fri, 17 Feb 2006 13:52:51 -0800
Subject: [PATCH] swsusp: fix breakage with swap on LVM

Restore the compatibility with the older code and make it possible to
suspend if the kernel command line doesn't contain the "resume=" argument

Signed-off-by: Rafael J. Wysocki <rjw@sisk.pl>
Cc: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/power/swsusp.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index 4e90905..2d9d08f 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -153,13 +153,11 @@ static int swsusp_swap_check(void) /* This is called before saving image */
 {
 	int i;
 
-	if (!swsusp_resume_device)
-		return -ENODEV;
 	spin_lock(&swap_lock);
 	for (i = 0; i < MAX_SWAPFILES; i++) {
 		if (!(swap_info[i].flags & SWP_WRITEOK))
 			continue;
-		if (is_resume_device(swap_info + i)) {
+		if (!swsusp_resume_device || is_resume_device(swap_info + i)) {
 			spin_unlock(&swap_lock);
 			root_swap = i;
 			return 0;
-- 
cgit v1.1


From ef20c8c197df9b8d5bd4af0679123826da028861 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 18 Feb 2006 15:41:50 -0500
Subject: [PATCH] GFP_KERNEL allocations in atomic (auditsc)

audit_log_exit() is called from atomic contexts and gets explicit
gfp_mask argument; it should use it for all allocations rather
than doing some with gfp_mask and some with GFP_KERNEL.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 kernel/auditsc.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'kernel')

diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index 685c251..d7e7e63 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -841,7 +841,7 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
 
 	for (aux = context->aux; aux; aux = aux->next) {
 
-		ab = audit_log_start(context, GFP_KERNEL, aux->type);
+		ab = audit_log_start(context, gfp_mask, aux->type);
 		if (!ab)
 			continue; /* audit_panic has been called */
 
@@ -878,14 +878,14 @@ static void audit_log_exit(struct audit_context *context, gfp_t gfp_mask)
 	}
 
 	if (context->pwd && context->pwdmnt) {
-		ab = audit_log_start(context, GFP_KERNEL, AUDIT_CWD);
+		ab = audit_log_start(context, gfp_mask, AUDIT_CWD);
 		if (ab) {
 			audit_log_d_path(ab, "cwd=", context->pwd, context->pwdmnt);
 			audit_log_end(ab);
 		}
 	}
 	for (i = 0; i < context->name_count; i++) {
-		ab = audit_log_start(context, GFP_KERNEL, AUDIT_PATH);
+		ab = audit_log_start(context, gfp_mask, AUDIT_PATH);
 		if (!ab)
 			continue; /* audit_panic has been called */
 
-- 
cgit v1.1


From c255d844dd73616f23e4b4733edcc2e5fa4042b2 Mon Sep 17 00:00:00 2001
From: Pavel Machek <pavel@ucw.cz>
Date: Mon, 20 Feb 2006 18:27:58 -0800
Subject: [PATCH] suspend-to-ram: allow video options to be set at runtime

Currently, acpi video options can only be set on kernel command line.  That's
little inflexible; I'd like userland s2ram application that just works, and
modifying kernel command line according to whitelist is not fun.  It is better
to just allow s2ram application to set video options just before suspend
(according to the whitelist).

This implements sysctl to allow setting suspend video options without reboot.

(akpm: Documentation updates for this new sysctl are pending..)

Signed-off-by: Pavel Machek <pavel@suse.cz>
Cc: "Brown, Len" <len.brown@intel.com>
Cc: "Antonino A. Daplas" <adaplas@pol.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sysctl.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7654d55..ebc41bf 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -44,14 +44,12 @@
 #include <linux/limits.h>
 #include <linux/dcache.h>
 #include <linux/syscalls.h>
+#include <linux/nfs_fs.h>
+#include <linux/acpi.h>
 
 #include <asm/uaccess.h>
 #include <asm/processor.h>
 
-#ifdef CONFIG_ROOT_NFS
-#include <linux/nfs_fs.h>
-#endif
-
 #if defined(CONFIG_SYSCTL)
 
 /* External variables not in a header file. */
@@ -656,6 +654,16 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+#ifdef CONFIG_ACPI_SLEEP
+	{
+		.ctl_name	= KERN_ACPI_VIDEO_FLAGS,
+		.procname	= "acpi_video_flags",
+		.data		= &acpi_video_flags,
+		.maxlen		= sizeof (unsigned long),
+		.mode		= 0644,
+		.proc_handler	= &proc_dointvec,
+	},
+#endif
 	{ .ctl_name = 0 }
 };
 
-- 
cgit v1.1


From 7a9166e3b037296366cea6f3c97f705d33e209e6 Mon Sep 17 00:00:00 2001
From: Luke Yang <luke.adi@gmail.com>
Date: Mon, 20 Feb 2006 18:28:07 -0800
Subject: [PATCH] Fix undefined symbols for nommu architecture

Signed-off-by: Luke Yang <luke.adi@gmail.com>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sysctl.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index ebc41bf..c05a2b7 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -636,6 +636,7 @@ static ctl_table kern_table[] = {
 		.proc_handler	= &proc_dointvec,
 	},
 #endif
+#if defined(CONFIG_MMU)
 	{
 		.ctl_name	= KERN_RANDOMIZE,
 		.procname	= "randomize_va_space",
@@ -644,6 +645,7 @@ static ctl_table kern_table[] = {
 		.mode		= 0644,
 		.proc_handler	= &proc_dointvec,
 	},
+#endif
 #if defined(CONFIG_S390) && defined(CONFIG_SMP)
 	{
 		.ctl_name	= KERN_SPIN_RETRY,
-- 
cgit v1.1


From 7fd105e758c8d746d57ab7e77f100e096bf153c8 Mon Sep 17 00:00:00 2001
From: Stephen Rothwell <sfr@canb.auug.org.au>
Date: Mon, 20 Feb 2006 18:28:08 -0800
Subject: [PATCH] Fix compile for CONFIG_SYSVIPC=n or CONFIG_SYSCTL=n

The compat syscalls are added to sys_ni.c since they are not defined if the
above CONFIG options are off.  Also, nfs would not build with CONFIG_SYSCTL
off.

Noticed by Arthur Othieno.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Trond Myklebust <trond.myklebust@fys.uio.no>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/sys_ni.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'kernel')

diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 17313b9..1067090 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -104,6 +104,8 @@ cond_syscall(sys_setreuid16);
 cond_syscall(sys_setuid16);
 cond_syscall(sys_vm86old);
 cond_syscall(sys_vm86);
+cond_syscall(compat_sys_ipc);
+cond_syscall(compat_sys_sysctl);
 
 /* arch-specific weak syscall entries */
 cond_syscall(sys_pciconfig_read);
-- 
cgit v1.1


From 5914811acf36c3ff091f860a6964808f668f27d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Bj=F6rn=20Steinbrink?= <B.Steinbrink@gmx.de>
Date: Sat, 18 Feb 2006 18:12:43 +0100
Subject: [PATCH] kjournald keeps reference to namespace
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In daemonize() a new thread gets cleaned up and 'merged' with init_task.
The current fs_struct is handled there, but not the current namespace.

This adds the namespace part.

[ Eric Biederman pointed out the namespace wrappers, and also notes that
  we can't ever count on using our parents namespace because we already
  have called exit_fs(), which is the only way to the namespace from a
  process. ]

Signed-off-by: Björn Steinbrink <B.Steinbrink@gmx.de>
Acked-by: Eric Biederman <ebiederm@xmission.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 kernel/exit.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'kernel')

diff --git a/kernel/exit.c b/kernel/exit.c
index 93cee36..531aadc 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -360,6 +360,9 @@ void daemonize(const char *name, ...)
 	fs = init_task.fs;
 	current->fs = fs;
 	atomic_inc(&fs->count);
+	exit_namespace(current);
+	current->namespace = init_task.namespace;
+	get_namespace(current->namespace);
  	exit_files(current);
 	current->files = init_task.files;
 	atomic_inc(&current->files->count);
-- 
cgit v1.1