summaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/hrtimer.c24
-rw-r--r--kernel/power/Kconfig37
-rw-r--r--kernel/rcutorture.c14
-rw-r--r--kernel/sched.c155
-rw-r--r--kernel/time/clocksource.c8
-rw-r--r--kernel/time/tick-broadcast.c36
-rw-r--r--kernel/time/tick-common.c32
-rw-r--r--kernel/time/tick-internal.h4
-rw-r--r--kernel/timer.c16
9 files changed, 132 insertions, 194 deletions
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 476cb0c..ec4cb9f 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -540,19 +540,19 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
/*
* Switch to high resolution mode
*/
-static void hrtimer_switch_to_hres(void)
+static int hrtimer_switch_to_hres(void)
{
struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
unsigned long flags;
if (base->hres_active)
- return;
+ return 1;
local_irq_save(flags);
if (tick_init_highres()) {
local_irq_restore(flags);
- return;
+ return 0;
}
base->hres_active = 1;
base->clock_base[CLOCK_REALTIME].resolution = KTIME_HIGH_RES;
@@ -565,13 +565,14 @@ static void hrtimer_switch_to_hres(void)
local_irq_restore(flags);
printk(KERN_INFO "Switched to high resolution mode on CPU %d\n",
smp_processor_id());
+ return 1;
}
#else
static inline int hrtimer_hres_active(void) { return 0; }
static inline int hrtimer_is_hres_enabled(void) { return 0; }
-static inline void hrtimer_switch_to_hres(void) { }
+static inline int hrtimer_switch_to_hres(void) { return 0; }
static inline void hrtimer_force_reprogram(struct hrtimer_cpu_base *base) { }
static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
struct hrtimer_clock_base *base)
@@ -1130,6 +1131,9 @@ static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base,
if (base->softirq_time.tv64 <= timer->expires.tv64)
break;
+#ifdef CONFIG_HIGH_RES_TIMERS
+ WARN_ON_ONCE(timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ);
+#endif
timer_stats_account_hrtimer(timer);
fn = timer->function;
@@ -1173,7 +1177,8 @@ void hrtimer_run_queues(void)
* deadlock vs. xtime_lock.
*/
if (tick_check_oneshot_change(!hrtimer_is_hres_enabled()))
- hrtimer_switch_to_hres();
+ if (hrtimer_switch_to_hres())
+ return;
hrtimer_get_softirq_time(cpu_base);
@@ -1355,17 +1360,16 @@ static void migrate_hrtimers(int cpu)
tick_cancel_sched_timer(cpu);
local_irq_disable();
-
- spin_lock(&new_base->lock);
- spin_lock(&old_base->lock);
+ double_spin_lock(&new_base->lock, &old_base->lock,
+ smp_processor_id() < cpu);
for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) {
migrate_hrtimer_list(&old_base->clock_base[i],
&new_base->clock_base[i]);
}
- spin_unlock(&old_base->lock);
- spin_unlock(&new_base->lock);
+ double_spin_unlock(&new_base->lock, &old_base->lock,
+ smp_processor_id() < cpu);
local_irq_enable();
put_cpu_var(hrtimer_bases);
}
diff --git a/kernel/power/Kconfig b/kernel/power/Kconfig
index 95f6657..51a4dd0 100644
--- a/kernel/power/Kconfig
+++ b/kernel/power/Kconfig
@@ -81,29 +81,34 @@ config SOFTWARE_SUSPEND
bool "Software Suspend"
depends on PM && SWAP && ((X86 && (!SMP || SUSPEND_SMP)) || ((FRV || PPC32) && !SMP))
---help---
- Enable the possibility of suspending the machine.
- It doesn't need ACPI or APM.
- You may suspend your machine by 'swsusp' or 'shutdown -z <time>'
- (patch for sysvinit needed).
+ Enable the suspend to disk (STD) functionality.
- It creates an image which is saved in your active swap. Upon next
+ You can suspend your machine with 'echo disk > /sys/power/state'.
+ Alternatively, you can use the additional userland tools available
+ from <http://suspend.sf.net>.
+
+ In principle it does not require ACPI or APM, although for example
+ ACPI will be used if available.
+
+ It creates an image which is saved in your active swap. Upon the next
boot, pass the 'resume=/dev/swappartition' argument to the kernel to
have it detect the saved image, restore memory state from it, and
continue to run as before. If you do not want the previous state to
- be reloaded, then use the 'noresume' kernel argument. However, note
- that your partitions will be fsck'd and you must re-mkswap your swap
- partitions. It does not work with swap files.
+ be reloaded, then use the 'noresume' kernel command line argument.
+ Note, however, that fsck will be run on your filesystems and you will
+ need to run mkswap against the swap partition used for the suspend.
- Right now you may boot without resuming and then later resume but
- in meantime you cannot use those swap partitions/files which were
- involved in suspending. Also in this case there is a risk that buffers
- on disk won't match with saved ones.
+ It also works with swap files to a limited extent (for details see
+ <file:Documentation/power/swsusp-and-swap-files.txt>).
- For more information take a look at <file:Documentation/power/swsusp.txt>.
+ Right now you may boot without resuming and resume later but in the
+ meantime you cannot use the swap partition(s)/file(s) involved in
+ suspending. Also in this case you must not use the filesystems
+ that were mounted before the suspend. In particular, you MUST NOT
+ MOUNT any journaled filesystems mounted before the suspend or they
+ will get corrupted in a nasty way.
- (For now, swsusp is incompatible with PAE aka HIGHMEM_64G on i386.
- we need identity mapping for resume to work, and that is trivial
- to get with 4MB pages, but less than trivial on PAE).
+ For more information take a look at <file:Documentation/power/swsusp.txt>.
config PM_STD_PARTITION
string "Default resume partition"
diff --git a/kernel/rcutorture.c b/kernel/rcutorture.c
index 482b11f..bcd14e8 100644
--- a/kernel/rcutorture.c
+++ b/kernel/rcutorture.c
@@ -60,19 +60,19 @@ static int test_no_idle_hz; /* Test RCU's support for tickless idle CPUs. */
static int shuffle_interval = 5; /* Interval between shuffles (in sec)*/
static char *torture_type = "rcu"; /* What RCU implementation to torture. */
-module_param(nreaders, int, 0);
+module_param(nreaders, int, 0444);
MODULE_PARM_DESC(nreaders, "Number of RCU reader threads");
-module_param(nfakewriters, int, 0);
+module_param(nfakewriters, int, 0444);
MODULE_PARM_DESC(nfakewriters, "Number of RCU fake writer threads");
-module_param(stat_interval, int, 0);
+module_param(stat_interval, int, 0444);
MODULE_PARM_DESC(stat_interval, "Number of seconds between stats printk()s");
-module_param(verbose, bool, 0);
+module_param(verbose, bool, 0444);
MODULE_PARM_DESC(verbose, "Enable verbose debugging printk()s");
-module_param(test_no_idle_hz, bool, 0);
+module_param(test_no_idle_hz, bool, 0444);
MODULE_PARM_DESC(test_no_idle_hz, "Test support for tickless idle CPUs");
-module_param(shuffle_interval, int, 0);
+module_param(shuffle_interval, int, 0444);
MODULE_PARM_DESC(shuffle_interval, "Number of seconds between shuffles");
-module_param(torture_type, charp, 0);
+module_param(torture_type, charp, 0444);
MODULE_PARM_DESC(torture_type, "Type of RCU to torture (rcu, rcu_bh, srcu)");
#define TORTURE_FLAG "-torture:"
diff --git a/kernel/sched.c b/kernel/sched.c
index 5f102e6..a4ca632 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3006,23 +3006,6 @@ static inline void idle_balance(int cpu, struct rq *rq)
}
#endif
-static inline void wake_priority_sleeper(struct rq *rq)
-{
-#ifdef CONFIG_SCHED_SMT
- if (!rq->nr_running)
- return;
-
- spin_lock(&rq->lock);
- /*
- * If an SMT sibling task has been put to sleep for priority
- * reasons reschedule the idle task to see if it can now run.
- */
- if (rq->nr_running)
- resched_task(rq->idle);
- spin_unlock(&rq->lock);
-#endif
-}
-
DEFINE_PER_CPU(struct kernel_stat, kstat);
EXPORT_PER_CPU_SYMBOL(kstat);
@@ -3239,10 +3222,7 @@ void scheduler_tick(void)
update_cpu_clock(p, rq, now);
- if (p == rq->idle)
- /* Task on the idle queue */
- wake_priority_sleeper(rq);
- else
+ if (p != rq->idle)
task_running_tick(rq, p);
#ifdef CONFIG_SMP
update_load(rq);
@@ -3251,136 +3231,6 @@ void scheduler_tick(void)
#endif
}
-#ifdef CONFIG_SCHED_SMT
-static inline void wakeup_busy_runqueue(struct rq *rq)
-{
- /* If an SMT runqueue is sleeping due to priority reasons wake it up */
- if (rq->curr == rq->idle && rq->nr_running)
- resched_task(rq->idle);
-}
-
-/*
- * Called with interrupt disabled and this_rq's runqueue locked.
- */
-static void wake_sleeping_dependent(int this_cpu)
-{
- struct sched_domain *tmp, *sd = NULL;
- int i;
-
- for_each_domain(this_cpu, tmp) {
- if (tmp->flags & SD_SHARE_CPUPOWER) {
- sd = tmp;
- break;
- }
- }
-
- if (!sd)
- return;
-
- for_each_cpu_mask(i, sd->span) {
- struct rq *smt_rq = cpu_rq(i);
-
- if (i == this_cpu)
- continue;
- if (unlikely(!spin_trylock(&smt_rq->lock)))
- continue;
-
- wakeup_busy_runqueue(smt_rq);
- spin_unlock(&smt_rq->lock);
- }
-}
-
-/*
- * number of 'lost' timeslices this task wont be able to fully
- * utilize, if another task runs on a sibling. This models the
- * slowdown effect of other tasks running on siblings:
- */
-static inline unsigned long
-smt_slice(struct task_struct *p, struct sched_domain *sd)
-{
- return p->time_slice * (100 - sd->per_cpu_gain) / 100;
-}
-
-/*
- * To minimise lock contention and not have to drop this_rq's runlock we only
- * trylock the sibling runqueues and bypass those runqueues if we fail to
- * acquire their lock. As we only trylock the normal locking order does not
- * need to be obeyed.
- */
-static int
-dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
-{
- struct sched_domain *tmp, *sd = NULL;
- int ret = 0, i;
-
- /* kernel/rt threads do not participate in dependent sleeping */
- if (!p->mm || rt_task(p))
- return 0;
-
- for_each_domain(this_cpu, tmp) {
- if (tmp->flags & SD_SHARE_CPUPOWER) {
- sd = tmp;
- break;
- }
- }
-
- if (!sd)
- return 0;
-
- for_each_cpu_mask(i, sd->span) {
- struct task_struct *smt_curr;
- struct rq *smt_rq;
-
- if (i == this_cpu)
- continue;
-
- smt_rq = cpu_rq(i);
- if (unlikely(!spin_trylock(&smt_rq->lock)))
- continue;
-
- smt_curr = smt_rq->curr;
-
- if (!smt_curr->mm)
- goto unlock;
-
- /*
- * If a user task with lower static priority than the
- * running task on the SMT sibling is trying to schedule,
- * delay it till there is proportionately less timeslice
- * left of the sibling task to prevent a lower priority
- * task from using an unfair proportion of the
- * physical cpu's resources. -ck
- */
- if (rt_task(smt_curr)) {
- /*
- * With real time tasks we run non-rt tasks only
- * per_cpu_gain% of the time.
- */
- if ((jiffies % DEF_TIMESLICE) >
- (sd->per_cpu_gain * DEF_TIMESLICE / 100))
- ret = 1;
- } else {
- if (smt_curr->static_prio < p->static_prio &&
- !TASK_PREEMPTS_CURR(p, smt_rq) &&
- smt_slice(smt_curr, sd) > task_timeslice(p))
- ret = 1;
- }
-unlock:
- spin_unlock(&smt_rq->lock);
- }
- return ret;
-}
-#else
-static inline void wake_sleeping_dependent(int this_cpu)
-{
-}
-static inline int
-dependent_sleeper(int this_cpu, struct rq *this_rq, struct task_struct *p)
-{
- return 0;
-}
-#endif
-
#if defined(CONFIG_PREEMPT) && defined(CONFIG_DEBUG_PREEMPT)
void fastcall add_preempt_count(int val)
@@ -3507,7 +3357,6 @@ need_resched_nonpreemptible:
if (!rq->nr_running) {
next = rq->idle;
rq->expired_timestamp = 0;
- wake_sleeping_dependent(cpu);
goto switch_tasks;
}
}
@@ -3547,8 +3396,6 @@ need_resched_nonpreemptible:
}
}
next->sleep_type = SLEEP_NORMAL;
- if (rq->nr_running == 1 && dependent_sleeper(cpu, rq, next))
- next = rq->idle;
switch_tasks:
if (next == rq->idle)
schedstat_inc(rq, sched_goidle);
diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
index 193a079..5b0e46b 100644
--- a/kernel/time/clocksource.c
+++ b/kernel/time/clocksource.c
@@ -55,16 +55,18 @@ static DEFINE_SPINLOCK(clocksource_lock);
static char override_name[32];
static int finished_booting;
-/* clocksource_done_booting - Called near the end of bootup
+/* clocksource_done_booting - Called near the end of core bootup
*
- * Hack to avoid lots of clocksource churn at boot time
+ * Hack to avoid lots of clocksource churn at boot time.
+ * We use fs_initcall because we want this to start before
+ * device_initcall but after subsys_initcall.
*/
static int __init clocksource_done_booting(void)
{
finished_booting = 1;
return 0;
}
-late_initcall(clocksource_done_booting);
+fs_initcall(clocksource_done_booting);
#ifdef CONFIG_CLOCKSOURCE_WATCHDOG
static LIST_HEAD(watchdog_list);
diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c
index 12b3efe..5567745 100644
--- a/kernel/time/tick-broadcast.c
+++ b/kernel/time/tick-broadcast.c
@@ -284,6 +284,42 @@ void tick_shutdown_broadcast(unsigned int *cpup)
spin_unlock_irqrestore(&tick_broadcast_lock, flags);
}
+void tick_suspend_broadcast(void)
+{
+ struct clock_event_device *bc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&tick_broadcast_lock, flags);
+
+ bc = tick_broadcast_device.evtdev;
+ if (bc && tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC)
+ clockevents_set_mode(bc, CLOCK_EVT_MODE_SHUTDOWN);
+
+ spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+}
+
+int tick_resume_broadcast(void)
+{
+ struct clock_event_device *bc;
+ unsigned long flags;
+ int broadcast = 0;
+
+ spin_lock_irqsave(&tick_broadcast_lock, flags);
+
+ bc = tick_broadcast_device.evtdev;
+ if (bc) {
+ if (tick_broadcast_device.mode == TICKDEV_MODE_PERIODIC &&
+ !cpus_empty(tick_broadcast_mask))
+ tick_broadcast_start_periodic(bc);
+
+ broadcast = cpu_isset(smp_processor_id(), tick_broadcast_mask);
+ }
+ spin_unlock_irqrestore(&tick_broadcast_lock, flags);
+
+ return broadcast;
+}
+
+
#ifdef CONFIG_TICK_ONESHOT
static cpumask_t tick_broadcast_oneshot_mask;
diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c
index 0986a2b..43ba1bd 100644
--- a/kernel/time/tick-common.c
+++ b/kernel/time/tick-common.c
@@ -298,6 +298,28 @@ static void tick_shutdown(unsigned int *cpup)
spin_unlock_irqrestore(&tick_device_lock, flags);
}
+static void tick_suspend_periodic(void)
+{
+ struct tick_device *td = &__get_cpu_var(tick_cpu_device);
+ unsigned long flags;
+
+ spin_lock_irqsave(&tick_device_lock, flags);
+ if (td->mode == TICKDEV_MODE_PERIODIC)
+ clockevents_set_mode(td->evtdev, CLOCK_EVT_MODE_SHUTDOWN);
+ spin_unlock_irqrestore(&tick_device_lock, flags);
+}
+
+static void tick_resume_periodic(void)
+{
+ struct tick_device *td = &__get_cpu_var(tick_cpu_device);
+ unsigned long flags;
+
+ spin_lock_irqsave(&tick_device_lock, flags);
+ if (td->mode == TICKDEV_MODE_PERIODIC)
+ tick_setup_periodic(td->evtdev, 0);
+ spin_unlock_irqrestore(&tick_device_lock, flags);
+}
+
/*
* Notification about clock event devices
*/
@@ -325,6 +347,16 @@ static int tick_notify(struct notifier_block *nb, unsigned long reason,
tick_shutdown(dev);
break;
+ case CLOCK_EVT_NOTIFY_SUSPEND:
+ tick_suspend_periodic();
+ tick_suspend_broadcast();
+ break;
+
+ case CLOCK_EVT_NOTIFY_RESUME:
+ if (!tick_resume_broadcast())
+ tick_resume_periodic();
+ break;
+
default:
break;
}
diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h
index 54861a0..75890ef 100644
--- a/kernel/time/tick-internal.h
+++ b/kernel/time/tick-internal.h
@@ -67,6 +67,8 @@ extern int tick_check_broadcast_device(struct clock_event_device *dev);
extern int tick_is_broadcast_device(struct clock_event_device *dev);
extern void tick_broadcast_on_off(unsigned long reason, int *oncpu);
extern void tick_shutdown_broadcast(unsigned int *cpup);
+extern void tick_suspend_broadcast(void);
+extern int tick_resume_broadcast(void);
extern void
tick_set_periodic_handler(struct clock_event_device *dev, int broadcast);
@@ -90,6 +92,8 @@ static inline int tick_device_uses_broadcast(struct clock_event_device *dev,
static inline void tick_do_periodic_broadcast(struct clock_event_device *d) { }
static inline void tick_broadcast_on_off(unsigned long reason, int *oncpu) { }
static inline void tick_shutdown_broadcast(unsigned int *cpup) { }
+static inline void tick_suspend_broadcast(void) { }
+static inline int tick_resume_broadcast(void) { return 0; }
/*
* Set the periodic handler in non broadcast mode
diff --git a/kernel/timer.c b/kernel/timer.c
index 6663a87..797cccb 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -862,6 +862,8 @@ int do_settimeofday(struct timespec *tv)
clock->error = 0;
ntp_clear();
+ update_vsyscall(&xtime, clock);
+
write_sequnlock_irqrestore(&xtime_lock, flags);
/* signal hrtimers about time change */
@@ -997,6 +999,9 @@ static int timekeeping_resume(struct sys_device *dev)
write_sequnlock_irqrestore(&xtime_lock, flags);
touch_softlockup_watchdog();
+
+ clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL);
+
/* Resume hrtimers */
clock_was_set();
@@ -1011,6 +1016,9 @@ static int timekeeping_suspend(struct sys_device *dev, pm_message_t state)
timekeeping_suspended = 1;
timekeeping_suspend_time = read_persistent_clock();
write_sequnlock_irqrestore(&xtime_lock, flags);
+
+ clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
+
return 0;
}
@@ -1651,8 +1659,8 @@ static void __devinit migrate_timers(int cpu)
new_base = get_cpu_var(tvec_bases);
local_irq_disable();
- spin_lock(&new_base->lock);
- spin_lock(&old_base->lock);
+ double_spin_lock(&new_base->lock, &old_base->lock,
+ smp_processor_id() < cpu);
BUG_ON(old_base->running_timer);
@@ -1665,8 +1673,8 @@ static void __devinit migrate_timers(int cpu)
migrate_timer_list(new_base, old_base->tv5.vec + i);
}
- spin_unlock(&old_base->lock);
- spin_unlock(&new_base->lock);
+ double_spin_unlock(&new_base->lock, &old_base->lock,
+ smp_processor_id() < cpu);
local_irq_enable();
put_cpu_var(tvec_bases);
}
OpenPOWER on IntegriCloud