summaryrefslogtreecommitdiffstats
path: root/include/linux/sched.h
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux/sched.h')
-rw-r--r--include/linux/sched.h120
1 files changed, 86 insertions, 34 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index e9f6935..a440cf1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -262,20 +262,9 @@ extern char ___assert_task_state[1 - 2*!!(
#define set_task_state(tsk, state_value) \
do { \
(tsk)->task_state_change = _THIS_IP_; \
- smp_store_mb((tsk)->state, (state_value)); \
+ smp_store_mb((tsk)->state, (state_value)); \
} while (0)
-/*
- * set_current_state() includes a barrier so that the write of current->state
- * is correctly serialised wrt the caller's subsequent test of whether to
- * actually sleep:
- *
- * set_current_state(TASK_UNINTERRUPTIBLE);
- * if (do_i_need_to_sleep())
- * schedule();
- *
- * If the caller does not need such serialisation then use __set_current_state()
- */
#define __set_current_state(state_value) \
do { \
current->task_state_change = _THIS_IP_; \
@@ -284,11 +273,19 @@ extern char ___assert_task_state[1 - 2*!!(
#define set_current_state(state_value) \
do { \
current->task_state_change = _THIS_IP_; \
- smp_store_mb(current->state, (state_value)); \
+ smp_store_mb(current->state, (state_value)); \
} while (0)
#else
+/*
+ * @tsk had better be current, or you get to keep the pieces.
+ *
+ * The only reason is that computing current can be more expensive than
+ * using a pointer that's already available.
+ *
+ * Therefore, see set_current_state().
+ */
#define __set_task_state(tsk, state_value) \
do { (tsk)->state = (state_value); } while (0)
#define set_task_state(tsk, state_value) \
@@ -299,11 +296,34 @@ extern char ___assert_task_state[1 - 2*!!(
* is correctly serialised wrt the caller's subsequent test of whether to
* actually sleep:
*
+ * for (;;) {
* set_current_state(TASK_UNINTERRUPTIBLE);
- * if (do_i_need_to_sleep())
- * schedule();
+ * if (!need_sleep)
+ * break;
+ *
+ * schedule();
+ * }
+ * __set_current_state(TASK_RUNNING);
+ *
+ * If the caller does not need such serialisation (because, for instance, the
+ * condition test and condition change and wakeup are under the same lock) then
+ * use __set_current_state().
+ *
+ * The above is typically ordered against the wakeup, which does:
+ *
+ * need_sleep = false;
+ * wake_up_state(p, TASK_UNINTERRUPTIBLE);
+ *
+ * Where wake_up_state() (and all other wakeup primitives) imply enough
+ * barriers to order the store of the variable against wakeup.
*
- * If the caller does not need such serialisation then use __set_current_state()
+ * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is,
+ * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
+ * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
+ *
+ * This is obviously fine, since they both store the exact same value.
+ *
+ * Also see the comments of try_to_wake_up().
*/
#define __set_current_state(state_value) \
do { current->state = (state_value); } while (0)
@@ -520,7 +540,11 @@ static inline int get_dumpable(struct mm_struct *mm)
/* leave room for more dump flags */
#define MMF_VM_MERGEABLE 16 /* KSM may merge identical pages */
#define MMF_VM_HUGEPAGE 17 /* set when VM_HUGEPAGE is set on vma */
-#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */
+/*
+ * This one-shot flag is dropped due to necessity of changing exe once again
+ * on NFS restore
+ */
+//#define MMF_EXE_FILE_CHANGED 18 /* see prctl_set_mm_exe_file() */
#define MMF_HAS_UPROBES 19 /* has uprobes */
#define MMF_RECALC_UPROBES 20 /* MMF_HAS_UPROBES can be wrong */
@@ -989,7 +1013,7 @@ enum cpu_idle_type {
* already in a wake queue, the wakeup will happen soon and the second
* waker can just skip it.
*
- * The WAKE_Q macro declares and initializes the list head.
+ * The DEFINE_WAKE_Q macro declares and initializes the list head.
* wake_up_q() does NOT reinitialize the list; it's expected to be
* called near the end of a function, where the fact that the queue is
* not used again will be easy to see by inspection.
@@ -1009,7 +1033,7 @@ struct wake_q_head {
#define WAKE_Q_TAIL ((struct wake_q_node *) 0x01)
-#define WAKE_Q(name) \
+#define DEFINE_WAKE_Q(name) \
struct wake_q_head name = { WAKE_Q_TAIL, &name.first }
extern void wake_q_add(struct wake_q_head *head,
@@ -1057,6 +1081,8 @@ static inline int cpu_numa_flags(void)
}
#endif
+extern int arch_asym_cpu_priority(int cpu);
+
struct sched_domain_attr {
int relax_domain_level;
};
@@ -1627,7 +1653,10 @@ struct task_struct {
int __user *set_child_tid; /* CLONE_CHILD_SETTID */
int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */
- cputime_t utime, stime, utimescaled, stimescaled;
+ cputime_t utime, stime;
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
+ cputime_t utimescaled, stimescaled;
+#endif
cputime_t gtime;
struct prev_cputime prev_cputime;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
@@ -2221,40 +2250,45 @@ struct task_struct *try_get_task_struct(struct task_struct **ptask);
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
extern void task_cputime(struct task_struct *t,
cputime_t *utime, cputime_t *stime);
-extern void task_cputime_scaled(struct task_struct *t,
- cputime_t *utimescaled, cputime_t *stimescaled);
extern cputime_t task_gtime(struct task_struct *t);
#else
static inline void task_cputime(struct task_struct *t,
cputime_t *utime, cputime_t *stime)
{
- if (utime)
- *utime = t->utime;
- if (stime)
- *stime = t->stime;
+ *utime = t->utime;
+ *stime = t->stime;
}
+static inline cputime_t task_gtime(struct task_struct *t)
+{
+ return t->gtime;
+}
+#endif
+
+#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
static inline void task_cputime_scaled(struct task_struct *t,
cputime_t *utimescaled,
cputime_t *stimescaled)
{
- if (utimescaled)
- *utimescaled = t->utimescaled;
- if (stimescaled)
- *stimescaled = t->stimescaled;
+ *utimescaled = t->utimescaled;
+ *stimescaled = t->stimescaled;
}
-
-static inline cputime_t task_gtime(struct task_struct *t)
+#else
+static inline void task_cputime_scaled(struct task_struct *t,
+ cputime_t *utimescaled,
+ cputime_t *stimescaled)
{
- return t->gtime;
+ task_cputime(t, utimescaled, stimescaled);
}
#endif
+
extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st);
/*
* Per process flags
*/
+#define PF_IDLE 0x00000002 /* I am an IDLE thread */
#define PF_EXITING 0x00000004 /* getting shut down */
#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
@@ -2445,6 +2479,10 @@ static inline void calc_load_enter_idle(void) { }
static inline void calc_load_exit_idle(void) { }
#endif /* CONFIG_NO_HZ_COMMON */
+#ifndef cpu_relax_yield
+#define cpu_relax_yield() cpu_relax()
+#endif
+
/*
* Do not use outside of architecture code which knows its limitations.
*
@@ -2568,6 +2606,7 @@ extern void sched_autogroup_create_attach(struct task_struct *p);
extern void sched_autogroup_detach(struct task_struct *p);
extern void sched_autogroup_fork(struct signal_struct *sig);
extern void sched_autogroup_exit(struct signal_struct *sig);
+extern void sched_autogroup_exit_task(struct task_struct *p);
#ifdef CONFIG_PROC_FS
extern void proc_sched_autogroup_show_task(struct task_struct *p, struct seq_file *m);
extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice);
@@ -2577,6 +2616,7 @@ static inline void sched_autogroup_create_attach(struct task_struct *p) { }
static inline void sched_autogroup_detach(struct task_struct *p) { }
static inline void sched_autogroup_fork(struct signal_struct *sig) { }
static inline void sched_autogroup_exit(struct signal_struct *sig) { }
+static inline void sched_autogroup_exit_task(struct task_struct *p) { }
#endif
extern int yield_to(struct task_struct *p, bool preempt);
@@ -2610,7 +2650,7 @@ extern struct task_struct *idle_task(int cpu);
*/
static inline bool is_idle_task(const struct task_struct *p)
{
- return p->pid == 0;
+ return !!(p->flags & PF_IDLE);
}
extern struct task_struct *curr_task(int cpu);
extern void ia64_set_curr_task(int cpu, struct task_struct *p);
@@ -3507,6 +3547,18 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
#endif /* CONFIG_SMP */
+/*
+ * In order to reduce various lock holder preemption latencies provide an
+ * interface to see if a vCPU is currently running or not.
+ *
+ * This allows us to terminate optimistic spin loops and block, analogous to
+ * the native optimistic spin heuristic of testing if the lock owner task is
+ * running or not.
+ */
+#ifndef vcpu_is_preempted
+# define vcpu_is_preempted(cpu) false
+#endif
+
extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
extern long sched_getaffinity(pid_t pid, struct cpumask *mask);
OpenPOWER on IntegriCloud