Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched.c       4
-rw-r--r--  kernel/sched_fair.c  90
2 files changed, 84 insertions(+), 10 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 34bf8e6..a2d5514 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -256,6 +256,7 @@ struct cfs_bandwidth {
 	ktime_t period;
 	u64 quota, runtime;
 	s64 hierarchal_quota;
+	u64 runtime_expires;
 
 	int idle, timer_active;
 	struct hrtimer period_timer;
@@ -396,6 +397,7 @@ struct cfs_rq {
 #endif
 #ifdef CONFIG_CFS_BANDWIDTH
 	int runtime_enabled;
+	u64 runtime_expires;
 	s64 runtime_remaining;
 #endif
 #endif
@@ -9166,8 +9168,8 @@ static int tg_set_cfs_bandwidth(struct task_group *tg, u64 period, u64 quota)
 	raw_spin_lock_irq(&cfs_b->lock);
 	cfs_b->period = ns_to_ktime(period);
 	cfs_b->quota = quota;
-	cfs_b->runtime = quota;
+	__refill_cfs_bandwidth_runtime(cfs_b);
 
 	/* restart the period timer (if active) to handle new period expiry */
 	if (runtime_enabled && cfs_b->timer_active) {
 		/* force a reprogram */
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index af73a8a..9d1adbd 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1272,11 +1272,30 @@ static inline u64 sched_cfs_bandwidth_slice(void)
 	return (u64)sysctl_sched_cfs_bandwidth_slice * NSEC_PER_USEC;
 }
 
+/*
+ * Replenish runtime according to assigned quota and update expiration time.
+ * We use sched_clock_cpu directly instead of rq->clock to avoid adding
+ * additional synchronization around rq->lock.
+ *
+ * requires cfs_b->lock
+ */
+static void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
+{
+	u64 now;
+
+	if (cfs_b->quota == RUNTIME_INF)
+		return;
+
+	now = sched_clock_cpu(smp_processor_id());
+	cfs_b->runtime = cfs_b->quota;
+	cfs_b->runtime_expires = now + ktime_to_ns(cfs_b->period);
+}
+
 static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
 	struct task_group *tg = cfs_rq->tg;
 	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(tg);
-	u64 amount = 0, min_amount;
+	u64 amount = 0, min_amount, expires;
 
 	/* note: this is a positive sum as runtime_remaining <= 0 */
 	min_amount = sched_cfs_bandwidth_slice() - cfs_rq->runtime_remaining;
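
The refill path above is small enough to model outside the kernel. Below is a minimal userspace sketch of the refill-and-stamp idea (the struct, helper names, and sample values are illustrative assumptions, not the kernel's definitions): every refill grants a full quota and records the clock value at which that grant should be treated as stale, so consumers can later test staleness without holding cfs_b->lock.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;

#define RUNTIME_INF	((u64)~0ULL)	/* same sentinel value the kernel uses */

/* hypothetical stand-in for the relevant fields of struct cfs_bandwidth */
struct bw {
	u64 period;		/* period length, ns */
	u64 quota;		/* runtime granted per period, ns */
	u64 runtime;		/* runtime left in the current period, ns */
	u64 runtime_expires;	/* clock value at which the grant goes stale */
};

/* mirrors the shape of __refill_cfs_bandwidth_runtime() above */
static void refill(struct bw *b, u64 now)
{
	if (b->quota == RUNTIME_INF)
		return;		/* unconstrained group: nothing to replenish */

	b->runtime = b->quota;
	b->runtime_expires = now + b->period;
}

int main(void)
{
	/* sample values: 50ms quota per 100ms period, "now" at 5s */
	struct bw b = { .period = 100000000ULL, .quota = 50000000ULL };

	refill(&b, 5000000000ULL);
	printf("runtime=%llu ns, expires at %llu ns\n",
	       (unsigned long long)b.runtime,
	       (unsigned long long)b.runtime_expires);
	return 0;
}
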
@@ -1285,9 +1304,16 @@ static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 	if (cfs_b->quota == RUNTIME_INF)
 		amount = min_amount;
 	else {
-		/* ensure bandwidth timer remains active under consumption */
-		if (!cfs_b->timer_active)
+		/*
+		 * If the bandwidth pool has become inactive, then at least one
+		 * period must have elapsed since the last consumption.
+		 * Refresh the global state and ensure the bandwidth timer
+		 * becomes active.
+		 */
+		if (!cfs_b->timer_active) {
+			__refill_cfs_bandwidth_runtime(cfs_b);
 			__start_cfs_bandwidth(cfs_b);
+		}
 
 		if (cfs_b->runtime > 0) {
 			amount = min(cfs_b->runtime, min_amount);
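
For the slice hand-out itself, a standalone sketch (the slice size and pool contents are made-up values, and min_u64() and take_slice() are illustrative helpers, not kernel functions): a runqueue whose local runtime has gone non-positive requests enough to get back up to one full slice, and receives at most what the global pool still holds.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;
typedef int64_t s64;

#define SLICE_NS	5000000ULL	/* assumed 5ms, cf. sched_cfs_bandwidth_slice() */

static u64 min_u64(u64 a, u64 b)
{
	return a < b ? a : b;
}

/* mirrors the bounded hand-out in assign_cfs_rq_runtime() above */
static u64 take_slice(u64 *pool, s64 local_remaining)
{
	/* a positive sum, since local_remaining <= 0 on entry */
	u64 want = SLICE_NS - local_remaining;
	u64 got = min_u64(*pool, want);

	*pool -= got;
	return got;
}

int main(void)
{
	u64 pool = 12000000ULL;		/* 12ms left in the global pool */
	s64 local = -2000000;		/* the cfs_rq ran 2ms into debt */

	u64 got = take_slice(&pool, local);	/* grants 7ms here */
	printf("granted %llu ns, pool now %llu ns\n",
	       (unsigned long long)got, (unsigned long long)pool);
	return 0;
}
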
@@ -1295,19 +1321,61 @@ static void assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 			cfs_b->idle = 0;
 		}
 	}
+	expires = cfs_b->runtime_expires;
 	raw_spin_unlock(&cfs_b->lock);
 
 	cfs_rq->runtime_remaining += amount;
+	/*
+	 * we may have advanced our local expiration to account for allowed
+	 * spread between our sched_clock and the one on which runtime was
+	 * issued.
+	 */
+	if ((s64)(expires - cfs_rq->runtime_expires) > 0)
+		cfs_rq->runtime_expires = expires;
 }
 
-static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
-		unsigned long delta_exec)
+/*
+ * Note: This depends on the synchronization provided by sched_clock and the
+ * fact that rq->clock snapshots this value.
+ */
+static void expire_cfs_rq_runtime(struct cfs_rq *cfs_rq)
 {
-	if (!cfs_rq->runtime_enabled)
+	struct cfs_bandwidth *cfs_b = tg_cfs_bandwidth(cfs_rq->tg);
+	struct rq *rq = rq_of(cfs_rq);
+
+	/* if the deadline is ahead of our clock, nothing to do */
+	if (likely((s64)(rq->clock - cfs_rq->runtime_expires) < 0))
+		return;
+
+	if (cfs_rq->runtime_remaining < 0)
 		return;
 
+	/*
+	 * If the local deadline has passed we have to consider the
+	 * possibility that our sched_clock is 'fast' and the global deadline
+	 * has not truly expired.
+	 *
+	 * Fortunately we can determine whether this is the case by checking
+	 * whether the global deadline has advanced.
+	 */
+
+	if ((s64)(cfs_rq->runtime_expires - cfs_b->runtime_expires) >= 0) {
+		/* extend local deadline, drift is bounded above by 2 ticks */
+		cfs_rq->runtime_expires += TICK_NSEC;
+	} else {
+		/* global deadline is ahead, expiration has passed */
+		cfs_rq->runtime_remaining = 0;
+	}
+}
+
+static void __account_cfs_rq_runtime(struct cfs_rq *cfs_rq,
+				     unsigned long delta_exec)
+{
+	/* dock delta_exec before expiring quota (as it could span periods) */
 	cfs_rq->runtime_remaining -= delta_exec;
-	if (cfs_rq->runtime_remaining > 0)
+	expire_cfs_rq_runtime(cfs_rq);
+
+	if (likely(cfs_rq->runtime_remaining > 0))
 		return;
 
 	assign_cfs_rq_runtime(cfs_rq);
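
Both deadline tests above use the signed-difference idiom, (s64)(a - b), which orders two u64 clock values correctly even across wraparound. The sketch below replays the expiry decision on plain values (TICK_NSEC here and the clock numbers are assumptions for illustration): a local deadline that fires while the global one is unchanged only means the local clock is running fast, so the deadline is nudged forward; once the global deadline has moved on, the leftover runtime is truly void.

#include <stdint.h>
#include <stdio.h>

typedef uint64_t u64;
typedef int64_t s64;

#define TICK_NSEC	1000000ULL	/* assumed 1ms tick */

/* userspace mirror of the expire_cfs_rq_runtime() decision above */
static void expire(u64 clock, u64 *local_exp, s64 *remaining, u64 global_exp)
{
	/* local deadline still ahead of our clock: nothing to do */
	if ((s64)(clock - *local_exp) < 0)
		return;

	if (*remaining < 0)
		return;

	if ((s64)(*local_exp - global_exp) >= 0) {
		/* global pool has not re-issued: our clock is merely fast */
		*local_exp += TICK_NSEC;
	} else {
		/* global deadline advanced: the old grant truly expired */
		*remaining = 0;
	}
}

int main(void)
{
	u64 local_exp = 1000;
	s64 remaining = 500;

	/* local deadline hit, global unchanged -> deadline extended */
	expire(1000, &local_exp, &remaining, 1000);
	printf("extended: exp=%llu rem=%lld\n",
	       (unsigned long long)local_exp, (long long)remaining);

	/* global deadline moved ahead -> remaining runtime zeroed */
	expire(1001000, &local_exp, &remaining, 2000000);
	printf("expired:  exp=%llu rem=%lld\n",
	       (unsigned long long)local_exp, (long long)remaining);
	return 0;
}
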
@@ -1338,7 +1406,12 @@ static int do_sched_cfs_period_timer(struct cfs_bandwidth *cfs_b, int overrun)
 		goto out_unlock;
 
 	idle = cfs_b->idle;
-	cfs_b->runtime = cfs_b->quota;
+	/* if we're going inactive then everything else can be deferred */
+	if (idle)
+		goto out_unlock;
+
+	__refill_cfs_bandwidth_runtime(cfs_b);
 
 	/* mark as potentially idle for the upcoming period */
 	cfs_b->idle = 1;
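
The idle bookkeeping in this hunk is a two-party handshake, reduced below to a userspace model (the struct and flag names are illustrative, and "refreshed" merely stands in for the refill call): each timer round marks the pool potentially idle, any consumer clears that mark, and a round that begins idle lets the timer lapse, deferring the refresh to whichever consumer arrives next, which is exactly the !timer_active path in assign_cfs_rq_runtime() above.

#include <stdbool.h>
#include <stdio.h>

/* reduced, assumed model of the cfs_b->idle / timer_active handshake */
struct pool {
	bool idle;		/* no consumption seen since the last round */
	bool timer_active;	/* period timer still armed */
	bool refreshed;		/* stands in for __refill_cfs_bandwidth_runtime() */
};

/* mirrors the do_sched_cfs_period_timer() logic above */
static void period_timer(struct pool *p)
{
	if (p->idle) {
		/* going inactive: everything else can be deferred */
		p->timer_active = false;
		return;
	}

	p->refreshed = true;
	p->idle = true;		/* potentially idle for the upcoming period */
}

/* mirrors the consumer side in assign_cfs_rq_runtime() */
static void consume(struct pool *p)
{
	if (!p->timer_active) {
		/* dormant pool: at least one period elapsed, refresh lazily */
		p->refreshed = true;
		p->timer_active = true;
	}
	p->idle = false;
}

int main(void)
{
	struct pool p = { .idle = false, .timer_active = true };

	period_timer(&p);	/* busy round: refresh, mark potentially idle */
	period_timer(&p);	/* idle round: timer goes inactive */
	consume(&p);		/* next consumer restarts the timer */
	printf("timer_active=%d idle=%d\n", p.timer_active, p.idle);
	return 0;
}
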
@@ -1557,7 +1630,6 @@ static long effective_load(struct task_group *tg, int cpu, long wl, long wg)
 
 	return wl;
 }
-
 #else
 
 static inline unsigned long effective_load(struct task_group *tg, int cpu,