From 4ef110141b3e0758fe30d686417b5686b87eb25b Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Tue, 19 Feb 2008 10:05:35 +1100 Subject: [POWERPC] spufs: fix scheduler starvation by idle contexts 2.6.25 has a regression where we can starve the scheduler by creating (N_SPES+1) contexts, then running them one at a time. The final context will never be run, as the other contexts are loaded on the SPEs, none of which are reported as free (ie, spu->alloc_state != SPU_FREE), so spu_get_idle() doesn't give us a spu to run on. Because all of the contexts are stopped, none are descheduled by the scheduler tick, as spusched_tick returns if spu_stopped(ctx). This change replaces the spu_stopped() check with checking for SCHED_IDLE in ctx->policy. We set a context's policy to SCHED_IDLE when we're not in spu_run(). We also favour SCHED_IDLE contexts when looking for contexts to unbind, but leave their timeslice intact for later resumption. This patch fixes the following test in the spufs-testsuite: tests/20-scheduler/02-yield-starvation Signed-off-by: Jeremy Kerr --- arch/powerpc/platforms/cell/spufs/run.c | 1 + arch/powerpc/platforms/cell/spufs/sched.c | 8 +++----- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/cell/spufs/run.c b/arch/powerpc/platforms/cell/spufs/run.c index fca22e1..6221968 100644 --- a/arch/powerpc/platforms/cell/spufs/run.c +++ b/arch/powerpc/platforms/cell/spufs/run.c @@ -234,6 +234,7 @@ static int spu_run_fini(struct spu_context *ctx, u32 *npc, *npc = ctx->ops->npc_read(ctx); spuctx_switch_state(ctx, SPU_UTIL_IDLE_LOADED); + ctx->policy = SCHED_IDLE; spu_release(ctx); if (signal_pending(current)) diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 5915343..3a59721 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -856,21 +856,18 @@ static noinline void spusched_tick(struct spu_context 
*ctx) { struct spu_context *new = NULL; struct spu *spu = NULL; - u32 status; if (spu_acquire(ctx)) BUG(); /* a kernel thread never has signals pending */ if (ctx->state != SPU_STATE_RUNNABLE) goto out; - if (spu_stopped(ctx, &status)) - goto out; if (ctx->flags & SPU_CREATE_NOSCHED) goto out; if (ctx->policy == SCHED_FIFO) goto out; - if (--ctx->time_slice) + if (--ctx->time_slice && ctx->policy != SCHED_IDLE) goto out; spu = ctx->spu; @@ -880,7 +877,8 @@ static noinline void spusched_tick(struct spu_context *ctx) new = grab_runnable_context(ctx->prio + 1, spu->node); if (new) { spu_unschedule(spu, ctx); - spu_add_to_rq(ctx); + if (ctx->policy != SCHED_IDLE) + spu_add_to_rq(ctx); } else { spu_context_nospu_trace(spusched_tick__newslice, ctx); ctx->time_slice++; -- cgit v1.1 From 61b36fc1f7d511132b1dd1422c29c7a8f26d77db Mon Sep 17 00:00:00 2001 From: Andre Detsch Date: Tue, 19 Feb 2008 10:06:15 -0300 Subject: [POWERPC] cell: fix spurious false return from spu_trap_data_{map,seg} At present, the __spu_trap_data_map and __spu_trap_data_seg functions exit if spu->flags has the SPU_CONTEXT_SWITCH_ACTIVE bit set. This was resulting in spurious returns from these functions, as they may be legitimately called when we have this bit set. We only use it in these two sanity checks, so this change removes the flag completely. This fixes hangs in the page-fault path of SPE apps. 
Signed-off-by: Andre Detsch Signed-off-by: Jeremy Kerr --- arch/powerpc/platforms/cell/spu_base.c | 12 ------------ arch/powerpc/platforms/cell/spufs/switch.c | 6 +++--- 2 files changed, 3 insertions(+), 15 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index e45cfa8..87eb07f 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -160,13 +160,6 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) pr_debug("%s\n", __FUNCTION__); - if (test_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags)) { - /* SLBs are pre-loaded for context switch, so - * we should never get here! - */ - printk("%s: invalid access during switch!\n", __func__); - return 1; - } slb.esid = (ea & ESID_MASK) | SLB_ESID_V; switch(REGION_ID(ea)) { @@ -226,11 +219,6 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) return 0; } - if (test_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags)) { - printk("%s: invalid access during switch!\n", __func__); - return 1; - } - spu->class_0_pending = 0; spu->dar = ea; spu->dsisr = dsisr; diff --git a/arch/powerpc/platforms/cell/spufs/switch.c b/arch/powerpc/platforms/cell/spufs/switch.c index 6063c88..6f5886c 100644 --- a/arch/powerpc/platforms/cell/spufs/switch.c +++ b/arch/powerpc/platforms/cell/spufs/switch.c @@ -720,8 +720,9 @@ static inline void set_switch_active(struct spu_state *csa, struct spu *spu) * Restore, Step 23. * Change the software context switch pending flag * to context switch active. + * + * This implementation does not uses a switch active flag. */ - set_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags); clear_bit(SPU_CONTEXT_SWITCH_PENDING, &spu->flags); mb(); } @@ -1739,9 +1740,8 @@ static inline void reset_switch_active(struct spu_state *csa, struct spu *spu) { /* Restore, Step 74: * Reset the "context switch active" flag. + * Not performed by this implementation. 
*/ - clear_bit(SPU_CONTEXT_SWITCH_ACTIVE, &spu->flags); - mb(); } static inline void reenable_interrupts(struct spu_state *csa, struct spu *spu) -- cgit v1.1