diff options
-rw-r--r-- | arch/powerpc/platforms/cell/spu_base.c | 72 | ||||
-rw-r--r-- | arch/powerpc/platforms/cell/spufs/sched.c | 198 | ||||
-rw-r--r-- | include/asm-powerpc/spu.h | 11 |
3 files changed, 112 insertions, 169 deletions
diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index 8617b50..9012422 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -409,7 +409,7 @@ static void spu_free_irqs(struct spu *spu) free_irq(spu->irqs[2], spu); } -static void spu_init_channels(struct spu *spu) +void spu_init_channels(struct spu *spu) { static const struct { unsigned channel; @@ -442,66 +442,7 @@ static void spu_init_channels(struct spu *spu) out_be64(&priv2->spu_chnlcnt_RW, count_list[i].count); } } - -struct spu *spu_alloc_spu(struct spu *req_spu) -{ - struct spu *spu, *ret = NULL; - - spin_lock(&spu_lock); - list_for_each_entry(spu, &cbe_spu_info[req_spu->node].free_spus, list) { - if (spu == req_spu) { - list_del_init(&spu->list); - pr_debug("Got SPU %d %d\n", spu->number, spu->node); - spu_init_channels(spu); - ret = spu; - break; - } - } - spin_unlock(&spu_lock); - return ret; -} -EXPORT_SYMBOL_GPL(spu_alloc_spu); - -struct spu *spu_alloc_node(int node) -{ - struct spu *spu = NULL; - - spin_lock(&spu_lock); - if (!list_empty(&cbe_spu_info[node].free_spus)) { - spu = list_entry(cbe_spu_info[node].free_spus.next, struct spu, - list); - list_del_init(&spu->list); - pr_debug("Got SPU %d %d\n", spu->number, spu->node); - } - spin_unlock(&spu_lock); - - if (spu) - spu_init_channels(spu); - return spu; -} -EXPORT_SYMBOL_GPL(spu_alloc_node); - -struct spu *spu_alloc(void) -{ - struct spu *spu = NULL; - int node; - - for (node = 0; node < MAX_NUMNODES; node++) { - spu = spu_alloc_node(node); - if (spu) - break; - } - - return spu; -} - -void spu_free(struct spu *spu) -{ - spin_lock(&spu_lock); - list_add_tail(&spu->list, &cbe_spu_info[spu->node].free_spus); - spin_unlock(&spu_lock); -} -EXPORT_SYMBOL_GPL(spu_free); +EXPORT_SYMBOL_GPL(spu_init_channels); static int spu_shutdown(struct sys_device *sysdev) { @@ -597,6 +538,8 @@ static int __init create_spu(void *data) if (!spu) goto out; + spu->alloc_state = SPU_FREE; + spin_lock_init(&spu->register_lock); spin_lock(&spu_lock); spu->number = number++; @@ -617,11 +560,10 @@ static int __init create_spu(void *data) if (ret) goto out_free_irqs; - spin_lock(&spu_lock); - list_add(&spu->list, &cbe_spu_info[spu->node].free_spus); + mutex_lock(&cbe_spu_info[spu->node].list_mutex); list_add(&spu->cbe_list, &cbe_spu_info[spu->node].spus); cbe_spu_info[spu->node].n_spus++; - spin_unlock(&spu_lock); + mutex_unlock(&cbe_spu_info[spu->node].list_mutex); mutex_lock(&spu_full_list_mutex); spin_lock_irqsave(&spu_full_list_lock, flags); @@ -831,8 +773,8 @@ static int __init init_spu_base(void) int i, ret = 0; for (i = 0; i < MAX_NUMNODES; i++) { + mutex_init(&cbe_spu_info[i].list_mutex); INIT_LIST_HEAD(&cbe_spu_info[i].spus); - INIT_LIST_HEAD(&cbe_spu_info[i].free_spus); } if (!spu_management_ops) diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index 44e2338..227968b 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -51,9 +51,6 @@ struct spu_prio_array { DECLARE_BITMAP(bitmap, MAX_PRIO); struct list_head runq[MAX_PRIO]; spinlock_t runq_lock; - struct list_head active_list[MAX_NUMNODES]; - struct mutex active_mutex[MAX_NUMNODES]; - int nr_active[MAX_NUMNODES]; int nr_waiting; }; @@ -127,7 +124,7 @@ void __spu_update_sched_info(struct spu_context *ctx) ctx->policy = current->policy; /* - * A lot of places that don't hold active_mutex poke into + * A lot of places that don't hold list_mutex poke into * cpus_allowed, including grab_runnable_context which * already holds the runq_lock. So abuse runq_lock * to protect this field aswell. @@ -141,9 +138,9 @@ void spu_update_sched_info(struct spu_context *ctx) { int node = ctx->spu->node; - mutex_lock(&spu_prio->active_mutex[node]); + mutex_lock(&cbe_spu_info[node].list_mutex); __spu_update_sched_info(ctx); - mutex_unlock(&spu_prio->active_mutex[node]); + mutex_unlock(&cbe_spu_info[node].list_mutex); } static int __node_allowed(struct spu_context *ctx, int node) @@ -169,39 +166,6 @@ static int node_allowed(struct spu_context *ctx, int node) return rval; } -/** - * spu_add_to_active_list - add spu to active list - * @spu: spu to add to the active list - */ -static void spu_add_to_active_list(struct spu *spu) -{ - int node = spu->node; - - mutex_lock(&spu_prio->active_mutex[node]); - spu_prio->nr_active[node]++; - list_add_tail(&spu->list, &spu_prio->active_list[node]); - mutex_unlock(&spu_prio->active_mutex[node]); -} - -static void __spu_remove_from_active_list(struct spu *spu) -{ - list_del_init(&spu->list); - spu_prio->nr_active[spu->node]--; -} - -/** - * spu_remove_from_active_list - remove spu from active list - * @spu: spu to remove from the active list - */ -static void spu_remove_from_active_list(struct spu *spu) -{ - int node = spu->node; - - mutex_lock(&spu_prio->active_mutex[node]); - __spu_remove_from_active_list(spu); - mutex_unlock(&spu_prio->active_mutex[node]); -} - static BLOCKING_NOTIFIER_HEAD(spu_switch_notifier); void spu_switch_notify(struct spu *spu, struct spu_context *ctx) @@ -222,15 +186,18 @@ static void notify_spus_active(void) */ for_each_online_node(node) { struct spu *spu; - mutex_lock(&spu_prio->active_mutex[node]); - list_for_each_entry(spu, &spu_prio->active_list[node], list) { - struct spu_context *ctx = spu->ctx; - set_bit(SPU_SCHED_NOTIFY_ACTIVE, &ctx->sched_flags); - mb(); /* make sure any tasks woken up below */ - /* can see the bit(s) set above */ - wake_up_all(&ctx->stop_wq); + + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->alloc_state != SPU_FREE) { + struct spu_context *ctx = spu->ctx; + set_bit(SPU_SCHED_NOTIFY_ACTIVE, + &ctx->sched_flags); + mb(); + wake_up_all(&ctx->stop_wq); + } } - mutex_unlock(&spu_prio->active_mutex[node]); + mutex_unlock(&cbe_spu_info[node].list_mutex); } } @@ -293,10 +260,12 @@ static void spu_bind_context(struct spu *spu, struct spu_context *ctx) } /* - * XXX(hch): needs locking. + * Must be used with the list_mutex held. */ static inline int sched_spu(struct spu *spu) { + BUG_ON(!mutex_is_locked(&cbe_spu_info[spu->node].list_mutex)); + return (!spu->ctx || !(spu->ctx->flags & SPU_CREATE_NOSCHED)); } @@ -349,11 +318,15 @@ static struct spu *aff_ref_location(struct spu_context *ctx, int mem_aff, node = (node < MAX_NUMNODES) ? node : 0; if (!node_allowed(ctx, node)) continue; + mutex_lock(&cbe_spu_info[node].list_mutex); list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { if ((!mem_aff || spu->has_mem_affinity) && - sched_spu(spu)) + sched_spu(spu)) { + mutex_unlock(&cbe_spu_info[node].list_mutex); return spu; + } } + mutex_unlock(&cbe_spu_info[node].list_mutex); } return NULL; } @@ -381,13 +354,14 @@ static void aff_set_ref_point_location(struct spu_gang *gang) gang->aff_ref_spu = aff_ref_location(ctx, mem_aff, gs, lowest_offset); } -static struct spu *ctx_location(struct spu *ref, int offset) +static struct spu *ctx_location(struct spu *ref, int offset, int node) { struct spu *spu; spu = NULL; if (offset >= 0) { list_for_each_entry(spu, ref->aff_list.prev, aff_list) { + BUG_ON(spu->node != node); if (offset == 0) break; if (sched_spu(spu)) @@ -395,12 +369,14 @@ static struct spu *ctx_location(struct spu *ref, int offset) } } else { list_for_each_entry_reverse(spu, ref->aff_list.next, aff_list) { + BUG_ON(spu->node != node); if (offset == 0) break; if (sched_spu(spu)) offset++; } } + return spu; } @@ -408,13 +384,13 @@ static struct spu *ctx_location(struct spu *ref, int offset) * affinity_check is called each time a context is going to be scheduled. * It returns the spu ptr on which the context must run. */ -struct spu *affinity_check(struct spu_context *ctx) +static int has_affinity(struct spu_context *ctx) { - struct spu_gang *gang; + struct spu_gang *gang = ctx->gang; if (list_empty(&ctx->aff_list)) - return NULL; - gang = ctx->gang; + return 0; + mutex_lock(&gang->aff_mutex); if (!gang->aff_ref_spu) { if (!(gang->aff_flags & AFF_MERGED)) @@ -424,9 +400,8 @@ struct spu *affinity_check(struct spu_context *ctx) aff_set_ref_point_location(gang); } mutex_unlock(&gang->aff_mutex); - if (!gang->aff_ref_spu) - return NULL; - return ctx_location(gang->aff_ref_spu, ctx->aff_offset); + + return gang->aff_ref_spu != NULL; } /** @@ -535,22 +510,41 @@ static void spu_prio_wait(struct spu_context *ctx) static struct spu *spu_get_idle(struct spu_context *ctx) { - struct spu *spu = NULL; - int node = cpu_to_node(raw_smp_processor_id()); - int n; + struct spu *spu; + int node, n; + + if (has_affinity(ctx)) { + node = ctx->gang->aff_ref_spu->node; - spu = affinity_check(ctx); - if (spu) - return spu_alloc_spu(spu); + mutex_lock(&cbe_spu_info[node].list_mutex); + spu = ctx_location(ctx->gang->aff_ref_spu, ctx->aff_offset, node); + if (spu && spu->alloc_state == SPU_FREE) + goto found; + mutex_unlock(&cbe_spu_info[node].list_mutex); + return NULL; + } + node = cpu_to_node(raw_smp_processor_id()); for (n = 0; n < MAX_NUMNODES; n++, node++) { node = (node < MAX_NUMNODES) ? node : 0; if (!node_allowed(ctx, node)) continue; - spu = spu_alloc_node(node); - if (spu) - break; + + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { + if (spu->alloc_state == SPU_FREE) + goto found; + } + mutex_unlock(&cbe_spu_info[node].list_mutex); } + + return NULL; + + found: + spu->alloc_state = SPU_USED; + mutex_unlock(&cbe_spu_info[node].list_mutex); + pr_debug("Got SPU %d %d\n", spu->number, spu->node); + spu_init_channels(spu); return spu; } @@ -580,15 +574,15 @@ static struct spu *find_victim(struct spu_context *ctx) if (!node_allowed(ctx, node)) continue; - mutex_lock(&spu_prio->active_mutex[node]); - list_for_each_entry(spu, &spu_prio->active_list[node], list) { + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) { struct spu_context *tmp = spu->ctx; if (tmp->prio > ctx->prio && (!victim || tmp->prio > victim->prio)) victim = spu->ctx; } - mutex_unlock(&spu_prio->active_mutex[node]); + mutex_unlock(&cbe_spu_info[node].list_mutex); if (victim) { /* @@ -613,7 +607,11 @@ static struct spu *find_victim(struct spu_context *ctx) victim = NULL; goto restart; } - spu_remove_from_active_list(spu); + + mutex_lock(&cbe_spu_info[node].list_mutex); + cbe_spu_info[node].nr_active--; + mutex_unlock(&cbe_spu_info[node].list_mutex); + spu_unbind_context(spu, victim); victim->stats.invol_ctx_switch++; spu->stats.invol_ctx_switch++; @@ -662,8 +660,12 @@ int spu_activate(struct spu_context *ctx, unsigned long flags) if (!spu && rt_prio(ctx->prio)) spu = find_victim(ctx); if (spu) { + int node = spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); spu_bind_context(spu, ctx); - spu_add_to_active_list(spu); + cbe_spu_info[node].nr_active++; + mutex_unlock(&cbe_spu_info[node].list_mutex); return 0; } @@ -712,11 +714,17 @@ static int __spu_deactivate(struct spu_context *ctx, int force, int max_prio) if (spu) { new = grab_runnable_context(max_prio, spu->node); if (new || force) { - spu_remove_from_active_list(spu); + int node = spu->node; + + mutex_lock(&cbe_spu_info[node].list_mutex); spu_unbind_context(spu, ctx); + spu->alloc_state = SPU_FREE; + cbe_spu_info[node].nr_active--; + mutex_unlock(&cbe_spu_info[node].list_mutex); + ctx->stats.vol_ctx_switch++; spu->stats.vol_ctx_switch++; - spu_free(spu); + if (new) wake_up(&new->stop_wq); } @@ -755,7 +763,7 @@ void spu_yield(struct spu_context *ctx) } } -static void spusched_tick(struct spu_context *ctx) +static noinline void spusched_tick(struct spu_context *ctx) { if (ctx->flags & SPU_CREATE_NOSCHED) return; @@ -766,7 +774,7 @@ static void spusched_tick(struct spu_context *ctx) return; /* - * Unfortunately active_mutex ranks outside of state_mutex, so + * Unfortunately list_mutex ranks outside of state_mutex, so * we have to trylock here. If we fail give the context another * tick and try again. */ @@ -776,12 +784,11 @@ static void spusched_tick(struct spu_context *ctx) new = grab_runnable_context(ctx->prio + 1, spu->node); if (new) { - - __spu_remove_from_active_list(spu); spu_unbind_context(spu, ctx); ctx->stats.invol_ctx_switch++; spu->stats.invol_ctx_switch++; - spu_free(spu); + spu->alloc_state = SPU_FREE; + cbe_spu_info[spu->node].nr_active--; wake_up(&new->stop_wq); /* * We need to break out of the wait loop in @@ -802,7 +809,7 @@ static void spusched_tick(struct spu_context *ctx) * * Return the number of tasks currently running or waiting to run. * - * Note that we don't take runq_lock / active_mutex here. Reading + * Note that we don't take runq_lock / list_mutex here. Reading * a single 32bit value is atomic on powerpc, and we don't care * about memory ordering issues here. */ @@ -811,7 +818,7 @@ static unsigned long count_active_contexts(void) int nr_active = 0, node; for (node = 0; node < MAX_NUMNODES; node++) - nr_active += spu_prio->nr_active[node]; + nr_active += cbe_spu_info[node].nr_active; nr_active += spu_prio->nr_waiting; return nr_active; @@ -851,19 +858,18 @@ static void spusched_wake(unsigned long data) static int spusched_thread(void *unused) { - struct spu *spu, *next; + struct spu *spu; int node; while (!kthread_should_stop()) { set_current_state(TASK_INTERRUPTIBLE); schedule(); for (node = 0; node < MAX_NUMNODES; node++) { - mutex_lock(&spu_prio->active_mutex[node]); - list_for_each_entry_safe(spu, next, - &spu_prio->active_list[node], - list) - spusched_tick(spu->ctx); - mutex_unlock(&spu_prio->active_mutex[node]); + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) + if (spu->ctx) + spusched_tick(spu->ctx); + mutex_unlock(&cbe_spu_info[node].list_mutex); } } @@ -922,8 +928,8 @@ int __init spu_sched_init(void) __clear_bit(i, spu_prio->bitmap); } for (i = 0; i < MAX_NUMNODES; i++) { - mutex_init(&spu_prio->active_mutex[i]); - INIT_LIST_HEAD(&spu_prio->active_list[i]); + mutex_init(&cbe_spu_info[i].list_mutex); + INIT_LIST_HEAD(&cbe_spu_info[i].spus); } spin_lock_init(&spu_prio->runq_lock); @@ -954,7 +960,7 @@ int __init spu_sched_init(void) void spu_sched_exit(void) { - struct spu *spu, *tmp; + struct spu *spu; int node; remove_proc_entry("spu_loadavg", NULL); @@ -963,13 +969,11 @@ void spu_sched_exit(void) kthread_stop(spusched_task); for (node = 0; node < MAX_NUMNODES; node++) { - mutex_lock(&spu_prio->active_mutex[node]); - list_for_each_entry_safe(spu, tmp, &spu_prio->active_list[node], - list) { - list_del_init(&spu->list); - spu_free(spu); - } - mutex_unlock(&spu_prio->active_mutex[node]); + mutex_lock(&cbe_spu_info[node].list_mutex); + list_for_each_entry(spu, &cbe_spu_info[node].spus, cbe_list) + if (spu->alloc_state != SPU_FREE) + spu->alloc_state = SPU_FREE; + mutex_unlock(&cbe_spu_info[node].list_mutex); } kfree(spu_prio); } diff --git a/include/asm-powerpc/spu.h b/include/asm-powerpc/spu.h index a0f7fc8..8836c0f 100644 --- a/include/asm-powerpc/spu.h +++ b/include/asm-powerpc/spu.h @@ -121,10 +121,9 @@ struct spu { unsigned long problem_phys; struct spu_problem __iomem *problem; struct spu_priv2 __iomem *priv2; - struct list_head list; struct list_head cbe_list; - struct list_head sched_list; struct list_head full_list; + enum { SPU_FREE, SPU_USED } alloc_state; int number; unsigned int irqs[3]; u32 node; @@ -187,18 +186,16 @@ struct spu { }; struct cbe_spu_info { + struct mutex list_mutex; struct list_head spus; - struct list_head free_spus; int n_spus; + int nr_active; atomic_t reserved_spus; }; extern struct cbe_spu_info cbe_spu_info[]; -struct spu *spu_alloc(void); -struct spu *spu_alloc_node(int node); -struct spu *spu_alloc_spu(struct spu *spu); -void spu_free(struct spu *spu); +void spu_init_channels(struct spu *spu); int spu_irq_class_0_bottom(struct spu *spu); int spu_irq_class_1_bottom(struct spu *spu); void spu_irq_setaffinity(struct spu *spu, int cpu); |