From dcc17d1baef3721d1574e5b2f4f2d4607514bcff Mon Sep 17 00:00:00 2001 From: Peter Keilty Date: Mon, 31 Oct 2005 16:44:47 -0500 Subject: [IA64] Use bitmaps for efficient context allocation/free Corrects the very inefficient method of finding free context_ids in get_mmu_context(). Instead of walking the task_list of all processes, 2 bitmaps are used to efficiently store and lookup state, inuse and needs flushing. The entire rid address space is now used before calling wrap_mmu_context and global tlb flushing. Special thanks to Ken and Rohit for their review and modifications in using a bit flushmap. Signed-off-by: Peter Keilty Signed-off-by: Tony Luck --- arch/ia64/kernel/setup.c | 1 + arch/ia64/mm/tlb.c | 56 +++++++++++++++++++++++------------------------- 2 files changed, 28 insertions(+), 29 deletions(-) (limited to 'arch') diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index fc56ca2..c9388a9 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -454,6 +454,7 @@ setup_arch (char **cmdline_p) #endif cpu_init(); /* initialize the bootstrap CPU */ + mmu_context_init(); /* initialize context_id bitmap */ #ifdef CONFIG_ACPI acpi_boot_init(); diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index c79a9b9..39628fc 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -8,6 +8,8 @@ * Modified RID allocation for SMP * Goutham Rao * IPI based ptc implementation and A-step IPI implementation. + * Rohit Seth + * Ken Chen */ #include #include @@ -16,12 +18,14 @@ #include #include #include +#include #include #include #include #include #include +#include static struct { unsigned long mask; /* mask of supported purge page-sizes */ @@ -31,49 +35,43 @@ static struct { struct ia64_ctx ia64_ctx = { .lock = SPIN_LOCK_UNLOCKED, .next = 1, - .limit = (1 << 15) - 1, /* start out with the safe (architected) limit */ .max_ctx = ~0U }; DEFINE_PER_CPU(u8, ia64_need_tlb_flush); /* + * Initializes the ia64_ctx.bitmap array based on max_ctx+1. 
+ * Called after cpu_init() has setup ia64_ctx.max_ctx based on + * maximum RID that is supported by boot CPU. + */ +void __init +mmu_context_init (void) +{ + ia64_ctx.bitmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3); + ia64_ctx.flushmap = alloc_bootmem((ia64_ctx.max_ctx+1)>>3); +} + +/* * Acquire the ia64_ctx.lock before calling this function! */ void wrap_mmu_context (struct mm_struct *mm) { - unsigned long tsk_context, max_ctx = ia64_ctx.max_ctx; - struct task_struct *tsk; int i; + unsigned long flush_bit; - if (ia64_ctx.next > max_ctx) - ia64_ctx.next = 300; /* skip daemons */ - ia64_ctx.limit = max_ctx + 1; - - /* - * Scan all the task's mm->context and set proper safe range - */ - - read_lock(&tasklist_lock); - repeat: - for_each_process(tsk) { - if (!tsk->mm) - continue; - tsk_context = tsk->mm->context; - if (tsk_context == ia64_ctx.next) { - if (++ia64_ctx.next >= ia64_ctx.limit) { - /* empty range: reset the range limit and start over */ - if (ia64_ctx.next > max_ctx) - ia64_ctx.next = 300; - ia64_ctx.limit = max_ctx + 1; - goto repeat; - } - } - if ((tsk_context > ia64_ctx.next) && (tsk_context < ia64_ctx.limit)) - ia64_ctx.limit = tsk_context; + for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) { + flush_bit = xchg(&ia64_ctx.flushmap[i], 0); + ia64_ctx.bitmap[i] ^= flush_bit; } - read_unlock(&tasklist_lock); + + /* use offset at 300 to skip daemons */ + ia64_ctx.next = find_next_zero_bit(ia64_ctx.bitmap, + ia64_ctx.max_ctx, 300); + ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap, + ia64_ctx.max_ctx, ia64_ctx.next); + /* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */ { int cpu = get_cpu(); /* prevent preemption/migration */ -- cgit v1.1 From 58cd90829918dabbd81a453de676d41fb7b628ad Mon Sep 17 00:00:00 2001 From: "Chen, Kenneth W" Date: Sat, 29 Oct 2005 18:47:04 -0700 Subject: [IA64] make mmu_context.h and tlb.c 80-column friendly wrap_mmu_context(), delayed_tlb_flush(), get_mmu_context() all have an extra { } 
block which causes one extra indentation. get_mmu_context() is particularly bad with 5 indentations to the innermost "if". It finally gets on my nerves that I can't keep the code within 80 columns. Remove the extra { } block and while I'm at it, reformat all the comments to be 80-column friendly. No functional change at all with this patch. Signed-off-by: Ken Chen Signed-off-by: Tony Luck --- arch/ia64/mm/tlb.c | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'arch') diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c index 39628fc..41105d4 100644 --- a/arch/ia64/mm/tlb.c +++ b/arch/ia64/mm/tlb.c @@ -29,7 +29,7 @@ static struct { unsigned long mask; /* mask of supported purge page-sizes */ - unsigned long max_bits; /* log2() of largest supported purge page-size */ + unsigned long max_bits; /* log2 of largest supported purge page-size */ } purge; struct ia64_ctx ia64_ctx = { @@ -58,7 +58,7 @@ mmu_context_init (void) void wrap_mmu_context (struct mm_struct *mm) { - int i; + int i, cpu; unsigned long flush_bit; for (i=0; i <= ia64_ctx.max_ctx / BITS_PER_LONG; i++) { @@ -72,20 +72,21 @@ wrap_mmu_context (struct mm_struct *mm) ia64_ctx.limit = find_next_bit(ia64_ctx.bitmap, ia64_ctx.max_ctx, ia64_ctx.next); - /* can't call flush_tlb_all() here because of race condition with O(1) scheduler [EF] */ - { - int cpu = get_cpu(); /* prevent preemption/migration */ - for_each_online_cpu(i) { - if (i != cpu) - per_cpu(ia64_need_tlb_flush, i) = 1; - } - put_cpu(); - } + /* + * can't call flush_tlb_all() here because of race condition + * with O(1) scheduler [EF] + */ + cpu = get_cpu(); /* prevent preemption/migration */ + for_each_online_cpu(i) + if (i != cpu) + per_cpu(ia64_need_tlb_flush, i) = 1; + put_cpu(); local_flush_tlb_all(); } void -ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, unsigned long end, unsigned long nbits) +ia64_global_tlb_purge (struct mm_struct *mm, unsigned long start, + unsigned long end, 
unsigned long nbits) { static DEFINE_SPINLOCK(ptcg_lock); @@ -133,7 +134,8 @@ local_flush_tlb_all (void) } void -flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long end) +flush_tlb_range (struct vm_area_struct *vma, unsigned long start, + unsigned long end) { struct mm_struct *mm = vma->vm_mm; unsigned long size = end - start; @@ -147,7 +149,8 @@ flush_tlb_range (struct vm_area_struct *vma, unsigned long start, unsigned long #endif nbits = ia64_fls(size + 0xfff); - while (unlikely (((1UL << nbits) & purge.mask) == 0) && (nbits < purge.max_bits)) + while (unlikely (((1UL << nbits) & purge.mask) == 0) && + (nbits < purge.max_bits)) ++nbits; if (nbits > purge.max_bits) nbits = purge.max_bits; @@ -189,5 +192,5 @@ ia64_tlb_init (void) local_cpu_data->ptce_stride[0] = ptce_info.stride[0]; local_cpu_data->ptce_stride[1] = ptce_info.stride[1]; - local_flush_tlb_all(); /* nuke left overs from bootstrapping... */ + local_flush_tlb_all(); /* nuke left overs from bootstrapping... */ } -- cgit v1.1