summaryrefslogtreecommitdiffstats
path: root/arch/ia64/mm
diff options
context:
space:
mode:
Diffstat (limited to 'arch/ia64/mm')
-rw-r--r--arch/ia64/mm/contig.c99
-rw-r--r--arch/ia64/mm/discontig.c129
-rw-r--r--arch/ia64/mm/init.c6
-rw-r--r--arch/ia64/mm/ioremap.c11
-rw-r--r--arch/ia64/mm/tlb.c56
5 files changed, 245 insertions, 56 deletions
diff --git a/arch/ia64/mm/contig.c b/arch/ia64/mm/contig.c
index 2f724d2..54bf540 100644
--- a/arch/ia64/mm/contig.c
+++ b/arch/ia64/mm/contig.c
@@ -154,38 +154,99 @@ static void *cpu_data;
void * __cpuinit
per_cpu_init (void)
{
- int cpu;
- static int first_time=1;
+ static bool first_time = true;
+ void *cpu0_data = __cpu0_per_cpu;
+ unsigned int cpu;
+
+ if (!first_time)
+ goto skip;
+ first_time = false;
/*
- * get_free_pages() cannot be used before cpu_init() done. BSP
- * allocates "NR_CPUS" pages for all CPUs to avoid that AP calls
- * get_zeroed_page().
+ * get_free_pages() cannot be used before cpu_init() done.
+ * BSP allocates PERCPU_PAGE_SIZE bytes for all possible CPUs
+ * to avoid that AP calls get_zeroed_page().
*/
- if (first_time) {
- void *cpu0_data = __cpu0_per_cpu;
+ for_each_possible_cpu(cpu) {
+ void *src = cpu == 0 ? cpu0_data : __phys_per_cpu_start;
- first_time=0;
+ memcpy(cpu_data, src, __per_cpu_end - __per_cpu_start);
+ __per_cpu_offset[cpu] = (char *)cpu_data - __per_cpu_start;
+ per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
- __per_cpu_offset[0] = (char *) cpu0_data - __per_cpu_start;
- per_cpu(local_per_cpu_offset, 0) = __per_cpu_offset[0];
+ /*
+ * percpu area for cpu0 is moved from the __init area
+ * which is setup by head.S and used till this point.
+ * Update ar.k3. This move is ensures that percpu
+ * area for cpu0 is on the correct node and its
+ * virtual address isn't insanely far from other
+ * percpu areas which is important for congruent
+ * percpu allocator.
+ */
+ if (cpu == 0)
+ ia64_set_kr(IA64_KR_PER_CPU_DATA, __pa(cpu_data) -
+ (unsigned long)__per_cpu_start);
- for (cpu = 1; cpu < NR_CPUS; cpu++) {
- memcpy(cpu_data, __phys_per_cpu_start, __per_cpu_end - __per_cpu_start);
- __per_cpu_offset[cpu] = (char *) cpu_data - __per_cpu_start;
- cpu_data += PERCPU_PAGE_SIZE;
- per_cpu(local_per_cpu_offset, cpu) = __per_cpu_offset[cpu];
- }
+ cpu_data += PERCPU_PAGE_SIZE;
}
+skip:
return __per_cpu_start + __per_cpu_offset[smp_processor_id()];
}
static inline void
alloc_per_cpu_data(void)
{
- cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * NR_CPUS-1,
+ cpu_data = __alloc_bootmem(PERCPU_PAGE_SIZE * num_possible_cpus(),
PERCPU_PAGE_SIZE, __pa(MAX_DMA_ADDRESS));
}
+
+/**
+ * setup_per_cpu_areas - setup percpu areas
+ *
+ * Arch code has already allocated and initialized percpu areas. All
+ * this function has to do is to teach the determined layout to the
+ * dynamic percpu allocator, which happens to be more complex than
+ * creating whole new ones using helpers.
+ */
+void __init
+setup_per_cpu_areas(void)
+{
+ struct pcpu_alloc_info *ai;
+ struct pcpu_group_info *gi;
+ unsigned int cpu;
+ ssize_t static_size, reserved_size, dyn_size;
+ int rc;
+
+ ai = pcpu_alloc_alloc_info(1, num_possible_cpus());
+ if (!ai)
+ panic("failed to allocate pcpu_alloc_info");
+ gi = &ai->groups[0];
+
+ /* units are assigned consecutively to possible cpus */
+ for_each_possible_cpu(cpu)
+ gi->cpu_map[gi->nr_units++] = cpu;
+
+ /* set parameters */
+ static_size = __per_cpu_end - __per_cpu_start;
+ reserved_size = PERCPU_MODULE_RESERVE;
+ dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size;
+ if (dyn_size < 0)
+ panic("percpu area overflow static=%zd reserved=%zd\n",
+ static_size, reserved_size);
+
+ ai->static_size = static_size;
+ ai->reserved_size = reserved_size;
+ ai->dyn_size = dyn_size;
+ ai->unit_size = PERCPU_PAGE_SIZE;
+ ai->atom_size = PAGE_SIZE;
+ ai->alloc_size = PERCPU_PAGE_SIZE;
+
+ rc = pcpu_setup_first_chunk(ai, __per_cpu_start + __per_cpu_offset[0]);
+ if (rc)
+ panic("failed to setup percpu area (err=%d)", rc);
+
+ pcpu_free_alloc_info(ai);
+}
#else
#define alloc_per_cpu_data() do { } while (0)
#endif /* CONFIG_SMP */
@@ -270,8 +331,8 @@ paging_init (void)
map_size = PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) *
sizeof(struct page));
- vmalloc_end -= map_size;
- vmem_map = (struct page *) vmalloc_end;
+ VMALLOC_END -= map_size;
+ vmem_map = (struct page *) VMALLOC_END;
efi_memmap_walk(create_mem_map_page_table, NULL);
/*
diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c
index d85ba98..19c4b21 100644
--- a/arch/ia64/mm/discontig.c
+++ b/arch/ia64/mm/discontig.c
@@ -143,22 +143,120 @@ static void *per_cpu_node_setup(void *cpu_data, int node)
int cpu;
for_each_possible_early_cpu(cpu) {
- if (cpu == 0) {
- void *cpu0_data = __cpu0_per_cpu;
- __per_cpu_offset[cpu] = (char*)cpu0_data -
- __per_cpu_start;
- } else if (node == node_cpuid[cpu].nid) {
- memcpy(__va(cpu_data), __phys_per_cpu_start,
- __per_cpu_end - __per_cpu_start);
- __per_cpu_offset[cpu] = (char*)__va(cpu_data) -
- __per_cpu_start;
- cpu_data += PERCPU_PAGE_SIZE;
- }
+ void *src = cpu == 0 ? __cpu0_per_cpu : __phys_per_cpu_start;
+
+ if (node != node_cpuid[cpu].nid)
+ continue;
+
+ memcpy(__va(cpu_data), src, __per_cpu_end - __per_cpu_start);
+ __per_cpu_offset[cpu] = (char *)__va(cpu_data) -
+ __per_cpu_start;
+
+ /*
+ * percpu area for cpu0 is moved from the __init area
+ * which is setup by head.S and used till this point.
+ * Update ar.k3. This move is ensures that percpu
+ * area for cpu0 is on the correct node and its
+ * virtual address isn't insanely far from other
+ * percpu areas which is important for congruent
+ * percpu allocator.
+ */
+ if (cpu == 0)
+ ia64_set_kr(IA64_KR_PER_CPU_DATA,
+ (unsigned long)cpu_data -
+ (unsigned long)__per_cpu_start);
+
+ cpu_data += PERCPU_PAGE_SIZE;
}
#endif
return cpu_data;
}
+#ifdef CONFIG_SMP
+/**
+ * setup_per_cpu_areas - setup percpu areas
+ *
+ * Arch code has already allocated and initialized percpu areas. All
+ * this function has to do is to teach the determined layout to the
+ * dynamic percpu allocator, which happens to be more complex than
+ * creating whole new ones using helpers.
+ */
+void __init setup_per_cpu_areas(void)
+{
+ struct pcpu_alloc_info *ai;
+ struct pcpu_group_info *uninitialized_var(gi);
+ unsigned int *cpu_map;
+ void *base;
+ unsigned long base_offset;
+ unsigned int cpu;
+ ssize_t static_size, reserved_size, dyn_size;
+ int node, prev_node, unit, nr_units, rc;
+
+ ai = pcpu_alloc_alloc_info(MAX_NUMNODES, nr_cpu_ids);
+ if (!ai)
+ panic("failed to allocate pcpu_alloc_info");
+ cpu_map = ai->groups[0].cpu_map;
+
+ /* determine base */
+ base = (void *)ULONG_MAX;
+ for_each_possible_cpu(cpu)
+ base = min(base,
+ (void *)(__per_cpu_offset[cpu] + __per_cpu_start));
+ base_offset = (void *)__per_cpu_start - base;
+
+ /* build cpu_map, units are grouped by node */
+ unit = 0;
+ for_each_node(node)
+ for_each_possible_cpu(cpu)
+ if (node == node_cpuid[cpu].nid)
+ cpu_map[unit++] = cpu;
+ nr_units = unit;
+
+ /* set basic parameters */
+ static_size = __per_cpu_end - __per_cpu_start;
+ reserved_size = PERCPU_MODULE_RESERVE;
+ dyn_size = PERCPU_PAGE_SIZE - static_size - reserved_size;
+ if (dyn_size < 0)
+ panic("percpu area overflow static=%zd reserved=%zd\n",
+ static_size, reserved_size);
+
+ ai->static_size = static_size;
+ ai->reserved_size = reserved_size;
+ ai->dyn_size = dyn_size;
+ ai->unit_size = PERCPU_PAGE_SIZE;
+ ai->atom_size = PAGE_SIZE;
+ ai->alloc_size = PERCPU_PAGE_SIZE;
+
+ /*
+ * CPUs are put into groups according to node. Walk cpu_map
+ * and create new groups at node boundaries.
+ */
+ prev_node = -1;
+ ai->nr_groups = 0;
+ for (unit = 0; unit < nr_units; unit++) {
+ cpu = cpu_map[unit];
+ node = node_cpuid[cpu].nid;
+
+ if (node == prev_node) {
+ gi->nr_units++;
+ continue;
+ }
+ prev_node = node;
+
+ gi = &ai->groups[ai->nr_groups++];
+ gi->nr_units = 1;
+ gi->base_offset = __per_cpu_offset[cpu] + base_offset;
+ gi->cpu_map = &cpu_map[unit];
+ }
+
+ rc = pcpu_setup_first_chunk(ai, base);
+ if (rc)
+ panic("failed to setup percpu area (err=%d)", rc);
+
+ pcpu_free_alloc_info(ai);
+}
+#endif
+
/**
* fill_pernode - initialize pernode data.
* @node: the node id.
@@ -352,7 +450,8 @@ static void __init initialize_pernode_data(void)
/* Set the node_data pointer for each per-cpu struct */
for_each_possible_early_cpu(cpu) {
node = node_cpuid[cpu].nid;
- per_cpu(cpu_info, cpu).node_data = mem_data[node].node_data;
+ per_cpu(ia64_cpu_info, cpu).node_data =
+ mem_data[node].node_data;
}
#else
{
@@ -360,7 +459,7 @@ static void __init initialize_pernode_data(void)
cpu = 0;
node = node_cpuid[cpu].nid;
cpu0_cpu_info = (struct cpuinfo_ia64 *)(__phys_per_cpu_start +
- ((char *)&per_cpu__cpu_info - __per_cpu_start));
+ ((char *)&per_cpu__ia64_cpu_info - __per_cpu_start));
cpu0_cpu_info->node_data = mem_data[node].node_data;
}
#endif /* CONFIG_SMP */
@@ -666,9 +765,9 @@ void __init paging_init(void)
sparse_init();
#ifdef CONFIG_VIRTUAL_MEM_MAP
- vmalloc_end -= PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) *
+ VMALLOC_END -= PAGE_ALIGN(ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) *
sizeof(struct page));
- vmem_map = (struct page *) vmalloc_end;
+ vmem_map = (struct page *) VMALLOC_END;
efi_memmap_walk(create_mem_map_page_table, NULL);
printk("Virtual mem_map starts at 0x%p\n", vmem_map);
#endif
diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c
index 1857766..7c0d481 100644
--- a/arch/ia64/mm/init.c
+++ b/arch/ia64/mm/init.c
@@ -44,8 +44,8 @@ extern void ia64_tlb_init (void);
unsigned long MAX_DMA_ADDRESS = PAGE_OFFSET + 0x100000000UL;
#ifdef CONFIG_VIRTUAL_MEM_MAP
-unsigned long vmalloc_end = VMALLOC_END_INIT;
-EXPORT_SYMBOL(vmalloc_end);
+unsigned long VMALLOC_END = VMALLOC_END_INIT;
+EXPORT_SYMBOL(VMALLOC_END);
struct page *vmem_map;
EXPORT_SYMBOL(vmem_map);
#endif
@@ -91,7 +91,7 @@ dma_mark_clean(void *addr, size_t size)
inline void
ia64_set_rbs_bot (void)
{
- unsigned long stack_size = current->signal->rlim[RLIMIT_STACK].rlim_max & -16;
+ unsigned long stack_size = rlimit_max(RLIMIT_STACK) & -16;
if (stack_size > MAX_USER_STACK_SIZE)
stack_size = MAX_USER_STACK_SIZE;
diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c
index 2a14062..3dccdd8 100644
--- a/arch/ia64/mm/ioremap.c
+++ b/arch/ia64/mm/ioremap.c
@@ -22,6 +22,12 @@ __ioremap (unsigned long phys_addr)
}
void __iomem *
+early_ioremap (unsigned long phys_addr, unsigned long size)
+{
+ return __ioremap(phys_addr);
+}
+
+void __iomem *
ioremap (unsigned long phys_addr, unsigned long size)
{
void __iomem *addr;
@@ -102,6 +108,11 @@ ioremap_nocache (unsigned long phys_addr, unsigned long size)
EXPORT_SYMBOL(ioremap_nocache);
void
+early_iounmap (volatile void __iomem *addr, unsigned long size)
+{
+}
+
+void
iounmap (volatile void __iomem *addr)
{
if (REGION_NUMBER(addr) == RGN_GATE)
diff --git a/arch/ia64/mm/tlb.c b/arch/ia64/mm/tlb.c
index f426dc7..f3de9d7 100644
--- a/arch/ia64/mm/tlb.c
+++ b/arch/ia64/mm/tlb.c
@@ -48,7 +48,7 @@ DEFINE_PER_CPU(u8, ia64_need_tlb_flush);
DEFINE_PER_CPU(u8, ia64_tr_num); /*Number of TR slots in current processor*/
DEFINE_PER_CPU(u8, ia64_tr_used); /*Max Slot number used by kernel*/
-struct ia64_tr_entry __per_cpu_idtrs[NR_CPUS][2][IA64_TR_ALLOC_MAX];
+struct ia64_tr_entry *ia64_idtrs[NR_CPUS];
/*
* Initializes the ia64_ctx.bitmap array based on max_ctx+1.
@@ -100,24 +100,36 @@ wrap_mmu_context (struct mm_struct *mm)
* this primitive it can be moved up to a spinaphore.h header.
*/
struct spinaphore {
- atomic_t cur;
+ unsigned long ticket;
+ unsigned long serve;
};
static inline void spinaphore_init(struct spinaphore *ss, int val)
{
- atomic_set(&ss->cur, val);
+ ss->ticket = 0;
+ ss->serve = val;
}
static inline void down_spin(struct spinaphore *ss)
{
- while (unlikely(!atomic_add_unless(&ss->cur, -1, 0)))
- while (atomic_read(&ss->cur) == 0)
- cpu_relax();
+ unsigned long t = ia64_fetchadd(1, &ss->ticket, acq), serve;
+
+ if (time_before(t, ss->serve))
+ return;
+
+ ia64_invala();
+
+ for (;;) {
+ asm volatile ("ld4.c.nc %0=[%1]" : "=r"(serve) : "r"(&ss->serve) : "memory");
+ if (time_before(t, serve))
+ return;
+ cpu_relax();
+ }
}
static inline void up_spin(struct spinaphore *ss)
{
- atomic_add(1, &ss->cur);
+ ia64_fetchadd(1, &ss->serve, rel);
}
static struct spinaphore ptcg_sem;
@@ -417,10 +429,16 @@ int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
struct ia64_tr_entry *p;
int cpu = smp_processor_id();
+ if (!ia64_idtrs[cpu]) {
+ ia64_idtrs[cpu] = kmalloc(2 * IA64_TR_ALLOC_MAX *
+ sizeof (struct ia64_tr_entry), GFP_KERNEL);
+ if (!ia64_idtrs[cpu])
+ return -ENOMEM;
+ }
r = -EINVAL;
/*Check overlap with existing TR entries*/
if (target_mask & 0x1) {
- p = &__per_cpu_idtrs[cpu][0][0];
+ p = ia64_idtrs[cpu];
for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
i++, p++) {
if (p->pte & 0x1)
@@ -432,7 +450,7 @@ int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
}
}
if (target_mask & 0x2) {
- p = &__per_cpu_idtrs[cpu][1][0];
+ p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX;
for (i = IA64_TR_ALLOC_BASE; i <= per_cpu(ia64_tr_used, cpu);
i++, p++) {
if (p->pte & 0x1)
@@ -447,16 +465,16 @@ int ia64_itr_entry(u64 target_mask, u64 va, u64 pte, u64 log_size)
for (i = IA64_TR_ALLOC_BASE; i < per_cpu(ia64_tr_num, cpu); i++) {
switch (target_mask & 0x3) {
case 1:
- if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1))
+ if (!((ia64_idtrs[cpu] + i)->pte & 0x1))
goto found;
continue;
case 2:
- if (!(__per_cpu_idtrs[cpu][1][i].pte & 0x1))
+ if (!((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
goto found;
continue;
case 3:
- if (!(__per_cpu_idtrs[cpu][0][i].pte & 0x1) &&
- !(__per_cpu_idtrs[cpu][1][i].pte & 0x1))
+ if (!((ia64_idtrs[cpu] + i)->pte & 0x1) &&
+ !((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
goto found;
continue;
default:
@@ -476,7 +494,7 @@ found:
if (target_mask & 0x1) {
ia64_itr(0x1, i, va, pte, log_size);
ia64_srlz_i();
- p = &__per_cpu_idtrs[cpu][0][i];
+ p = ia64_idtrs[cpu] + i;
p->ifa = va;
p->pte = pte;
p->itir = log_size << 2;
@@ -485,7 +503,7 @@ found:
if (target_mask & 0x2) {
ia64_itr(0x2, i, va, pte, log_size);
ia64_srlz_i();
- p = &__per_cpu_idtrs[cpu][1][i];
+ p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i;
p->ifa = va;
p->pte = pte;
p->itir = log_size << 2;
@@ -516,7 +534,7 @@ void ia64_ptr_entry(u64 target_mask, int slot)
return;
if (target_mask & 0x1) {
- p = &__per_cpu_idtrs[cpu][0][slot];
+ p = ia64_idtrs[cpu] + slot;
if ((p->pte&0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
p->pte = 0;
ia64_ptr(0x1, p->ifa, p->itir>>2);
@@ -525,7 +543,7 @@ void ia64_ptr_entry(u64 target_mask, int slot)
}
if (target_mask & 0x2) {
- p = &__per_cpu_idtrs[cpu][1][slot];
+ p = ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + slot;
if ((p->pte & 0x1) && is_tr_overlap(p, p->ifa, p->itir>>2)) {
p->pte = 0;
ia64_ptr(0x2, p->ifa, p->itir>>2);
@@ -534,8 +552,8 @@ void ia64_ptr_entry(u64 target_mask, int slot)
}
for (i = per_cpu(ia64_tr_used, cpu); i >= IA64_TR_ALLOC_BASE; i--) {
- if ((__per_cpu_idtrs[cpu][0][i].pte & 0x1) ||
- (__per_cpu_idtrs[cpu][1][i].pte & 0x1))
+ if (((ia64_idtrs[cpu] + i)->pte & 0x1) ||
+ ((ia64_idtrs[cpu] + IA64_TR_ALLOC_MAX + i)->pte & 0x1))
break;
}
per_cpu(ia64_tr_used, cpu) = i;
OpenPOWER on IntegriCloud