summaryrefslogtreecommitdiffstats
path: root/arch/s390
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390')
-rw-r--r--arch/s390/Kconfig14
-rw-r--r--arch/s390/defconfig1
-rw-r--r--arch/s390/kernel/setup.c55
-rw-r--r--arch/s390/lib/uaccess_pt.c5
-rw-r--r--arch/s390/mm/Makefile2
-rw-r--r--arch/s390/mm/extmem.c106
-rw-r--r--arch/s390/mm/init.c184
-rw-r--r--arch/s390/mm/vmem.c381
8 files changed, 477 insertions, 271 deletions
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index 45e47bf..ff69056 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -241,8 +241,14 @@ config WARN_STACK_SIZE
This allows you to specify the maximum frame size a function may
have without the compiler complaining about it.
+config ARCH_POPULATES_NODE_MAP
+ def_bool y
+
source "mm/Kconfig"
+config HOLES_IN_ZONE
+ def_bool y
+
comment "I/O subsystem configuration"
config MACHCHK_WARNING
@@ -266,14 +272,6 @@ config QDIO
If unsure, say Y.
-config QDIO_PERF_STATS
- bool "Performance statistics in /proc"
- depends on QDIO
- help
- Say Y here to get performance statistics in /proc/qdio_perf
-
- If unsure, say N.
-
config QDIO_DEBUG
bool "Extended debugging information"
depends on QDIO
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index 7cd51e7..a6ec919 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -134,7 +134,6 @@ CONFIG_RESOURCES_64BIT=y
#
CONFIG_MACHCHK_WARNING=y
CONFIG_QDIO=y
-# CONFIG_QDIO_PERF_STATS is not set
# CONFIG_QDIO_DEBUG is not set
#
diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index b928fec..49ef206 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -64,9 +64,8 @@ unsigned int console_devno = -1;
unsigned int console_irq = -1;
unsigned long machine_flags = 0;
-struct mem_chunk memory_chunk[MEMORY_CHUNKS];
+struct mem_chunk __initdata memory_chunk[MEMORY_CHUNKS];
volatile int __cpu_logical_map[NR_CPUS]; /* logical cpu to cpu address */
-unsigned long __initdata zholes_size[MAX_NR_ZONES];
static unsigned long __initdata memory_end;
/*
@@ -354,21 +353,6 @@ void machine_power_off(void)
*/
void (*pm_power_off)(void) = machine_power_off;
-static void __init
-add_memory_hole(unsigned long start, unsigned long end)
-{
- unsigned long dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT;
-
- if (end <= dma_pfn)
- zholes_size[ZONE_DMA] += end - start + 1;
- else if (start > dma_pfn)
- zholes_size[ZONE_NORMAL] += end - start + 1;
- else {
- zholes_size[ZONE_DMA] += dma_pfn - start + 1;
- zholes_size[ZONE_NORMAL] += end - dma_pfn;
- }
-}
-
static int __init early_parse_mem(char *p)
{
memory_end = memparse(p, &p);
@@ -521,7 +505,6 @@ setup_memory(void)
{
unsigned long bootmap_size;
unsigned long start_pfn, end_pfn, init_pfn;
- unsigned long last_rw_end;
int i;
/*
@@ -577,39 +560,27 @@ setup_memory(void)
/*
* Register RAM areas with the bootmem allocator.
*/
- last_rw_end = start_pfn;
for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
- unsigned long start_chunk, end_chunk;
+ unsigned long start_chunk, end_chunk, pfn;
if (memory_chunk[i].type != CHUNK_READ_WRITE)
continue;
- start_chunk = (memory_chunk[i].addr + PAGE_SIZE - 1);
- start_chunk >>= PAGE_SHIFT;
- end_chunk = (memory_chunk[i].addr + memory_chunk[i].size);
- end_chunk >>= PAGE_SHIFT;
- if (start_chunk < start_pfn)
- start_chunk = start_pfn;
- if (end_chunk > end_pfn)
- end_chunk = end_pfn;
- if (start_chunk < end_chunk) {
- /* Initialize storage key for RAM pages */
- for (init_pfn = start_chunk ; init_pfn < end_chunk;
- init_pfn++)
- page_set_storage_key(init_pfn << PAGE_SHIFT,
- PAGE_DEFAULT_KEY);
- free_bootmem(start_chunk << PAGE_SHIFT,
- (end_chunk - start_chunk) << PAGE_SHIFT);
- if (last_rw_end < start_chunk)
- add_memory_hole(last_rw_end, start_chunk - 1);
- last_rw_end = end_chunk;
- }
+ start_chunk = PFN_DOWN(memory_chunk[i].addr);
+ end_chunk = start_chunk + PFN_DOWN(memory_chunk[i].size) - 1;
+ end_chunk = min(end_chunk, end_pfn);
+ if (start_chunk >= end_chunk)
+ continue;
+ add_active_range(0, start_chunk, end_chunk);
+ pfn = max(start_chunk, start_pfn);
+ for (; pfn <= end_chunk; pfn++)
+ page_set_storage_key(PFN_PHYS(pfn), PAGE_DEFAULT_KEY);
}
psw_set_key(PAGE_DEFAULT_KEY);
- if (last_rw_end < end_pfn - 1)
- add_memory_hole(last_rw_end, end_pfn - 1);
+ free_bootmem_with_active_regions(0, max_pfn);
+ reserve_bootmem(0, PFN_PHYS(start_pfn));
/*
* Reserve the bootmem bitmap itself as well. We do this in two
diff --git a/arch/s390/lib/uaccess_pt.c b/arch/s390/lib/uaccess_pt.c
index 8741bdc..633249c 100644
--- a/arch/s390/lib/uaccess_pt.c
+++ b/arch/s390/lib/uaccess_pt.c
@@ -8,8 +8,8 @@
*/
#include <linux/errno.h>
-#include <asm/uaccess.h>
#include <linux/mm.h>
+#include <asm/uaccess.h>
#include <asm/futex.h>
static inline int __handle_fault(struct mm_struct *mm, unsigned long address,
@@ -60,8 +60,9 @@ out:
out_of_memory:
up_read(&mm->mmap_sem);
- if (current->pid == 1) {
+ if (is_init(current)) {
yield();
+ down_read(&mm->mmap_sem);
goto survive;
}
printk("VM: killing process %s\n", current->comm);
diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
index aa9a42b..8e09db1 100644
--- a/arch/s390/mm/Makefile
+++ b/arch/s390/mm/Makefile
@@ -2,6 +2,6 @@
# Makefile for the linux s390-specific parts of the memory manager.
#
-obj-y := init.o fault.o ioremap.o extmem.o mmap.o
+obj-y := init.o fault.o ioremap.o extmem.o mmap.o vmem.o
obj-$(CONFIG_CMM) += cmm.o
diff --git a/arch/s390/mm/extmem.c b/arch/s390/mm/extmem.c
index 9e9bc484..775bf19 100644
--- a/arch/s390/mm/extmem.c
+++ b/arch/s390/mm/extmem.c
@@ -16,6 +16,7 @@
#include <linux/bootmem.h>
#include <linux/ctype.h>
#include <asm/page.h>
+#include <asm/pgtable.h>
#include <asm/ebcdic.h>
#include <asm/errno.h>
#include <asm/extmem.h>
@@ -238,65 +239,6 @@ query_segment_type (struct dcss_segment *seg)
}
/*
- * check if the given segment collides with guest storage.
- * returns 1 if this is the case, 0 if no collision was found
- */
-static int
-segment_overlaps_storage(struct dcss_segment *seg)
-{
- int i;
-
- for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
- if (memory_chunk[i].type != CHUNK_READ_WRITE)
- continue;
- if ((memory_chunk[i].addr >> 20) > (seg->end >> 20))
- continue;
- if (((memory_chunk[i].addr + memory_chunk[i].size - 1) >> 20)
- < (seg->start_addr >> 20))
- continue;
- return 1;
- }
- return 0;
-}
-
-/*
- * check if segment collides with other segments that are currently loaded
- * returns 1 if this is the case, 0 if no collision was found
- */
-static int
-segment_overlaps_others (struct dcss_segment *seg)
-{
- struct list_head *l;
- struct dcss_segment *tmp;
-
- BUG_ON(!mutex_is_locked(&dcss_lock));
- list_for_each(l, &dcss_list) {
- tmp = list_entry(l, struct dcss_segment, list);
- if ((tmp->start_addr >> 20) > (seg->end >> 20))
- continue;
- if ((tmp->end >> 20) < (seg->start_addr >> 20))
- continue;
- if (seg == tmp)
- continue;
- return 1;
- }
- return 0;
-}
-
-/*
- * check if segment exceeds the kernel mapping range (detected or set via mem=)
- * returns 1 if this is the case, 0 if segment fits into the range
- */
-static inline int
-segment_exceeds_range (struct dcss_segment *seg)
-{
- int seg_last_pfn = (seg->end) >> PAGE_SHIFT;
- if (seg_last_pfn > max_pfn)
- return 1;
- return 0;
-}
-
-/*
* get info about a segment
* possible return values:
* -ENOSYS : we are not running on VM
@@ -341,24 +283,26 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
rc = query_segment_type (seg);
if (rc < 0)
goto out_free;
- if (segment_exceeds_range(seg)) {
- PRINT_WARN ("segment_load: not loading segment %s - exceeds"
- " kernel mapping range\n",name);
- rc = -ERANGE;
+
+ rc = add_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
+
+ switch (rc) {
+ case 0:
+ break;
+ case -ENOSPC:
+ PRINT_WARN("segment_load: not loading segment %s - overlaps "
+ "storage/segment\n", name);
goto out_free;
- }
- if (segment_overlaps_storage(seg)) {
- PRINT_WARN ("segment_load: not loading segment %s - overlaps"
- " storage\n",name);
- rc = -ENOSPC;
+ case -ERANGE:
+ PRINT_WARN("segment_load: not loading segment %s - exceeds "
+ "kernel mapping range\n", name);
goto out_free;
- }
- if (segment_overlaps_others(seg)) {
- PRINT_WARN ("segment_load: not loading segment %s - overlaps"
- " other segments\n",name);
- rc = -EBUSY;
+ default:
+ PRINT_WARN("segment_load: not loading segment %s (rc: %d)\n",
+ name, rc);
goto out_free;
}
+
if (do_nonshared)
dcss_command = DCSS_LOADNSR;
else
@@ -372,7 +316,7 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
rc = dcss_diag_translate_rc (seg->end);
dcss_diag(DCSS_PURGESEG, seg->dcss_name,
&seg->start_addr, &seg->end);
- goto out_free;
+ goto out_shared;
}
seg->do_nonshared = do_nonshared;
atomic_set(&seg->ref_count, 1);
@@ -391,6 +335,8 @@ __segment_load (char *name, int do_nonshared, unsigned long *addr, unsigned long
(void*)seg->start_addr, (void*)seg->end,
segtype_string[seg->vm_segtype]);
goto out;
+ out_shared:
+ remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
out_free:
kfree(seg);
out:
@@ -530,12 +476,12 @@ segment_unload(char *name)
"please report to linux390@de.ibm.com\n",name);
goto out_unlock;
}
- if (atomic_dec_return(&seg->ref_count) == 0) {
- list_del(&seg->list);
- dcss_diag(DCSS_PURGESEG, seg->dcss_name,
- &dummy, &dummy);
- kfree(seg);
- }
+ if (atomic_dec_return(&seg->ref_count) != 0)
+ goto out_unlock;
+ remove_shared_memory(seg->start_addr, seg->end - seg->start_addr + 1);
+ list_del(&seg->list);
+ dcss_diag(DCSS_PURGESEG, seg->dcss_name, &dummy, &dummy);
+ kfree(seg);
out_unlock:
mutex_unlock(&dcss_lock);
}
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index e1881c3..4bb21be 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -24,6 +24,7 @@
#include <linux/pagemap.h>
#include <linux/bootmem.h>
#include <linux/pfn.h>
+#include <linux/poison.h>
#include <asm/processor.h>
#include <asm/system.h>
@@ -69,6 +70,8 @@ void show_mem(void)
printk("Free swap: %6ldkB\n", nr_swap_pages<<(PAGE_SHIFT-10));
i = max_mapnr;
while (i-- > 0) {
+ if (!pfn_valid(i))
+ continue;
page = pfn_to_page(i);
total++;
if (PageReserved(page))
@@ -84,150 +87,52 @@ void show_mem(void)
printk("%d pages swap cached\n",cached);
}
-extern unsigned long __initdata zholes_size[];
-/*
- * paging_init() sets up the page tables
- */
-
-#ifndef CONFIG_64BIT
-void __init paging_init(void)
+static void __init setup_ro_region(void)
{
- pgd_t * pg_dir;
- pte_t * pg_table;
- pte_t pte;
- int i;
- unsigned long tmp;
- unsigned long pfn = 0;
- unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE;
- static const int ssm_mask = 0x04000000L;
- unsigned long ro_start_pfn, ro_end_pfn;
- unsigned long zones_size[MAX_NR_ZONES];
-
- ro_start_pfn = PFN_DOWN((unsigned long)&__start_rodata);
- ro_end_pfn = PFN_UP((unsigned long)&__end_rodata);
-
- memset(zones_size, 0, sizeof(zones_size));
- zones_size[ZONE_DMA] = max_low_pfn;
- free_area_init_node(0, &contig_page_data, zones_size,
- __pa(PAGE_OFFSET) >> PAGE_SHIFT,
- zholes_size);
-
- /* unmap whole virtual address space */
-
- pg_dir = swapper_pg_dir;
-
- for (i = 0; i < PTRS_PER_PGD; i++)
- pmd_clear((pmd_t *) pg_dir++);
-
- /*
- * map whole physical memory to virtual memory (identity mapping)
- */
-
- pg_dir = swapper_pg_dir;
-
- while (pfn < max_low_pfn) {
- /*
- * pg_table is physical at this point
- */
- pg_table = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
-
- pmd_populate_kernel(&init_mm, (pmd_t *) pg_dir, pg_table);
- pg_dir++;
-
- for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) {
- if (pfn >= ro_start_pfn && pfn < ro_end_pfn)
- pte = pfn_pte(pfn, __pgprot(_PAGE_RO));
- else
- pte = pfn_pte(pfn, PAGE_KERNEL);
- if (pfn >= max_low_pfn)
- pte_val(pte) = _PAGE_TYPE_EMPTY;
- set_pte(pg_table, pte);
- pfn++;
- }
- }
-
- S390_lowcore.kernel_asce = pgdir_k;
-
- /* enable virtual mapping in kernel mode */
- __ctl_load(pgdir_k, 1, 1);
- __ctl_load(pgdir_k, 7, 7);
- __ctl_load(pgdir_k, 13, 13);
- __raw_local_irq_ssm(ssm_mask);
-
- local_flush_tlb();
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+ pte_t new_pte;
+ unsigned long address, end;
+
+ address = ((unsigned long)&__start_rodata) & PAGE_MASK;
+ end = PFN_ALIGN((unsigned long)&__end_rodata);
+
+ for (; address < end; address += PAGE_SIZE) {
+ pgd = pgd_offset_k(address);
+ pmd = pmd_offset(pgd, address);
+ pte = pte_offset_kernel(pmd, address);
+ new_pte = mk_pte_phys(address, __pgprot(_PAGE_RO));
+ set_pte(pte, new_pte);
+ }
}
-#else /* CONFIG_64BIT */
+extern void vmem_map_init(void);
+/*
+ * paging_init() sets up the page tables
+ */
void __init paging_init(void)
{
- pgd_t * pg_dir;
- pmd_t * pm_dir;
- pte_t * pt_dir;
- pte_t pte;
- int i,j,k;
- unsigned long pfn = 0;
- unsigned long pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) |
- _KERN_REGION_TABLE;
+ pgd_t *pg_dir;
+ int i;
+ unsigned long pgdir_k;
static const int ssm_mask = 0x04000000L;
- unsigned long zones_size[MAX_NR_ZONES];
- unsigned long dma_pfn, high_pfn;
- unsigned long ro_start_pfn, ro_end_pfn;
-
- memset(zones_size, 0, sizeof(zones_size));
- dma_pfn = MAX_DMA_ADDRESS >> PAGE_SHIFT;
- high_pfn = max_low_pfn;
- ro_start_pfn = PFN_DOWN((unsigned long)&__start_rodata);
- ro_end_pfn = PFN_UP((unsigned long)&__end_rodata);
-
- if (dma_pfn > high_pfn)
- zones_size[ZONE_DMA] = high_pfn;
- else {
- zones_size[ZONE_DMA] = dma_pfn;
- zones_size[ZONE_NORMAL] = high_pfn - dma_pfn;
- }
-
- /* Initialize mem_map[]. */
- free_area_init_node(0, &contig_page_data, zones_size,
- __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
+ unsigned long max_zone_pfns[MAX_NR_ZONES];
- /*
- * map whole physical memory to virtual memory (identity mapping)
- */
-
- pg_dir = swapper_pg_dir;
-
- for (i = 0 ; i < PTRS_PER_PGD ; i++,pg_dir++) {
+ pg_dir = swapper_pg_dir;
- if (pfn >= max_low_pfn) {
- pgd_clear(pg_dir);
- continue;
- }
-
- pm_dir = (pmd_t *) alloc_bootmem_pages(PAGE_SIZE * 4);
- pgd_populate(&init_mm, pg_dir, pm_dir);
-
- for (j = 0 ; j < PTRS_PER_PMD ; j++,pm_dir++) {
- if (pfn >= max_low_pfn) {
- pmd_clear(pm_dir);
- continue;
- }
-
- pt_dir = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
- pmd_populate_kernel(&init_mm, pm_dir, pt_dir);
-
- for (k = 0 ; k < PTRS_PER_PTE ; k++,pt_dir++) {
- if (pfn >= ro_start_pfn && pfn < ro_end_pfn)
- pte = pfn_pte(pfn, __pgprot(_PAGE_RO));
- else
- pte = pfn_pte(pfn, PAGE_KERNEL);
- if (pfn >= max_low_pfn)
- pte_val(pte) = _PAGE_TYPE_EMPTY;
- set_pte(pt_dir, pte);
- pfn++;
- }
- }
- }
+#ifdef CONFIG_64BIT
+ pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERN_REGION_TABLE;
+ for (i = 0; i < PTRS_PER_PGD; i++)
+ pgd_clear(pg_dir + i);
+#else
+ pgdir_k = (__pa(swapper_pg_dir) & PAGE_MASK) | _KERNSEG_TABLE;
+ for (i = 0; i < PTRS_PER_PGD; i++)
+ pmd_clear((pmd_t *)(pg_dir + i));
+#endif
+ vmem_map_init();
+ setup_ro_region();
S390_lowcore.kernel_asce = pgdir_k;
@@ -237,9 +142,11 @@ void __init paging_init(void)
__ctl_load(pgdir_k, 13, 13);
__raw_local_irq_ssm(ssm_mask);
- local_flush_tlb();
+ memset(max_zone_pfns, 0, sizeof(max_zone_pfns));
+ max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS);
+ max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
+ free_area_init_nodes(max_zone_pfns);
}
-#endif /* CONFIG_64BIT */
void __init mem_init(void)
{
@@ -269,6 +176,8 @@ void __init mem_init(void)
printk("Write protected kernel read-only data: %#lx - %#lx\n",
(unsigned long)&__start_rodata,
PFN_ALIGN((unsigned long)&__end_rodata) - 1);
+ printk("Virtual memmap size: %ldk\n",
+ (max_pfn * sizeof(struct page)) >> 10);
}
void free_initmem(void)
@@ -279,6 +188,7 @@ void free_initmem(void)
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
ClearPageReserved(virt_to_page(addr));
init_page_count(virt_to_page(addr));
+ memset((void *)addr, POISON_FREE_INITMEM, PAGE_SIZE);
free_page(addr);
totalram_pages++;
}
diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c
new file mode 100644
index 0000000..7f2944d
--- /dev/null
+++ b/arch/s390/mm/vmem.c
@@ -0,0 +1,381 @@
+/*
+ * arch/s390/mm/vmem.c
+ *
+ * Copyright IBM Corp. 2006
+ * Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
+ */
+
+#include <linux/bootmem.h>
+#include <linux/pfn.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/list.h>
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/setup.h>
+#include <asm/tlbflush.h>
+
+unsigned long vmalloc_end;
+EXPORT_SYMBOL(vmalloc_end);
+
+static struct page *vmem_map;
+static DEFINE_MUTEX(vmem_mutex);
+
+struct memory_segment {
+ struct list_head list;
+ unsigned long start;
+ unsigned long size;
+};
+
+static LIST_HEAD(mem_segs);
+
+void memmap_init(unsigned long size, int nid, unsigned long zone,
+ unsigned long start_pfn)
+{
+ struct page *start, *end;
+ struct page *map_start, *map_end;
+ int i;
+
+ start = pfn_to_page(start_pfn);
+ end = start + size;
+
+ for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
+ unsigned long cstart, cend;
+
+ cstart = PFN_DOWN(memory_chunk[i].addr);
+ cend = cstart + PFN_DOWN(memory_chunk[i].size);
+
+ map_start = mem_map + cstart;
+ map_end = mem_map + cend;
+
+ if (map_start < start)
+ map_start = start;
+ if (map_end > end)
+ map_end = end;
+
+ map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1))
+ / sizeof(struct page);
+ map_end += ((PFN_ALIGN((unsigned long) map_end)
+ - (unsigned long) map_end)
+ / sizeof(struct page));
+
+ if (map_start < map_end)
+ memmap_init_zone((unsigned long)(map_end - map_start),
+ nid, zone, page_to_pfn(map_start));
+ }
+}
+
+static inline void *vmem_alloc_pages(unsigned int order)
+{
+ if (slab_is_available())
+ return (void *)__get_free_pages(GFP_KERNEL, order);
+ return alloc_bootmem_pages((1 << order) * PAGE_SIZE);
+}
+
+static inline pmd_t *vmem_pmd_alloc(void)
+{
+ pmd_t *pmd;
+ int i;
+
+ pmd = vmem_alloc_pages(PMD_ALLOC_ORDER);
+ if (!pmd)
+ return NULL;
+ for (i = 0; i < PTRS_PER_PMD; i++)
+ pmd_clear(pmd + i);
+ return pmd;
+}
+
+static inline pte_t *vmem_pte_alloc(void)
+{
+ pte_t *pte;
+ pte_t empty_pte;
+ int i;
+
+ pte = vmem_alloc_pages(PTE_ALLOC_ORDER);
+ if (!pte)
+ return NULL;
+ pte_val(empty_pte) = _PAGE_TYPE_EMPTY;
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ set_pte(pte + i, empty_pte);
+ return pte;
+}
+
+/*
+ * Add a physical memory range to the 1:1 mapping.
+ */
+static int vmem_add_range(unsigned long start, unsigned long size)
+{
+ unsigned long address;
+ pgd_t *pg_dir;
+ pmd_t *pm_dir;
+ pte_t *pt_dir;
+ pte_t pte;
+ int ret = -ENOMEM;
+
+ for (address = start; address < start + size; address += PAGE_SIZE) {
+ pg_dir = pgd_offset_k(address);
+ if (pgd_none(*pg_dir)) {
+ pm_dir = vmem_pmd_alloc();
+ if (!pm_dir)
+ goto out;
+ pgd_populate(&init_mm, pg_dir, pm_dir);
+ }
+
+ pm_dir = pmd_offset(pg_dir, address);
+ if (pmd_none(*pm_dir)) {
+ pt_dir = vmem_pte_alloc();
+ if (!pt_dir)
+ goto out;
+ pmd_populate_kernel(&init_mm, pm_dir, pt_dir);
+ }
+
+ pt_dir = pte_offset_kernel(pm_dir, address);
+ pte = pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL);
+ set_pte(pt_dir, pte);
+ }
+ ret = 0;
+out:
+ flush_tlb_kernel_range(start, start + size);
+ return ret;
+}
+
+/*
+ * Remove a physical memory range from the 1:1 mapping.
+ * Currently only invalidates page table entries.
+ */
+static void vmem_remove_range(unsigned long start, unsigned long size)
+{
+ unsigned long address;
+ pgd_t *pg_dir;
+ pmd_t *pm_dir;
+ pte_t *pt_dir;
+ pte_t pte;
+
+ pte_val(pte) = _PAGE_TYPE_EMPTY;
+ for (address = start; address < start + size; address += PAGE_SIZE) {
+ pg_dir = pgd_offset_k(address);
+ if (pgd_none(*pg_dir))
+ continue;
+ pm_dir = pmd_offset(pg_dir, address);
+ if (pmd_none(*pm_dir))
+ continue;
+ pt_dir = pte_offset_kernel(pm_dir, address);
+ set_pte(pt_dir, pte);
+ }
+ flush_tlb_kernel_range(start, start + size);
+}
+
+/*
+ * Add a backed mem_map array to the virtual mem_map array.
+ */
+static int vmem_add_mem_map(unsigned long start, unsigned long size)
+{
+ unsigned long address, start_addr, end_addr;
+ struct page *map_start, *map_end;
+ pgd_t *pg_dir;
+ pmd_t *pm_dir;
+ pte_t *pt_dir;
+ pte_t pte;
+ int ret = -ENOMEM;
+
+ map_start = vmem_map + PFN_DOWN(start);
+ map_end = vmem_map + PFN_DOWN(start + size);
+
+ start_addr = (unsigned long) map_start & PAGE_MASK;
+ end_addr = PFN_ALIGN((unsigned long) map_end);
+
+ for (address = start_addr; address < end_addr; address += PAGE_SIZE) {
+ pg_dir = pgd_offset_k(address);
+ if (pgd_none(*pg_dir)) {
+ pm_dir = vmem_pmd_alloc();
+ if (!pm_dir)
+ goto out;
+ pgd_populate(&init_mm, pg_dir, pm_dir);
+ }
+
+ pm_dir = pmd_offset(pg_dir, address);
+ if (pmd_none(*pm_dir)) {
+ pt_dir = vmem_pte_alloc();
+ if (!pt_dir)
+ goto out;
+ pmd_populate_kernel(&init_mm, pm_dir, pt_dir);
+ }
+
+ pt_dir = pte_offset_kernel(pm_dir, address);
+ if (pte_none(*pt_dir)) {
+ unsigned long new_page;
+
+ new_page =__pa(vmem_alloc_pages(0));
+ if (!new_page)
+ goto out;
+ pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL);
+ set_pte(pt_dir, pte);
+ }
+ }
+ ret = 0;
+out:
+ flush_tlb_kernel_range(start_addr, end_addr);
+ return ret;
+}
+
+static int vmem_add_mem(unsigned long start, unsigned long size)
+{
+ int ret;
+
+ ret = vmem_add_range(start, size);
+ if (ret)
+ return ret;
+ return vmem_add_mem_map(start, size);
+}
+
+/*
+ * Add memory segment to the segment list if it doesn't overlap with
+ * an already present segment.
+ */
+static int insert_memory_segment(struct memory_segment *seg)
+{
+ struct memory_segment *tmp;
+
+ if (PFN_DOWN(seg->start + seg->size) > max_pfn ||
+ seg->start + seg->size < seg->start)
+ return -ERANGE;
+
+ list_for_each_entry(tmp, &mem_segs, list) {
+ if (seg->start >= tmp->start + tmp->size)
+ continue;
+ if (seg->start + seg->size <= tmp->start)
+ continue;
+ return -ENOSPC;
+ }
+ list_add(&seg->list, &mem_segs);
+ return 0;
+}
+
+/*
+ * Remove memory segment from the segment list.
+ */
+static void remove_memory_segment(struct memory_segment *seg)
+{
+ list_del(&seg->list);
+}
+
+static void __remove_shared_memory(struct memory_segment *seg)
+{
+ remove_memory_segment(seg);
+ vmem_remove_range(seg->start, seg->size);
+}
+
+int remove_shared_memory(unsigned long start, unsigned long size)
+{
+ struct memory_segment *seg;
+ int ret;
+
+ mutex_lock(&vmem_mutex);
+
+ ret = -ENOENT;
+ list_for_each_entry(seg, &mem_segs, list) {
+ if (seg->start == start && seg->size == size)
+ break;
+ }
+
+ if (seg->start != start || seg->size != size)
+ goto out;
+
+ ret = 0;
+ __remove_shared_memory(seg);
+ kfree(seg);
+out:
+ mutex_unlock(&vmem_mutex);
+ return ret;
+}
+
+int add_shared_memory(unsigned long start, unsigned long size)
+{
+ struct memory_segment *seg;
+ struct page *page;
+ unsigned long pfn, num_pfn, end_pfn;
+ int ret;
+
+ mutex_lock(&vmem_mutex);
+ ret = -ENOMEM;
+ seg = kzalloc(sizeof(*seg), GFP_KERNEL);
+ if (!seg)
+ goto out;
+ seg->start = start;
+ seg->size = size;
+
+ ret = insert_memory_segment(seg);
+ if (ret)
+ goto out_free;
+
+ ret = vmem_add_mem(start, size);
+ if (ret)
+ goto out_remove;
+
+ pfn = PFN_DOWN(start);
+ num_pfn = PFN_DOWN(size);
+ end_pfn = pfn + num_pfn;
+
+ page = pfn_to_page(pfn);
+ memset(page, 0, num_pfn * sizeof(struct page));
+
+ for (; pfn < end_pfn; pfn++) {
+ page = pfn_to_page(pfn);
+ init_page_count(page);
+ reset_page_mapcount(page);
+ SetPageReserved(page);
+ INIT_LIST_HEAD(&page->lru);
+ }
+ goto out;
+
+out_remove:
+ __remove_shared_memory(seg);
+out_free:
+ kfree(seg);
+out:
+ mutex_unlock(&vmem_mutex);
+ return ret;
+}
+
+/*
+ * map whole physical memory to virtual memory (identity mapping)
+ */
+void __init vmem_map_init(void)
+{
+ unsigned long map_size;
+ int i;
+
+ map_size = ALIGN(max_low_pfn, MAX_ORDER_NR_PAGES) * sizeof(struct page);
+ vmalloc_end = PFN_ALIGN(VMALLOC_END_INIT) - PFN_ALIGN(map_size);
+ vmem_map = (struct page *) vmalloc_end;
+ NODE_DATA(0)->node_mem_map = vmem_map;
+
+ for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++)
+ vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size);
+}
+
+/*
+ * Convert memory chunk array to a memory segment list so there is a single
+ * list that contains both r/w memory and shared memory segments.
+ */
+static int __init vmem_convert_memory_chunk(void)
+{
+ struct memory_segment *seg;
+ int i;
+
+ mutex_lock(&vmem_mutex);
+ for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
+ if (!memory_chunk[i].size)
+ continue;
+ seg = kzalloc(sizeof(*seg), GFP_KERNEL);
+ if (!seg)
+ panic("Out of memory...\n");
+ seg->start = memory_chunk[i].addr;
+ seg->size = memory_chunk[i].size;
+ insert_memory_segment(seg);
+ }
+ mutex_unlock(&vmem_mutex);
+ return 0;
+}
+
+core_initcall(vmem_convert_memory_chunk);
OpenPOWER on IntegriCloud