author    peter <peter@FreeBSD.org>    2003-05-23 05:04:54 +0000
committer peter <peter@FreeBSD.org>    2003-05-23 05:04:54 +0000
commit    eea63ec45a461d952c1d8475e776a659c6d0da7f (patch)
tree      42ee5a0610b13a0d25a11750527875816da85e00
parent    40b279d76093e6ddc4bcd1eecb07a90ccc00467c (diff)
Major pmap rework to take advantage of the larger address space on amd64
systems.  Of note:
- Implement a direct mapped region using 2MB pages.  This eliminates the
  need for temporary mappings when getting ptes.  This supports up to
  512GB of physical memory for now.  This should be enough for a while.
- Implement a 4-tier page table system.  Most of the infrastructure is
  there for 128TB of userland virtual address space, but only 512GB is
  presently enabled due to a mystery bug somewhere.  The design of this
  was heavily inspired by the alpha pmap.c.
- The kernel is moved into the negative address space(!).
- The kernel has 2GB of KVM available.
- Provide a uma memory allocator to use the direct map region to take
  advantage of the 2MB TLBs.
- Fixed some assumptions in the bus_space macros about the ability to fit
  virtual addresses in an 'int'.

Notable missing things:
- pmap_growkernel() should be able to grow to 512GB of KVM by expanding
  downwards below kernbase.  The kernel must be at the top 2GB of the
  negative address space because of gcc code generation strategies.
- need to fix the >512GB user vm code.

Approved by:	re (blanket)
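The two mechanisms the message leans on, the 4-tier page table walk and the
2MB-page direct map, come down to plain address arithmetic.  The sketch below
is illustrative only and is not part of the commit; the shift constants mirror
the new amd64/include/param.h values, and the direct-map base is an assumption
derived from the DMPML4I slot chosen in pmap.h.

/*
 * Illustrative user-space sketch (assumes LP64): how a virtual address
 * splits into the four per-level indexes, and how the direct map turns
 * a physical address into a kernel virtual address without a temporary
 * mapping.
 */
#include <stdio.h>

#define PAGE_SHIFT      12                      /* 4KB pages */
#define PDRSHIFT        21                      /* 2MB superpage boundary */
#define PDPSHIFT        30
#define PML4SHIFT       39
#define IDXMASK         ((1ul << 9) - 1)        /* 512 entries per level */

/* Assumed: base of the direct map slot (DMPML4I) after sign extension. */
#define DMAP_BASE       0xffffff0000000000ul

int
main(void)
{
        unsigned long va = 0x00007f1234567000ul;        /* example user VA */
        unsigned long pa = 0x0000000123456000ul;        /* example phys addr */

        printf("pml4 index: %lu\n", (va >> PML4SHIFT) & IDXMASK);
        printf("pdp  index: %lu\n", (va >> PDPSHIFT) & IDXMASK);
        printf("pd   index: %lu\n", (va >> PDRSHIFT) & IDXMASK);
        printf("pt   index: %lu\n", (va >> PAGE_SHIFT) & IDXMASK);

        /* PHYS_TO_DMAP()/DMAP_TO_PHYS() reduce to adding/subtracting a constant. */
        printf("PHYS_TO_DMAP(pa): %#lx\n", DMAP_BASE + pa);
        printf("DMAP_TO_PHYS(..): %#lx\n", (DMAP_BASE + pa) - DMAP_BASE);
        return (0);
}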
-rw-r--r--sys/amd64/amd64/genassym.c12
-rw-r--r--sys/amd64/amd64/locore.S9
-rw-r--r--sys/amd64/amd64/machdep.c86
-rw-r--r--sys/amd64/amd64/mem.c3
-rw-r--r--sys/amd64/amd64/nexus.c4
-rw-r--r--sys/amd64/amd64/pmap.c1003
-rw-r--r--sys/amd64/amd64/trap.c2
-rw-r--r--sys/amd64/amd64/vm_machdep.c2
-rw-r--r--sys/amd64/include/bus_amd64.h12
-rw-r--r--sys/amd64/include/param.h12
-rw-r--r--sys/amd64/include/pmap.h79
-rw-r--r--sys/amd64/include/vmparam.h36
-rw-r--r--sys/conf/kern.mk2
13 files changed, 656 insertions, 606 deletions
diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c
index 3ccad23..7f84e3d 100644
--- a/sys/amd64/amd64/genassym.c
+++ b/sys/amd64/amd64/genassym.c
@@ -99,8 +99,11 @@ ASSYM(KSTACK_PAGES, KSTACK_PAGES);
ASSYM(PAGE_SIZE, PAGE_SIZE);
ASSYM(NPTEPG, NPTEPG);
ASSYM(NPDEPG, NPDEPG);
-ASSYM(NPDEPTD, NPDEPTD);
-ASSYM(NPGPTD, NPGPTD);
+ASSYM(addr_PTmap, addr_PTmap);
+ASSYM(addr_PDmap, addr_PDmap);
+ASSYM(addr_PDPmap, addr_PDPmap);
+ASSYM(addr_PML4map, addr_PML4map);
+ASSYM(addr_PML4pml4e, addr_PML4pml4e);
ASSYM(PDESIZE, sizeof(pd_entry_t));
ASSYM(PTESIZE, sizeof(pt_entry_t));
ASSYM(PTESHIFT, PTESHIFT);
@@ -109,9 +112,14 @@ ASSYM(PAGE_MASK, PAGE_MASK);
ASSYM(PDRSHIFT, PDRSHIFT);
ASSYM(PDPSHIFT, PDPSHIFT);
ASSYM(PML4SHIFT, PML4SHIFT);
+ASSYM(val_KPDPI, KPDPI);
+ASSYM(val_KPML4I, KPML4I);
+ASSYM(val_PML4PML4I, PML4PML4I);
ASSYM(USRSTACK, USRSTACK);
ASSYM(VM_MAXUSER_ADDRESS, VM_MAXUSER_ADDRESS);
ASSYM(KERNBASE, KERNBASE);
+ASSYM(DMAP_MIN_ADDRESS, DMAP_MIN_ADDRESS);
+ASSYM(DMAP_MAX_ADDRESS, DMAP_MAX_ADDRESS);
ASSYM(MCLBYTES, MCLBYTES);
ASSYM(PCB_CR3, offsetof(struct pcb, pcb_cr3));
ASSYM(PCB_R15, offsetof(struct pcb, pcb_r15));
diff --git a/sys/amd64/amd64/locore.S b/sys/amd64/amd64/locore.S
index 04d2ac5..0d2c2cc 100644
--- a/sys/amd64/amd64/locore.S
+++ b/sys/amd64/amd64/locore.S
@@ -36,8 +36,15 @@
/*
* Compiled KERNBASE location
*/
- .globl kernbase
+ .globl kernbase,loc_PTmap,loc_PDmap,loc_PDPmap,loc_PML4map,loc_PML4pml4e,dmapbase,dmapend
.set kernbase,KERNBASE
+ .set loc_PTmap,addr_PTmap
+ .set loc_PDmap,addr_PDmap
+ .set loc_PDPmap,addr_PDPmap
+ .set loc_PML4map,addr_PML4map
+ .set loc_PML4pml4e,addr_PML4pml4e
+ .set dmapbase,DMAP_MIN_ADDRESS
+ .set dmapend,DMAP_MAX_ADDRESS
.text
/**********************************************************************
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index c9c4d93..6afe51a 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -133,11 +133,6 @@ u_long atdevbase;
u_int64_t modulep; /* phys addr of metadata table */
u_int64_t physfree; /* first free page after kernel */
-u_int64_t IdlePTD; /* phys addr of kernel PTD */
-u_int64_t IdlePDP; /* phys addr of kernel level 3 */
-u_int64_t IdlePML4; /* phys addr of kernel level 4 */
-struct user *proc0uarea; /* address of proc 0 uarea space */
-vm_offset_t proc0kstack; /* address of proc 0 kstack space */
int cold = 1;
@@ -945,7 +940,7 @@ physmap_done:
physmap[physmap_idx + 1] = ptoa((vm_paddr_t)Maxmem);
/* call pmap initialization to make new kernel address space */
- pmap_bootstrap(first, 0);
+ pmap_bootstrap(&first);
/*
* Size up each available chunk of physical memory.
@@ -1086,69 +1081,6 @@ allocpages(int n)
return (ret);
}
-static void
-create_pagetables(void)
-{
- u_int64_t p0kpa;
- u_int64_t p0upa;
- u_int64_t KPTphys;
- int i;
-
- /* Allocate pages */
- KPTphys = allocpages(NKPT);
- IdlePML4 = allocpages(NKPML4E);
- IdlePDP = allocpages(NKPDPE);
- IdlePTD = allocpages(NPGPTD);
- p0upa = allocpages(UAREA_PAGES);
- p0kpa = allocpages(KSTACK_PAGES);
-
- proc0uarea = (struct user *)(p0upa + KERNBASE);
- proc0kstack = p0kpa + KERNBASE;
-
- /* Fill in the underlying page table pages */
- /* Read-only from zero to physfree */
- /* XXX not fully used, underneath 2M pages */
- for (i = 0; (i << PAGE_SHIFT) < physfree; i++) {
- ((pt_entry_t *)KPTphys)[i] = i << PAGE_SHIFT;
- ((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V;
- }
-
- /* Now map the page tables at their location within PTmap */
- for (i = 0; i < NKPT; i++) {
- ((pd_entry_t *)IdlePTD)[i + KPTDI] = KPTphys + (i << PAGE_SHIFT);
- ((pd_entry_t *)IdlePTD)[i + KPTDI] |= PG_RW | PG_V;
- }
-
- /* Map from zero to end of allocations under 2M pages */
- /* This replaces some of the PTD entries above */
- for (i = 0; (i << PDRSHIFT) < physfree; i++) {
- ((pd_entry_t *)IdlePTD)[i] = i << PDRSHIFT;
- ((pd_entry_t *)IdlePTD)[i] |= PG_RW | PG_V | PG_PS;
- }
-
- /* Now map the page tables at their location within PTmap */
- for (i = 0; i < NKPT; i++) {
- ((pd_entry_t *)IdlePTD)[i] = KPTphys + (i << PAGE_SHIFT);
- ((pd_entry_t *)IdlePTD)[i] |= PG_RW | PG_V;
- }
-
- /* Now map the PTD at the top of the PTmap (ie: PTD[]) */
- for (i = 0; i < NPGPTD; i++) {
- ((pd_entry_t *)IdlePTD)[i + PTDPTDI] = IdlePTD + (i << PAGE_SHIFT);
- ((pd_entry_t *)IdlePTD)[i + PTDPTDI] |= PG_RW | PG_V;
- }
-
- /* And connect up the PTD to the PDP */
- for (i = 0; i < NPGPTD; i++) {
- ((pdp_entry_t *)IdlePDP)[i] = IdlePTD + (i << PAGE_SHIFT);
- ((pdp_entry_t *)IdlePDP)[i] |= PG_RW | PG_V | PG_U;
- }
-
- /* And connect up the PDP to the PML4 */
- ((pdp_entry_t *)IdlePML4)[0] = IdlePDP;
- ((pdp_entry_t *)IdlePML4)[0] |= PG_RW | PG_V | PG_U;
-}
-
void
hammer_time(void)
{
@@ -1157,18 +1089,14 @@ hammer_time(void)
struct region_descriptor r_gdt, r_idt;
struct pcpu *pc;
u_int64_t msr;
+ char *env;
/* Turn on PTE NX (no execute) bit */
msr = rdmsr(MSR_EFER) | EFER_NXE;
wrmsr(MSR_EFER, msr);
- create_pagetables();
- /* XXX do %cr0 as well */
- load_cr4(rcr4() | CR4_PGE | CR4_PSE);
- load_cr3(IdlePML4);
-
- proc0.p_uarea = proc0uarea;
- thread0.td_kstack = proc0kstack;
+ proc0.p_uarea = (struct user *)(allocpages(UAREA_PAGES) + KERNBASE);
+ thread0.td_kstack = allocpages(KSTACK_PAGES) + KERNBASE;
thread0.td_pcb = (struct pcb *)
(thread0.td_kstack + KSTACK_PAGES * PAGE_SIZE) - 1;
atdevbase = ISA_HOLE_START + KERNBASE;
@@ -1310,8 +1238,12 @@ hammer_time(void)
/* setup proc 0's pcb */
thread0.td_pcb->pcb_flags = 0; /* XXXKSE */
- thread0.td_pcb->pcb_cr3 = IdlePML4;
+ thread0.td_pcb->pcb_cr3 = KPML4phys;
thread0.td_frame = &proc0_tf;
+
+ env = getenv("kernelname");
+ if (env != NULL)
+ strlcpy(kernelname, env, sizeof(kernelname));
}
void
diff --git a/sys/amd64/amd64/mem.c b/sys/amd64/amd64/mem.c
index 38a6e13..dd403f7 100644
--- a/sys/amd64/amd64/mem.c
+++ b/sys/amd64/amd64/mem.c
@@ -63,6 +63,7 @@
#include <machine/frame.h>
#include <machine/psl.h>
#include <machine/specialreg.h>
+#include <machine/vmparam.h>
#include <vm/vm.h>
#include <vm/pmap.h>
@@ -177,7 +178,7 @@ mmrw(dev_t dev, struct uio *uio, int flags)
addr = trunc_page(uio->uio_offset);
eaddr = round_page(uio->uio_offset + c);
- if (addr < (vm_offset_t)VADDR(0, 0, PTDPTDI, 0))
+ if (addr < (vm_offset_t)KERNBASE)
return (EFAULT);
for (; addr < eaddr; addr += PAGE_SIZE)
if (pmap_extract(kernel_pmap, addr) == 0)
diff --git a/sys/amd64/amd64/nexus.c b/sys/amd64/amd64/nexus.c
index a3cc266..a854252 100644
--- a/sys/amd64/amd64/nexus.c
+++ b/sys/amd64/amd64/nexus.c
@@ -355,8 +355,8 @@ nexus_activate_resource(device_t bus, device_t child, int type, int rid,
*/
vaddr = (caddr_t)(uintptr_t)(KERNBASE + rman_get_start(r));
} else {
- u_int32_t paddr;
- u_int32_t psize;
+ u_int64_t paddr;
+ u_int64_t psize;
u_int32_t poffs;
paddr = rman_get_start(r);
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 1700077d..d3cdb59 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -125,6 +125,7 @@
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/uma.h>
+#include <vm/uma_int.h>
#include <machine/cpu.h>
#include <machine/cputypes.h>
@@ -149,41 +150,35 @@
#endif
/*
- * Get PDEs and PTEs for user/kernel address space
- */
-#define pmap_pde(m, v) (&((m)->pm_pdir[(vm_offset_t)(v) >> PDRSHIFT]))
-#define pdir_pde(m, v) (m[(vm_offset_t)(v) >> PDRSHIFT])
-
-#define pmap_pde_v(pte) ((*(int *)pte & PG_V) != 0)
-#define pmap_pte_w(pte) ((*(int *)pte & PG_W) != 0)
-#define pmap_pte_m(pte) ((*(int *)pte & PG_M) != 0)
-#define pmap_pte_u(pte) ((*(int *)pte & PG_A) != 0)
-#define pmap_pte_v(pte) ((*(int *)pte & PG_V) != 0)
-
-#define pmap_pte_set_w(pte, v) ((v)?(*(int *)pte |= PG_W):(*(int *)pte &= ~PG_W))
-#define pmap_pte_set_prot(pte, v) ((*(int *)pte &= ~PG_PROT), (*(int *)pte |= (v)))
-
-/*
* Given a map and a machine independent protection code,
* convert to a vax protection code.
*/
#define pte_prot(m, p) (protection_codes[p])
-static int protection_codes[8];
+static pt_entry_t protection_codes[8];
struct pmap kernel_pmap_store;
LIST_HEAD(pmaplist, pmap);
static struct pmaplist allpmaps;
static struct mtx allpmaps_lock;
-vm_paddr_t avail_start; /* PA of first available physical page */
-vm_paddr_t avail_end; /* PA of last available physical page */
+vm_paddr_t avail_start; /* PA of first available physical page */
+vm_paddr_t avail_end; /* PA of last available physical page */
vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */
static boolean_t pmap_initialized = FALSE; /* Has pmap_init completed? */
static int nkpt;
+static int ndmpdp;
vm_offset_t kernel_vm_end;
+static u_int64_t KPTphys; /* phys addr of kernel level 1 */
+static u_int64_t KPDphys; /* phys addr of kernel level 2 */
+static u_int64_t KPDPphys; /* phys addr of kernel level 3 */
+u_int64_t KPML4phys; /* phys addr of kernel level 4 */
+
+static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */
+static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */
+
/*
* Data for the pv entry allocation mechanism
*/
@@ -196,10 +191,8 @@ int pmap_pagedaemon_waken;
* All those kernel PT submaps that BSD is so fond of
*/
pt_entry_t *CMAP1 = 0;
-static pt_entry_t *CMAP2, *CMAP3, *ptmmap;
+static pt_entry_t *ptmmap;
caddr_t CADDR1 = 0, ptvmmap = 0;
-static caddr_t CADDR2, CADDR3;
-static struct mtx CMAPCADDR12_lock;
static pt_entry_t *msgbufmap;
struct msgbuf *msgbufp = 0;
@@ -209,12 +202,9 @@ struct msgbuf *msgbufp = 0;
static pt_entry_t *pt_crashdumpmap;
static caddr_t crashdumpmap;
-static pt_entry_t *PMAP1 = 0;
-static pt_entry_t *PADDR1 = 0;
-
static PMAP_INLINE void free_pv_entry(pv_entry_t pv);
static pv_entry_t get_pv_entry(void);
-static void i386_protection_init(void);
+static void amd64_protection_init(void);
static __inline void pmap_changebit(vm_page_t m, int bit, boolean_t setem);
static vm_page_t pmap_enter_quick(pmap_t pmap, vm_offset_t va,
@@ -228,7 +218,7 @@ static void pmap_insert_entry(pmap_t pmap, vm_offset_t va,
static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va);
-static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex);
+static vm_page_t _pmap_allocpte(pmap_t pmap, vm_pindex_t ptepindex);
static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex);
static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t);
static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
@@ -252,10 +242,195 @@ pmap_kmem_choose(vm_offset_t addr)
return newaddr;
}
+/********************/
+/* Inline functions */
+/********************/
+
+/* Return a non-clipped PD index for a given VA */
+static __inline vm_pindex_t
+pmap_pde_pindex(vm_offset_t va)
+{
+ return va >> PDRSHIFT;
+}
+
+
+/* Return various clipped indexes for a given VA */
+static __inline vm_pindex_t
+pmap_pte_index(vm_offset_t va)
+{
+
+ return ((va >> PAGE_SHIFT) & ((1ul << NPTEPGSHIFT) - 1));
+}
+
+static __inline vm_pindex_t
+pmap_pde_index(vm_offset_t va)
+{
+
+ return ((va >> PDRSHIFT) & ((1ul << NPDEPGSHIFT) - 1));
+}
+
+static __inline vm_pindex_t
+pmap_pdpe_index(vm_offset_t va)
+{
+
+ return ((va >> PDPSHIFT) & ((1ul << NPDPEPGSHIFT) - 1));
+}
+
+static __inline vm_pindex_t
+pmap_pml4e_index(vm_offset_t va)
+{
+
+ return ((va >> PML4SHIFT) & ((1ul << NPML4EPGSHIFT) - 1));
+}
+
+/* Return a pointer to the PML4 slot that corresponds to a VA */
+static __inline pml4_entry_t *
+pmap_pml4e(pmap_t pmap, vm_offset_t va)
+{
+
+ if (!pmap)
+ return NULL;
+ return (&pmap->pm_pml4[pmap_pml4e_index(va)]);
+}
+
+/* Return a pointer to the PDP slot that corresponds to a VA */
+static __inline pdp_entry_t *
+pmap_pdpe(pmap_t pmap, vm_offset_t va)
+{
+ pml4_entry_t *pml4e;
+ pdp_entry_t *pdpe;
+
+ pml4e = pmap_pml4e(pmap, va);
+ if (pml4e == NULL || (*pml4e & PG_V) == 0)
+ return NULL;
+ pdpe = (pdp_entry_t *)PHYS_TO_DMAP(*pml4e & PG_FRAME);
+ return (&pdpe[pmap_pdpe_index(va)]);
+}
+
+/* Return a pointer to the PD slot that corresponds to a VA */
+static __inline pd_entry_t *
+pmap_pde(pmap_t pmap, vm_offset_t va)
+{
+ pdp_entry_t *pdpe;
+ pd_entry_t *pde;
+
+ pdpe = pmap_pdpe(pmap, va);
+ if (pdpe == NULL || (*pdpe & PG_V) == 0)
+ return NULL;
+ pde = (pd_entry_t *)PHYS_TO_DMAP(*pdpe & PG_FRAME);
+ return (&pde[pmap_pde_index(va)]);
+}
+
+/* Return a pointer to the PT slot that corresponds to a VA */
+static __inline pt_entry_t *
+pmap_pte(pmap_t pmap, vm_offset_t va)
+{
+ pd_entry_t *pde;
+ pt_entry_t *pte;
+
+ pde = pmap_pde(pmap, va);
+ if (pde == NULL || (*pde & PG_V) == 0)
+ return NULL;
+ pte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
+ return (&pte[pmap_pte_index(va)]);
+}
+
+
+PMAP_INLINE pt_entry_t *
+vtopte(vm_offset_t va)
+{
+ u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT + NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);
+
+ return (PTmap + (amd64_btop(va) & mask));
+}
+
+static u_int64_t
+allocpages(int n)
+{
+ u_int64_t ret;
+
+ ret = avail_start;
+ bzero((void *)ret, n * PAGE_SIZE);
+ avail_start += n * PAGE_SIZE;
+ return (ret);
+}
+
+static void
+create_pagetables(void)
+{
+ int i;
+
+ /* Allocate pages */
+ KPTphys = allocpages(NKPT);
+ KPML4phys = allocpages(1);
+ KPDPphys = allocpages(NKPML4E);
+ KPDphys = allocpages(NKPDPE);
+
+ ndmpdp = (ptoa(Maxmem) + NBPDP - 1) >> PDPSHIFT;
+ if (ndmpdp < 1)
+ ndmpdp = 1;
+ DMPDPphys = allocpages(NDMPML4E);
+ DMPDphys = allocpages(ndmpdp);
+
+ /* Fill in the underlying page table pages */
+ /* Read-only from zero to physfree */
+ /* XXX not fully used, underneath 2M pages */
+ for (i = 0; (i << PAGE_SHIFT) < avail_start; i++) {
+ ((pt_entry_t *)KPTphys)[i] = i << PAGE_SHIFT;
+ ((pt_entry_t *)KPTphys)[i] |= PG_RW | PG_V;
+ }
+
+ /* Now map the page tables at their location within PTmap */
+ for (i = 0; i < NKPT; i++) {
+ ((pd_entry_t *)KPDphys)[i] = KPTphys + (i << PAGE_SHIFT);
+ ((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V;
+ }
+
+#if 0
+ /* Map from zero to end of allocations under 2M pages */
+ /* This replaces some of the KPTphys entries above */
+ for (i = 0; (i << PDRSHIFT) < avail_start; i++) {
+ ((pd_entry_t *)KPDphys)[i] = i << PDRSHIFT;
+ ((pd_entry_t *)KPDphys)[i] |= PG_RW | PG_V | PG_PS;
+ }
+#endif
+
+ /* And connect up the PD to the PDP */
+ for (i = 0; i < NKPDPE; i++) {
+ ((pdp_entry_t *)KPDPphys)[i + KPDPI] = KPDphys + (i << PAGE_SHIFT);
+ ((pdp_entry_t *)KPDPphys)[i + KPDPI] |= PG_RW | PG_V | PG_U;
+ }
+
+
+ /* Now set up the direct map space using 2MB pages */
+ for (i = 0; i < NPDEPG * ndmpdp; i++) {
+ ((pd_entry_t *)DMPDphys)[i] = i << PDRSHIFT;
+ ((pd_entry_t *)DMPDphys)[i] |= PG_RW | PG_V | PG_PS;
+ }
+
+ /* And the direct map space's PDP */
+ for (i = 0; i < ndmpdp; i++) {
+ ((pdp_entry_t *)DMPDPphys)[i] = DMPDphys + (i << PAGE_SHIFT);
+ ((pdp_entry_t *)DMPDPphys)[i] |= PG_RW | PG_V | PG_U;
+ }
+
+ /* And recursively map PML4 to itself in order to get PTmap */
+ ((pdp_entry_t *)KPML4phys)[PML4PML4I] = KPML4phys;
+ ((pdp_entry_t *)KPML4phys)[PML4PML4I] |= PG_RW | PG_V | PG_U;
+
+ /* Connect the Direct Map slot up to the PML4 */
+ ((pdp_entry_t *)KPML4phys)[DMPML4I] = DMPDPphys;
+ ((pdp_entry_t *)KPML4phys)[DMPML4I] |= PG_RW | PG_V | PG_U;
+
+ /* Connect the KVA slot up to the PML4 */
+ ((pdp_entry_t *)KPML4phys)[KPML4I] = KPDPphys;
+ ((pdp_entry_t *)KPML4phys)[KPML4I] |= PG_RW | PG_V | PG_U;
+}
+
/*
* Bootstrap the system enough to run with virtual memory.
*
- * On the i386 this is called after mapping has already been enabled
+ * On amd64 this is called after mapping has already been enabled
* and just syncs the pmap module with what has already been done.
* [We can't call it easily with mapping off since the kernel is not
* mapped with PA == VA, hence we would have to relocate every address
@@ -263,40 +438,39 @@ pmap_kmem_choose(vm_offset_t addr)
* (physical) address starting relative to 0]
*/
void
-pmap_bootstrap(firstaddr, loadaddr)
- vm_paddr_t firstaddr;
- vm_paddr_t loadaddr;
+pmap_bootstrap(firstaddr)
+ vm_paddr_t *firstaddr;
{
vm_offset_t va;
pt_entry_t *pte;
- int i;
- avail_start = firstaddr;
+ avail_start = *firstaddr;
/*
- * XXX The calculation of virtual_avail is wrong. It's NKPT*PAGE_SIZE too
- * large. It should instead be correctly calculated in locore.s and
- * not based on 'first' (which is a physical address, not a virtual
- * address, for the start of unused physical memory). The kernel
- * page tables are NOT double mapped and thus should not be included
- * in this calculation.
+ * Create an initial set of page tables to run the kernel in.
*/
- virtual_avail = (vm_offset_t) KERNBASE + firstaddr;
+ create_pagetables();
+ *firstaddr = avail_start;
+
+ virtual_avail = (vm_offset_t) KERNBASE + avail_start;
virtual_avail = pmap_kmem_choose(virtual_avail);
virtual_end = VM_MAX_KERNEL_ADDRESS;
+
+ /* XXX do %cr0 as well */
+ load_cr4(rcr4() | CR4_PGE | CR4_PSE);
+ load_cr3(KPML4phys);
+
/*
* Initialize protection array.
*/
- i386_protection_init();
+ amd64_protection_init();
/*
* Initialize the kernel pmap (which is statically allocated).
*/
- kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + IdlePTD);
- kernel_pmap->pm_pdp = (pdp_entry_t *) (KERNBASE + IdlePDP);
- kernel_pmap->pm_pml4 = (pdp_entry_t *) (KERNBASE + IdlePML4);
+ kernel_pmap->pm_pml4 = (pdp_entry_t *) (KERNBASE + KPML4phys);
kernel_pmap->pm_active = -1; /* don't allow deactivation */
TAILQ_INIT(&kernel_pmap->pm_pvlist);
LIST_INIT(&allpmaps);
@@ -316,15 +490,10 @@ pmap_bootstrap(firstaddr, loadaddr)
va = virtual_avail;
pte = vtopte(va);
- /*
- * CMAP1/CMAP2 are used for zeroing and copying pages.
- * CMAP3 is used for the idle process page zeroing.
+ /*
+ * CMAP1 is only used for the memory test.
*/
SYSMAP(caddr_t, CMAP1, CADDR1, 1)
- SYSMAP(caddr_t, CMAP2, CADDR2, 1)
- SYSMAP(caddr_t, CMAP3, CADDR3, 1)
-
- mtx_init(&CMAPCADDR12_lock, "CMAPCADDR12", NULL, MTX_DEF);
/*
* Crashdump maps.
@@ -344,16 +513,9 @@ pmap_bootstrap(firstaddr, loadaddr)
SYSMAP(struct msgbuf *, msgbufmap, msgbufp,
atop(round_page(MSGBUF_SIZE)))
- /*
- * ptemap is used for pmap_pte_quick
- */
- SYSMAP(pt_entry_t *, PMAP1, PADDR1, 1);
-
virtual_avail = va;
- *CMAP1 = *CMAP2 = 0;
- for (i = 0; i < NKPT; i++)
- PTD[i] = 0;
+ *CMAP1 = 0;
invltlb();
}
@@ -365,6 +527,52 @@ pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
return (void *)kmem_alloc(kernel_map, bytes);
}
+void *
+uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+{
+ static vm_pindex_t colour;
+ vm_page_t m;
+ int pflags;
+ void *va;
+
+ *flags = UMA_SLAB_PRIV;
+
+ if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT)
+ pflags = VM_ALLOC_INTERRUPT;
+ else
+ pflags = VM_ALLOC_SYSTEM;
+
+ if (wait & M_ZERO)
+ pflags |= VM_ALLOC_ZERO;
+
+ for (;;) {
+ m = vm_page_alloc(NULL, colour++, pflags | VM_ALLOC_NOOBJ);
+ if (m == NULL) {
+ if (wait & M_NOWAIT)
+ return (NULL);
+ else
+ VM_WAIT;
+ } else
+ break;
+ }
+
+ va = (void *)PHYS_TO_DMAP(m->phys_addr);
+ if ((m->flags & PG_ZERO) == 0)
+ pagezero(va);
+ return (va);
+}
+
+void
+uma_small_free(void *mem, int size, u_int8_t flags)
+{
+ vm_page_t m;
+
+ m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)mem));
+ vm_page_lock_queues();
+ vm_page_free(m);
+ vm_page_unlock_queues();
+}
+
/*
* Initialize the pmap module.
* Called by vm_init, to initialize any structures that the pmap
@@ -502,39 +710,7 @@ static __inline int
pmap_is_current(pmap_t pmap)
{
return (pmap == kernel_pmap ||
- (pmap->pm_pdir[PTDPTDI] & PG_FRAME) == (PTDpde[0] & PG_FRAME));
-}
-
-/*
- * Super fast pmap_pte routine best used when scanning
- * the pv lists. This eliminates many coarse-grained
- * invltlb calls. Note that many of the pv list
- * scans are across different pmaps. It is very wasteful
- * to do an entire invltlb for checking a single mapping.
- */
-pt_entry_t *
-pmap_pte_quick(pmap, va)
- register pmap_t pmap;
- vm_offset_t va;
-{
- pd_entry_t newpf;
- pd_entry_t *pde;
-
- pde = pmap_pde(pmap, va);
- if (*pde & PG_PS)
- return (pde);
- if (*pde != 0) {
- /* are we current address space or kernel? */
- if (pmap_is_current(pmap))
- return vtopte(va);
- newpf = *pde & PG_FRAME;
- if (((*PMAP1) & PG_FRAME) != newpf) {
- *PMAP1 = newpf | PG_RW | PG_V;
- pmap_invalidate_page(kernel_pmap, (vm_offset_t)PADDR1);
- }
- return PADDR1 + (amd64_btop(va) & (NPTEPG - 1));
- }
- return (0);
+ (pmap->pm_pml4[PML4PML4I] & PG_FRAME) == (PML4pml4e[0] & PG_FRAME));
}
/*
@@ -550,24 +726,47 @@ pmap_extract(pmap, va)
{
vm_paddr_t rtval;
pt_entry_t *pte;
- pd_entry_t pde;
+ pd_entry_t pde, *pdep;
if (pmap == 0)
return 0;
- pde = pmap->pm_pdir[va >> PDRSHIFT];
- if (pde != 0) {
- if ((pde & PG_PS) != 0) {
- rtval = (pde & ~PDRMASK) | (va & PDRMASK);
+ pdep = pmap_pde(pmap, va);
+ if (pdep) {
+ pde = *pdep;
+ if (pde) {
+ if ((pde & PG_PS) != 0) {
+ rtval = (pde & ~PDRMASK) | (va & PDRMASK);
+ return rtval;
+ }
+ pte = pmap_pte(pmap, va);
+ rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
return rtval;
}
- pte = pmap_pte_quick(pmap, va);
- rtval = ((*pte & PG_FRAME) | (va & PAGE_MASK));
- return rtval;
}
return 0;
}
+vm_paddr_t
+pmap_kextract(vm_offset_t va)
+{
+ pd_entry_t *pde;
+ vm_paddr_t pa;
+
+ if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
+ pa = DMAP_TO_PHYS(va);
+ } else {
+ pde = pmap_pde(kernel_pmap, va);
+ if (*pde & PG_PS) {
+ pa = (*pde & ~(NBPDR - 1)) | (va & (NBPDR - 1));
+ } else {
+ pa = *vtopte(va);
+ pa = (pa & PG_FRAME) | (va & PAGE_MASK);
+ }
+ }
+ return pa;
+}
+
/***************************************************
* Low level mapping routines.....
***************************************************/
@@ -689,7 +888,7 @@ retry:
#endif
/*
- * Create the kernel stack (including pcb for i386) for a new thread.
+ * Create the kernel stack (including pcb for amd64) for a new thread.
* This routine directly affects the fork perf for a process and
* create performance for a thread.
*/
@@ -899,7 +1098,7 @@ pmap_swapin_thread(td)
* drops to zero, then it decrements the wire count.
*/
static int
-_pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
+_pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
while (vm_page_sleep_if_busy(m, FALSE, "pmuwpt"))
@@ -910,14 +1109,51 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
/*
* unmap the page table page
*/
- pmap->pm_pdir[m->pindex] = 0;
+ if (m->pindex >= (NUPDE + NUPDPE)) {
+ /* PDP page */
+ pml4_entry_t *pml4;
+ pml4 = pmap_pml4e(pmap, va);
+ pteva = (vm_offset_t) PDPmap + amd64_ptob(m->pindex - (NUPDE + NUPDPE));
+ *pml4 = 0;
+ } else if (m->pindex >= NUPDE) {
+ /* PD page */
+ pdp_entry_t *pdp;
+ pdp = pmap_pdpe(pmap, va);
+ pteva = (vm_offset_t) PDmap + amd64_ptob(m->pindex - NUPDE);
+ *pdp = 0;
+ } else {
+ /* PTE page */
+ pd_entry_t *pd;
+ pd = pmap_pde(pmap, va);
+ pteva = (vm_offset_t) PTmap + amd64_ptob(m->pindex);
+ *pd = 0;
+ }
--pmap->pm_stats.resident_count;
+ if (m->pindex < NUPDE) {
+ /* Unhold the PD page */
+ vm_page_t pdpg;
+ pdpg = vm_page_lookup(pmap->pm_pteobj, NUPDE + pmap_pdpe_index(va));
+ while (vm_page_sleep_if_busy(pdpg, FALSE, "pulook"))
+ vm_page_lock_queues();
+ vm_page_unhold(pdpg);
+ if (pdpg->hold_count == 0)
+ _pmap_unwire_pte_hold(pmap, va, pdpg);
+ }
+ if (m->pindex >= NUPDE && m->pindex < (NUPDE + NUPDPE)) {
+ /* Unhold the PDP page */
+ vm_page_t pdppg;
+ pdppg = vm_page_lookup(pmap->pm_pteobj, NUPDE + NUPDPE + pmap_pml4e_index(va));
+ while (vm_page_sleep_if_busy(pdppg, FALSE, "pulooK"))
+ vm_page_lock_queues();
+ vm_page_unhold(pdppg);
+ if (pdppg->hold_count == 0)
+ _pmap_unwire_pte_hold(pmap, va, pdppg);
+ }
if (pmap_is_current(pmap)) {
/*
* Do an invltlb to make the invalidated mapping
* take effect immediately.
*/
- pteva = VM_MAXUSER_ADDRESS + amd64_ptob(m->pindex);
pmap_invalidate_page(pmap, pteva);
}
@@ -936,11 +1172,11 @@ _pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
}
static PMAP_INLINE int
-pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
+pmap_unwire_pte_hold(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
vm_page_unhold(m);
if (m->hold_count == 0)
- return _pmap_unwire_pte_hold(pmap, m);
+ return _pmap_unwire_pte_hold(pmap, va, m);
else
return 0;
}
@@ -952,23 +1188,24 @@ pmap_unwire_pte_hold(pmap_t pmap, vm_page_t m)
static int
pmap_unuse_pt(pmap_t pmap, vm_offset_t va, vm_page_t mpte)
{
- unsigned ptepindex;
+ vm_pindex_t ptepindex;
+
if (va >= VM_MAXUSER_ADDRESS)
return 0;
if (mpte == NULL) {
- ptepindex = (va >> PDRSHIFT);
+ ptepindex = pmap_pde_pindex(va);
if (pmap->pm_pteobj->root &&
- (pmap->pm_pteobj->root->pindex == ptepindex)) {
+ pmap->pm_pteobj->root->pindex == ptepindex) {
mpte = pmap->pm_pteobj->root;
} else {
while ((mpte = vm_page_lookup(pmap->pm_pteobj, ptepindex)) != NULL &&
- vm_page_sleep_if_busy(mpte, FALSE, "pulook"))
+ vm_page_sleep_if_busy(mpte, FALSE, "pulook"))
vm_page_lock_queues();
}
}
- return pmap_unwire_pte_hold(pmap, mpte);
+ return pmap_unwire_pte_hold(pmap, va, mpte);
}
void
@@ -976,9 +1213,7 @@ pmap_pinit0(pmap)
struct pmap *pmap;
{
- pmap->pm_pdir = (pd_entry_t *)(KERNBASE + IdlePTD);
- pmap->pm_pdp = (pdp_entry_t *)(KERNBASE + IdlePDP);
- pmap->pm_pml4 = (pml4_entry_t *)(KERNBASE + IdlePML4);
+ pmap->pm_pml4 = (pml4_entry_t *)(KERNBASE + KPML4phys);
pmap->pm_active = 0;
TAILQ_INIT(&pmap->pm_pvlist);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -995,90 +1230,39 @@ void
pmap_pinit(pmap)
register struct pmap *pmap;
{
- vm_page_t ptdpg[NPGPTD];
- vm_page_t pdppg;
vm_page_t pml4pg;
- vm_paddr_t pa;
- int i;
-
- /*
- * No need to allocate page table space yet but we do need a valid
- * page directory table.
- */
- if (pmap->pm_pdir == NULL) {
- pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map,
- NBPTD);
- pmap->pm_pdp = (pdp_entry_t *)kmem_alloc_pageable(kernel_map,
- PAGE_SIZE);
- pmap->pm_pml4 = (pml4_entry_t *)kmem_alloc_pageable(kernel_map,
- PAGE_SIZE);
- }
/*
* allocate object for the ptes
*/
if (pmap->pm_pteobj == NULL)
- pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, PTDPTDI +
- NPGPTD + 2);
+ pmap->pm_pteobj = vm_object_allocate(OBJT_DEFAULT, NUPDE + NUPDPE + NUPML4E + 1);
/*
- * allocate the page directory page(s)
+ * allocate the page directory page
*/
- for (i = 0; i < NPGPTD; i++) {
- ptdpg[i] = vm_page_grab(pmap->pm_pteobj, PTDPTDI + i,
- VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED |
- VM_ALLOC_ZERO);
- vm_page_lock_queues();
- vm_page_flag_clear(ptdpg[i], PG_BUSY);
- ptdpg[i]->valid = VM_PAGE_BITS_ALL;
- vm_page_unlock_queues();
- }
-
- pml4pg = vm_page_grab(pmap->pm_pteobj, PTDPTDI + NPGPTD,
+ pml4pg = vm_page_grab(pmap->pm_pteobj, NUPDE + NUPDPE + NUPML4E,
VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
vm_page_lock_queues();
vm_page_flag_clear(pml4pg, PG_BUSY);
pml4pg->valid = VM_PAGE_BITS_ALL;
vm_page_unlock_queues();
- pdppg = vm_page_grab(pmap->pm_pteobj, PTDPTDI + NPGPTD + 1,
- VM_ALLOC_NORMAL | VM_ALLOC_RETRY | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
- vm_page_lock_queues();
- vm_page_flag_clear(pdppg, PG_BUSY);
- pdppg->valid = VM_PAGE_BITS_ALL;
- vm_page_unlock_queues();
-
- pmap_qenter((vm_offset_t)pmap->pm_pdir, ptdpg, NPGPTD);
- pmap_qenter((vm_offset_t)pmap->pm_pdp, &pdppg, 1);
- pmap_qenter((vm_offset_t)pmap->pm_pml4, &pml4pg, 1);
+ pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(pml4pg));
- for (i = 0; i < NPGPTD; i++) {
- if ((ptdpg[i]->flags & PG_ZERO) == 0)
- bzero(pmap->pm_pdir + (i * NPDEPG), PAGE_SIZE);
- }
- if ((pdppg->flags & PG_ZERO) == 0)
- bzero(pmap->pm_pdp, PAGE_SIZE);
if ((pml4pg->flags & PG_ZERO) == 0)
bzero(pmap->pm_pml4, PAGE_SIZE);
mtx_lock_spin(&allpmaps_lock);
LIST_INSERT_HEAD(&allpmaps, pmap, pm_list);
mtx_unlock_spin(&allpmaps_lock);
+
/* Wire in kernel global address entries. */
- bcopy(PTD + KPTDI, pmap->pm_pdir + KPTDI, nkpt * sizeof(pd_entry_t));
+ pmap->pm_pml4[KPML4I] = KPDPphys | PG_RW | PG_V | PG_U;
+ pmap->pm_pml4[DMPML4I] = DMPDPphys | PG_RW | PG_V | PG_U;
/* install self-referential address mapping entry(s) */
- for (i = 0; i < NPGPTD; i++) {
- pa = VM_PAGE_TO_PHYS(ptdpg[i]);
- pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M;
- }
- /* Connect ptd pages to pdp */
- for (i = 0; i < NPGPTD; i++) {
- pa = VM_PAGE_TO_PHYS(ptdpg[i]);
- pmap->pm_pdp[i] = pa | PG_RW | PG_V | PG_U;
- }
- /* connect pdp to pml4 */
- pmap->pm_pml4[0] = VM_PAGE_TO_PHYS(pdppg) | PG_RW | PG_V | PG_U;
+ pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW | PG_A | PG_M;
pmap->pm_active = 0;
TAILQ_INIT(&pmap->pm_pvlist);
@@ -1105,11 +1289,9 @@ pmap_pinit2(pmap)
static vm_page_t
_pmap_allocpte(pmap, ptepindex)
pmap_t pmap;
- unsigned ptepindex;
+ vm_pindex_t ptepindex;
{
- vm_paddr_t ptepa;
- vm_offset_t pteva;
- vm_page_t m;
+ vm_page_t m, pdppg, pdpg;
/*
* Find or fabricate a new pagetable page
@@ -1133,22 +1315,83 @@ _pmap_allocpte(pmap, ptepindex)
pmap->pm_stats.resident_count++;
- ptepa = VM_PAGE_TO_PHYS(m);
- pmap->pm_pdir[ptepindex] =
- (pd_entry_t) (ptepa | PG_U | PG_RW | PG_V | PG_A | PG_M);
+ if (ptepindex >= (NUPDE + NUPDPE)) {
+ pml4_entry_t *pml4;
+ vm_pindex_t pml4index;
+
+ /* Wire up a new PDPE page */
+ pml4index = ptepindex - (NUPDE + NUPDPE);
+ pml4 = &pmap->pm_pml4[pml4index];
+ *pml4 = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
+
+ } else if (ptepindex >= NUPDE) {
+ vm_pindex_t pml4index;
+ vm_pindex_t pdpindex;
+ pml4_entry_t *pml4;
+ pdp_entry_t *pdp;
+
+ /* Wire up a new PDE page */
+ pdpindex = ptepindex - NUPDE;
+ pml4index = pdpindex >> NPML4EPGSHIFT;
+
+ pml4 = &pmap->pm_pml4[pml4index];
+ if ((*pml4 & PG_V) == 0) {
+ /* Have to allocate a new pdp, recurse */
+ _pmap_allocpte(pmap, NUPDE + NUPDPE + pml4index);
+ } else {
+ /* Add reference to pdp page */
+ pdppg = pmap_page_lookup(pmap->pm_pteobj, NUPDE + NUPDPE + pml4index);
+ pdppg->hold_count++;
+ }
+ pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
+
+ /* Now find the pdp page */
+ pdp = &pdp[pdpindex & ((1ul << NPDPEPGSHIFT) - 1)];
+ *pdp = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
+
+ } else {
+ vm_pindex_t pml4index;
+ vm_pindex_t pdpindex;
+ pml4_entry_t *pml4;
+ pdp_entry_t *pdp;
+ pd_entry_t *pd;
+
+ /* Wire up a new PTE page */
+ pdpindex = ptepindex >> NPDPEPGSHIFT;
+ pml4index = pdpindex >> NPML4EPGSHIFT;
+
+ /* First, find the pdp and check that its valid. */
+ pml4 = &pmap->pm_pml4[pml4index];
+ if ((*pml4 & PG_V) == 0) {
+ /* Have to allocate a new pd, recurse */
+ _pmap_allocpte(pmap, NUPDE + pdpindex);
+ pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
+ pdp = &pdp[pdpindex];
+ } else {
+ pdp = (pdp_entry_t *)PHYS_TO_DMAP(*pml4 & PG_FRAME);
+ pdp = &pdp[pdpindex];
+ if ((*pdp & PG_V) == 0) {
+ /* Have to allocate a new pd, recurse */
+ _pmap_allocpte(pmap, NUPDE + pdpindex);
+ } else {
+ /* Add reference to the pd page */
+ pdpg = pmap_page_lookup(pmap->pm_pteobj, NUPDE + pdpindex);
+ pdpg->hold_count++;
+ }
+ }
+ pd = (pd_entry_t *)PHYS_TO_DMAP(*pdp & PG_FRAME);
+
+ /* Now we know where the page directory page is */
+ pd = &pd[ptepindex & ((1ul << NPDEPGSHIFT) - 1)];
+ *pd = VM_PAGE_TO_PHYS(m) | PG_U | PG_RW | PG_V | PG_A | PG_M;
+ }
/*
* Try to use the new mapping, but if we cannot, then
* do it with the routine that maps the page explicitly.
*/
- if ((m->flags & PG_ZERO) == 0) {
- if (pmap_is_current(pmap)) {
- pteva = VM_MAXUSER_ADDRESS + amd64_ptob(ptepindex);
- bzero((caddr_t) pteva, PAGE_SIZE);
- } else {
- pmap_zero_page(m);
- }
- }
+ if ((m->flags & PG_ZERO) == 0)
+ pmap_zero_page(m);
vm_page_lock_queues();
m->valid = VM_PAGE_BITS_ALL;
vm_page_flag_clear(m, PG_ZERO);
@@ -1161,27 +1404,27 @@ _pmap_allocpte(pmap, ptepindex)
static vm_page_t
pmap_allocpte(pmap_t pmap, vm_offset_t va)
{
- unsigned ptepindex;
- pd_entry_t ptepa;
+ vm_pindex_t ptepindex;
+ pd_entry_t *pd;
vm_page_t m;
/*
* Calculate pagetable page index
*/
- ptepindex = va >> PDRSHIFT;
+ ptepindex = pmap_pde_pindex(va);
/*
* Get the page directory entry
*/
- ptepa = pmap->pm_pdir[ptepindex];
+ pd = pmap_pde(pmap, va);
/*
* This supports switching from a 2MB page to a
* normal 4K page.
*/
- if (ptepa & PG_PS) {
- pmap->pm_pdir[ptepindex] = 0;
- ptepa = 0;
+ if (pd != 0 && (*pd & (PG_PS | PG_V)) == (PG_PS | PG_V)) {
+ *pd = 0;
+ pd = 0;
pmap_invalidate_all(kernel_pmap);
}
@@ -1189,7 +1432,7 @@ pmap_allocpte(pmap_t pmap, vm_offset_t va)
* If the page table page is mapped, we just increment the
* hold count, and activate it.
*/
- if (ptepa) {
+ if (pd != 0 && (*pd & PG_V) != 0) {
/*
* In order to get the page table page, try the
* hint first.
@@ -1206,7 +1449,8 @@ pmap_allocpte(pmap_t pmap, vm_offset_t va)
/*
* Here if the pte page isn't mapped, or if it has been deallocated.
*/
- return _pmap_allocpte(pmap, ptepindex);
+ m = _pmap_allocpte(pmap, ptepindex);
+ return m;
}
@@ -1224,7 +1468,6 @@ pmap_release(pmap_t pmap)
{
vm_object_t object;
vm_page_t m;
- int i;
object = pmap->pm_pteobj;
@@ -1239,24 +1482,8 @@ pmap_release(pmap_t pmap)
LIST_REMOVE(pmap, pm_list);
mtx_unlock_spin(&allpmaps_lock);
- bzero(pmap->pm_pdir + PTDPTDI, (nkpt + NPGPTD) *
- sizeof(*pmap->pm_pdir));
-
- pmap_qremove((vm_offset_t)pmap->pm_pdir, NPGPTD);
-
vm_page_lock_queues();
- for (i = 0; i < NPGPTD; i++) {
- m = TAILQ_FIRST(&object->memq);
- KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdp[i] & PG_FRAME),
- ("pmap_release: got wrong ptd page"));
- m->wire_count--;
- atomic_subtract_int(&cnt.v_wire_count, 1);
- vm_page_busy(m);
- vm_page_free_zero(m);
- }
- /* now free pdp and pml4 */
- for (i = 0; i < 2; i++) {
- m = TAILQ_FIRST(&object->memq);
+ while ((m = TAILQ_FIRST(&object->memq)) != NULL) {
m->wire_count--;
atomic_subtract_int(&cnt.v_wire_count, 1);
vm_page_busy(m);
@@ -1293,26 +1520,24 @@ SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
void
pmap_growkernel(vm_offset_t addr)
{
- struct pmap *pmap;
int s;
vm_paddr_t ptppaddr;
vm_page_t nkpg;
pd_entry_t newpdir;
- pt_entry_t *pde;
s = splhigh();
mtx_assert(&kernel_map->system_mtx, MA_OWNED);
if (kernel_vm_end == 0) {
kernel_vm_end = KERNBASE;
nkpt = 0;
- while (pdir_pde(PTD, kernel_vm_end)) {
+ while ((*pmap_pde(kernel_pmap, kernel_vm_end) & PG_V) != 0) {
kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
nkpt++;
}
}
addr = roundup2(addr, PAGE_SIZE * NPTEPG);
while (kernel_vm_end < addr) {
- if (pdir_pde(PTD, kernel_vm_end)) {
+ if ((*pmap_pde(kernel_pmap, kernel_vm_end) & PG_V) != 0) {
kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
continue;
}
@@ -1330,14 +1555,8 @@ pmap_growkernel(vm_offset_t addr)
pmap_zero_page(nkpg);
ptppaddr = VM_PAGE_TO_PHYS(nkpg);
newpdir = (pd_entry_t) (ptppaddr | PG_V | PG_RW | PG_A | PG_M);
- pdir_pde(PTD, kernel_vm_end) = newpdir;
+ *pmap_pde(kernel_pmap, kernel_vm_end) = newpdir;
- mtx_lock_spin(&allpmaps_lock);
- LIST_FOREACH(pmap, &allpmaps, pm_list) {
- pde = pmap_pde(pmap, kernel_vm_end);
- pde_store(pde, newpdir);
- }
- mtx_unlock_spin(&allpmaps_lock);
kernel_vm_end = (kernel_vm_end + PAGE_SIZE * NPTEPG) & ~(PAGE_SIZE * NPTEPG - 1);
}
splx(s);
@@ -1495,7 +1714,8 @@ pmap_remove_page(pmap_t pmap, vm_offset_t va)
{
pt_entry_t *pte;
- if ((pte = pmap_pte_quick(pmap, va)) == NULL || *pte == 0)
+ pte = pmap_pte(pmap, va);
+ if (pte == NULL || (*pte & PG_V) == 0)
return;
pmap_remove_pte(pmap, pte, va);
pmap_invalidate_page(pmap, va);
@@ -1511,7 +1731,7 @@ void
pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
vm_offset_t pdnxt;
- pd_entry_t ptpaddr;
+ pd_entry_t ptpaddr, *pde;
pt_entry_t *pte;
int anyvalid;
@@ -1526,26 +1746,30 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
* common operation and easy to short circuit some
* code.
*/
- if ((sva + PAGE_SIZE == eva) &&
- ((pmap->pm_pdir[(sva >> PDRSHIFT)] & PG_PS) == 0)) {
- pmap_remove_page(pmap, sva);
- return;
+ if (sva + PAGE_SIZE == eva) {
+ pde = pmap_pde(pmap, sva);
+ if (pde && (*pde & PG_PS) == 0) {
+ pmap_remove_page(pmap, sva);
+ return;
+ }
}
anyvalid = 0;
for (; sva < eva; sva = pdnxt) {
- unsigned pdirindex;
+
+ if (pmap->pm_stats.resident_count == 0)
+ break;
/*
* Calculate index for next page table.
*/
pdnxt = (sva + NBPDR) & ~PDRMASK;
- if (pmap->pm_stats.resident_count == 0)
- break;
- pdirindex = sva >> PDRSHIFT;
- ptpaddr = pmap->pm_pdir[pdirindex];
+ pde = pmap_pde(pmap, sva);
+ if (pde == 0)
+ continue;
+ ptpaddr = *pde;
/*
* Weed out invalid mappings. Note: we assume that the page
@@ -1558,7 +1782,7 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
* Check for large page.
*/
if ((ptpaddr & PG_PS) != 0) {
- pmap->pm_pdir[pdirindex] = 0;
+ *pde = 0;
pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
anyvalid = 1;
continue;
@@ -1573,8 +1797,8 @@ pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
pdnxt = eva;
for (; sva != pdnxt; sva += PAGE_SIZE) {
- if ((pte = pmap_pte_quick(pmap, sva)) == NULL ||
- *pte == 0)
+ pte = pmap_pte(pmap, sva);
+ if (pte == NULL || *pte == 0)
continue;
anyvalid = 1;
if (pmap_remove_pte(pmap, pte, sva))
@@ -1619,7 +1843,7 @@ pmap_remove_all(vm_page_t m)
s = splvm();
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
pv->pv_pmap->pm_stats.resident_count--;
- pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
+ pte = pmap_pte(pv->pv_pmap, pv->pv_va);
tpte = pte_load_clear(pte);
if (tpte & PG_W)
pv->pv_pmap->pm_stats.wired_count--;
@@ -1659,7 +1883,7 @@ void
pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
{
vm_offset_t pdnxt;
- pd_entry_t ptpaddr;
+ pd_entry_t ptpaddr, *pde;
int anychanged;
if (pmap == NULL)
@@ -1676,12 +1900,13 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
anychanged = 0;
for (; sva < eva; sva = pdnxt) {
- unsigned pdirindex;
pdnxt = (sva + NBPDR) & ~PDRMASK;
- pdirindex = sva >> PDRSHIFT;
- ptpaddr = pmap->pm_pdir[pdirindex];
+ pde = pmap_pde(pmap, sva);
+ if (pde == NULL)
+ continue;
+ ptpaddr = *pde;
/*
* Weed out invalid mappings. Note: we assume that the page
@@ -1694,7 +1919,7 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
* Check for large page.
*/
if ((ptpaddr & PG_PS) != 0) {
- pmap->pm_pdir[pdirindex] &= ~(PG_M|PG_RW);
+ *pde &= ~(PG_M|PG_RW);
pmap->pm_stats.resident_count -= NBPDR / PAGE_SIZE;
anychanged = 1;
continue;
@@ -1708,7 +1933,8 @@ pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
pt_entry_t *pte;
vm_page_t m;
- if ((pte = pmap_pte_quick(pmap, sva)) == NULL)
+ pte = pmap_pte(pmap, sva);
+ if (pte == NULL)
continue;
pbits = *pte;
if (pbits & PG_MANAGED) {
@@ -1785,21 +2011,19 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
pd_entry_t *pdeaddr = pmap_pde(pmap, va);
origpte = *pdeaddr;
if ((origpte & PG_V) == 0) {
- panic("pmap_enter: invalid kernel page table page, pdir=%p, pde=%p, va=%p\n",
- pmap->pm_pdir[PTDPTDI], origpte, va);
+ panic("pmap_enter: invalid kernel page table page, pde=%p, va=%p\n",
+ origpte, va);
}
}
#endif
- pte = pmap_pte_quick(pmap, va);
+ pte = pmap_pte(pmap, va);
/*
* Page Directory table entry not valid, we need a new PT page
*/
- if (pte == NULL) {
- panic("pmap_enter: invalid page directory pdir=%#lx, va=%#lx\n",
- pmap->pm_pdir[PTDPTDI], va);
- }
+ if (pte == NULL)
+ panic("pmap_enter: invalid page directory va=%#lx\n", va);
pa = VM_PAGE_TO_PHYS(m) & PG_FRAME;
origpte = *pte;
@@ -1937,13 +2161,13 @@ pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_t mpte)
* resident, we are creating it here.
*/
if (va < VM_MAXUSER_ADDRESS) {
- unsigned ptepindex;
- pd_entry_t ptepa;
+ vm_pindex_t ptepindex;
+ pd_entry_t *ptepa;
/*
* Calculate pagetable page index
*/
- ptepindex = va >> PDRSHIFT;
+ ptepindex = pmap_pde_pindex(va);
if (mpte && (mpte->pindex == ptepindex)) {
mpte->hold_count++;
} else {
@@ -1951,14 +2175,14 @@ retry:
/*
* Get the page directory entry
*/
- ptepa = pmap->pm_pdir[ptepindex];
+ ptepa = pmap_pde(pmap, va);
/*
* If the page table page is mapped, we just increment
* the hold count, and activate it.
*/
- if (ptepa) {
- if (ptepa & PG_PS)
+ if (ptepa && (*ptepa & PG_V) != 0) {
+ if (*ptepa & PG_PS)
panic("pmap_enter_quick: unexpected mapping into 2MB page");
if (pmap->pm_pteobj->root &&
(pmap->pm_pteobj->root->pindex == ptepindex)) {
@@ -1980,14 +2204,14 @@ retry:
/*
* This call to vtopte makes the assumption that we are
* entering the page into the current pmap. In order to support
- * quick entry into any pmap, one would likely use pmap_pte_quick.
+ * quick entry into any pmap, one would likely use pmap_pte.
* But that isn't as quick as vtopte.
*/
pte = vtopte(va);
if (*pte) {
if (mpte != NULL) {
vm_page_lock_queues();
- pmap_unwire_pte_hold(pmap, mpte);
+ pmap_unwire_pte_hold(pmap, va, mpte);
vm_page_unlock_queues();
}
return 0;
@@ -2045,7 +2269,7 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
vm_object_t object, vm_pindex_t pindex,
vm_size_t size, int limit)
{
- vm_offset_t tmpidx;
+ vm_pindex_t tmpidx;
int psize;
vm_page_t p, mpte;
@@ -2061,13 +2285,12 @@ pmap_object_init_pt(pmap_t pmap, vm_offset_t addr,
((addr & (NBPDR - 1)) == 0) && ((size & (NBPDR - 1)) == 0)) {
int i;
vm_page_t m[1];
- unsigned int ptepindex;
int npdes;
- pd_entry_t ptepa;
+ pd_entry_t ptepa, *pde;
- if (pmap->pm_pdir[ptepindex = (addr >> PDRSHIFT)])
+ pde = pmap_pde(pmap, addr);
+ if (pde != 0 && (*pde & PG_V) != 0)
return;
-
retry:
p = vm_page_lookup(object, pindex);
if (p != NULL) {
@@ -2103,10 +2326,9 @@ retry:
pmap->pm_stats.resident_count += size >> PAGE_SHIFT;
npdes = size >> PDRSHIFT;
for(i = 0; i < npdes; i++) {
- pde_store(&pmap->pm_pdir[ptepindex],
- ptepa | PG_U | PG_RW | PG_V | PG_PS);
+ pde_store(pde, ptepa | PG_U | PG_RW | PG_V | PG_PS);
ptepa += NBPDR;
- ptepindex += 1;
+ pde++;
}
pmap_invalidate_all(kernel_pmap);
return;
@@ -2166,7 +2388,6 @@ retry:
}
vm_page_unlock_queues();
}
- return;
}
/*
@@ -2198,6 +2419,7 @@ pmap_prefault(pmap, addra, entry)
vm_pindex_t pindex;
vm_page_t m, mpte;
vm_object_t object;
+ pd_entry_t *pde;
if (!curthread || (pmap != vmspace_pmap(curthread->td_proc->p_vmspace)))
return;
@@ -2223,11 +2445,12 @@ pmap_prefault(pmap, addra, entry)
if (addr < starta || addr >= entry->end)
continue;
- if ((*pmap_pde(pmap, addr)) == 0)
+ pde = pmap_pde(pmap, addr);
+ if (pde == NULL || (*pde & PG_V) == 0)
continue;
pte = vtopte(addr);
- if (*pte)
+ if ((*pte & PG_V) == 0)
continue;
pindex = ((addr - entry->start) + entry->offset) >> PAGE_SHIFT;
@@ -2282,18 +2505,18 @@ pmap_change_wiring(pmap, va, wired)
if (pmap == NULL)
return;
- pte = pmap_pte_quick(pmap, va);
-
- if (wired && !pmap_pte_w(pte))
- pmap->pm_stats.wired_count++;
- else if (!wired && pmap_pte_w(pte))
- pmap->pm_stats.wired_count--;
-
/*
* Wiring is not a hardware characteristic so there is no need to
* invalidate TLB.
*/
- pmap_pte_set_w(pte, wired);
+ pte = pmap_pte(pmap, va);
+ if (wired && (*pte & PG_W) == 0) {
+ pmap->pm_stats.wired_count++;
+ *pte |= PG_W;
+ } else if (!wired && (*pte & PG_W) != 0) {
+ pmap->pm_stats.wired_count--;
+ *pte &= ~PG_W;
+ }
}
@@ -2324,8 +2547,8 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
for (addr = src_addr; addr < end_addr; addr = pdnxt) {
pt_entry_t *src_pte, *dst_pte;
vm_page_t dstmpte, srcmpte;
- pd_entry_t srcptepaddr;
- unsigned ptepindex;
+ pd_entry_t srcptepaddr, *pde;
+ vm_pindex_t ptepindex;
if (addr >= UPT_MIN_ADDRESS)
panic("pmap_copy: invalid to pmap_copy page tables\n");
@@ -2340,15 +2563,27 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
break;
pdnxt = (addr + NBPDR) & ~PDRMASK;
- ptepindex = addr >> PDRSHIFT;
+ ptepindex = pmap_pde_pindex(addr);
- srcptepaddr = src_pmap->pm_pdir[ptepindex];
+ pde = pmap_pde(src_pmap, addr);
+ if (pde)
+ srcptepaddr = *pde;
+ else
+ continue;
if (srcptepaddr == 0)
continue;
if (srcptepaddr & PG_PS) {
- if (dst_pmap->pm_pdir[ptepindex] == 0) {
- dst_pmap->pm_pdir[ptepindex] = srcptepaddr;
+ pde = pmap_pde(dst_pmap, addr);
+ if (pde == 0) {
+ /*
+ * XXX should do an allocpte here to
+ * instantiate the pde
+ */
+ continue;
+ }
+ if (*pde == 0) {
+ *pde = srcptepaddr;
dst_pmap->pm_stats.resident_count +=
NBPDR / PAGE_SIZE;
}
@@ -2377,7 +2612,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
* block.
*/
dstmpte = pmap_allocpte(dst_pmap, addr);
- dst_pte = pmap_pte_quick(dst_pmap, addr);
+ dst_pte = pmap_pte(dst_pmap, addr);
if ((*dst_pte == 0) && (ptetemp = *src_pte)) {
/*
* Clear the modified and
@@ -2391,7 +2626,7 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
dstmpte, m);
} else {
vm_page_lock_queues();
- pmap_unwire_pte_hold(dst_pmap, dstmpte);
+ pmap_unwire_pte_hold(dst_pmap, addr, dstmpte);
vm_page_unlock_queues();
}
if (dstmpte->hold_count >= srcmpte->hold_count)
@@ -2410,15 +2645,9 @@ pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
void
pmap_zero_page(vm_page_t m)
{
+ vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
- mtx_lock(&CMAPCADDR12_lock);
- if (*CMAP2)
- panic("pmap_zero_page: CMAP2 busy");
- *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
- invlpg((u_long)CADDR2);
- pagezero(CADDR2);
- *CMAP2 = 0;
- mtx_unlock(&CMAPCADDR12_lock);
+ pagezero((void *)va);
}
/*
@@ -2430,18 +2659,12 @@ pmap_zero_page(vm_page_t m)
void
pmap_zero_page_area(vm_page_t m, int off, int size)
{
+ vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
- mtx_lock(&CMAPCADDR12_lock);
- if (*CMAP2)
- panic("pmap_zero_page: CMAP2 busy");
- *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
- invlpg((u_long)CADDR2);
if (off == 0 && size == PAGE_SIZE)
- pagezero(CADDR2);
+ pagezero((void *)va);
else
- bzero((char *)CADDR2 + off, size);
- *CMAP2 = 0;
- mtx_unlock(&CMAPCADDR12_lock);
+ bzero((char *)va + off, size);
}
/*
@@ -2453,13 +2676,9 @@ pmap_zero_page_area(vm_page_t m, int off, int size)
void
pmap_zero_page_idle(vm_page_t m)
{
+ vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
- if (*CMAP3)
- panic("pmap_zero_page: CMAP3 busy");
- *CMAP3 = PG_V | PG_RW | VM_PAGE_TO_PHYS(m) | PG_A | PG_M;
- invlpg((u_long)CADDR3);
- pagezero(CADDR3);
- *CMAP3 = 0;
+ pagezero((void *)va);
}
/*
@@ -2469,22 +2688,12 @@ pmap_zero_page_idle(vm_page_t m)
* time.
*/
void
-pmap_copy_page(vm_page_t src, vm_page_t dst)
-{
-
- mtx_lock(&CMAPCADDR12_lock);
- if (*CMAP1)
- panic("pmap_copy_page: CMAP1 busy");
- if (*CMAP2)
- panic("pmap_copy_page: CMAP2 busy");
- *CMAP1 = PG_V | VM_PAGE_TO_PHYS(src) | PG_A;
- *CMAP2 = PG_V | PG_RW | VM_PAGE_TO_PHYS(dst) | PG_A | PG_M;
- invlpg((u_long)CADDR1);
- invlpg((u_long)CADDR2);
- bcopy(CADDR1, CADDR2, PAGE_SIZE);
- *CMAP1 = 0;
- *CMAP2 = 0;
- mtx_unlock(&CMAPCADDR12_lock);
+pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
+{
+ vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
+ vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));
+
+ bcopy((void *)src, (void *)dst, PAGE_SIZE);
}
/*
@@ -2558,7 +2767,7 @@ pmap_remove_pages(pmap, sva, eva)
#ifdef PMAP_REMOVE_PAGES_CURPROC_ONLY
pte = vtopte(pv->pv_va);
#else
- pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
+ pte = pmap_pte(pv->pv_pmap, pv->pv_va);
#endif
tpte = *pte;
@@ -2643,7 +2852,7 @@ pmap_is_modified(vm_page_t m)
continue;
}
#endif
- pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
+ pte = pmap_pte(pv->pv_pmap, pv->pv_va);
if (*pte & PG_M) {
splx(s);
return TRUE;
@@ -2689,7 +2898,7 @@ pmap_changebit(vm_page_t m, int bit, boolean_t setem)
}
#endif
- pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
+ pte = pmap_pte(pv->pv_pmap, pv->pv_va);
if (setem) {
*pte |= bit;
@@ -2771,7 +2980,7 @@ pmap_ts_referenced(vm_page_t m)
if (!pmap_track_modified(pv->pv_va))
continue;
- pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
+ pte = pmap_pte(pv->pv_pmap, pv->pv_va);
if (pte && ((v = pte_load(pte)) & PG_A) != 0) {
pte_store(pte, v & ~PG_A);
@@ -2814,26 +3023,32 @@ pmap_clear_reference(vm_page_t m)
*/
static void
-i386_protection_init()
+amd64_protection_init()
{
- register int *kp, prot;
+ register long *kp, prot;
+
+#if 0
+#define PG_NX (1ul << 63)
+#else
+#define PG_NX 0
+#endif
kp = protection_codes;
for (prot = 0; prot < 8; prot++) {
switch (prot) {
case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
- /*
- * Read access is also 0. There isn't any execute bit,
- * so just make it readable.
- */
case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
+ *kp++ = PG_NX;
+ break;
case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
*kp++ = 0;
break;
case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
- case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
+ *kp++ = PG_RW | PG_NX;
+ break;
+ case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
*kp++ = PG_RW;
break;
@@ -2905,7 +3120,7 @@ pmap_mincore(pmap, addr)
vm_page_t m;
int val = 0;
- ptep = pmap_pte_quick(pmap, addr);
+ ptep = pmap_pte(pmap, addr);
if (ptep == 0) {
return 0;
}
@@ -2994,113 +3209,3 @@ pmap_addr_hint(vm_object_t obj, vm_offset_t addr, vm_size_t size)
addr = (addr + (NBPDR - 1)) & ~(NBPDR - 1);
return addr;
}
-
-
-#if defined(PMAP_DEBUG)
-pmap_pid_dump(int pid)
-{
- pmap_t pmap;
- struct proc *p;
- int npte = 0;
- int index;
-
- sx_slock(&allproc_lock);
- LIST_FOREACH(p, &allproc, p_list) {
- if (p->p_pid != pid)
- continue;
-
- if (p->p_vmspace) {
- int i,j;
- index = 0;
- pmap = vmspace_pmap(p->p_vmspace);
- for (i = 0; i < NPDEPTD; i++) {
- pd_entry_t *pde;
- pt_entry_t *pte;
- vm_offset_t base = i << PDRSHIFT;
-
- pde = &pmap->pm_pdir[i];
- if (pde && pmap_pde_v(pde)) {
- for (j = 0; j < NPTEPG; j++) {
- vm_offset_t va = base + (j << PAGE_SHIFT);
- if (va >= (vm_offset_t) VM_MIN_KERNEL_ADDRESS) {
- if (index) {
- index = 0;
- printf("\n");
- }
- sx_sunlock(&allproc_lock);
- return npte;
- }
- pte = pmap_pte_quick(pmap, va);
- if (pte && pmap_pte_v(pte)) {
- pt_entry_t pa;
- vm_page_t m;
- pa = *pte;
- m = PHYS_TO_VM_PAGE(pa);
- printf("va: 0x%x, pt: 0x%x, h: %d, w: %d, f: 0x%x",
- va, pa, m->hold_count, m->wire_count, m->flags);
- npte++;
- index++;
- if (index >= 2) {
- index = 0;
- printf("\n");
- } else {
- printf(" ");
- }
- }
- }
- }
- }
- }
- }
- sx_sunlock(&allproc_lock);
- return npte;
-}
-#endif
-
-#if defined(DEBUG)
-
-static void pads(pmap_t pm);
-void pmap_pvdump(vm_offset_t pa);
-
-/* print address space of pmap*/
-static void
-pads(pm)
- pmap_t pm;
-{
- int i, j;
- vm_paddr_t va;
- pt_entry_t *ptep;
-
- if (pm == kernel_pmap)
- return;
- for (i = 0; i < NPDEPTD; i++)
- if (pm->pm_pdir[i])
- for (j = 0; j < NPTEPG; j++) {
- va = (i << PDRSHIFT) + (j << PAGE_SHIFT);
- if (pm == kernel_pmap && va < KERNBASE)
- continue;
- if (pm != kernel_pmap && va > UPT_MAX_ADDRESS)
- continue;
- ptep = pmap_pte_quick(pm, va);
- if (pmap_pte_v(ptep))
- printf("%x:%x ", va, *ptep);
- };
-
-}
-
-void
-pmap_pvdump(pa)
- vm_paddr_t pa;
-{
- pv_entry_t pv;
- vm_page_t m;
-
- printf("pa %x", pa);
- m = PHYS_TO_VM_PAGE(pa);
- TAILQ_FOREACH(pv, &m->md.pv_list, pv_list) {
- printf(" -> pmap %p, va %x", (void *)pv->pv_pmap, pv->pv_va);
- pads(pv->pv_pmap);
- }
- printf(" ");
-}
-#endif
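Much of the simplification in pmap.c above follows from the recursive PML4
entry (PML4PML4I) installed by create_pagetables() and pmap_pinit(): once the
top-level table maps itself, every pte becomes visible in one linear window
and the new vtopte() is a single add-and-mask.  A stand-alone sketch of that
arithmetic follows, with example values rather than the commit's types, and
with the canonical 48-bit sign extension folded in by assumption.

/* Illustrative sketch of the recursive-mapping arithmetic behind vtopte(). */
#include <stdio.h>

#define PML4PML4I       256ul           /* NPML4EPG/2, as in the pmap.h hunk */
#define PML4SHIFT       39
#define PAGE_SHIFT      12
/* Assumed: fold in the 48-bit canonical sign extension. */
#define SIGNEXT(x)      ((x) | 0xffff000000000000ul)

int
main(void)
{
        /* addr_PTmap = VADDR(PML4PML4I, 0, 0, 0): walking the self-referencing
         * slot once exposes every page-table page as one linear array of ptes. */
        unsigned long ptmap = SIGNEXT(PML4PML4I << PML4SHIFT);
        unsigned long va = 0x00007f1234567000ul;        /* example VA */
        unsigned long mask = (1ul << (9 + 9 + 9 + 9)) - 1;
        unsigned long pte_va;

        /* vtopte(va) = PTmap + (btop(va) & mask), scaled by sizeof(pt_entry_t) */
        pte_va = ptmap + ((va >> PAGE_SHIFT) & mask) * sizeof(unsigned long);
        printf("PTmap base: %#lx\n", ptmap);
        printf("pte for %#lx lives at %#lx\n", va, pte_va);
        return (0);
}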
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index bc69e42..72ba590 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -212,7 +212,7 @@ trap(frame)
* kernel can print out a useful trap message and even get
* to the debugger.
*/
- if (td->td_critnest == 0)
+ if (td->td_critnest != 0)
trap_fatal(&frame, frame.tf_addr);
}
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 0b7f510..f978290 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -264,7 +264,7 @@ cpu_reset_real()
printf("Keyboard reset did not work, attempting CPU shutdown\n");
DELAY(1000000); /* wait 1 sec for printf to complete */
/* force a shutdown by unmapping entire address space ! */
- bzero((caddr_t)PTD, NBPTD);
+ bzero((caddr_t)PML4map, PAGE_SIZE);
/* "good night, sweet prince .... <THUNK!>" */
invltlb();
diff --git a/sys/amd64/include/bus_amd64.h b/sys/amd64/include/bus_amd64.h
index 5f0e22a..71a25e4 100644
--- a/sys/amd64/include/bus_amd64.h
+++ b/sys/amd64/include/bus_amd64.h
@@ -401,7 +401,7 @@ bus_space_read_region_1(bus_space_tag_t tag, bus_space_handle_t bsh,
else
#endif
{
- int _port_ = bsh + offset;
+ bus_space_handle_t _port_ = bsh + offset;
#ifdef __GNUC__
__asm __volatile(" \n\
cld \n\
@@ -443,7 +443,7 @@ bus_space_read_region_2(bus_space_tag_t tag, bus_space_handle_t bsh,
else
#endif
{
- int _port_ = bsh + offset;
+ bus_space_handle_t _port_ = bsh + offset;
#ifdef __GNUC__
__asm __volatile(" \n\
cld \n\
@@ -485,7 +485,7 @@ bus_space_read_region_4(bus_space_tag_t tag, bus_space_handle_t bsh,
else
#endif
{
- int _port_ = bsh + offset;
+ bus_space_handle_t _port_ = bsh + offset;
#ifdef __GNUC__
__asm __volatile(" \n\
cld \n\
@@ -741,7 +741,7 @@ bus_space_write_region_1(bus_space_tag_t tag, bus_space_handle_t bsh,
else
#endif
{
- int _port_ = bsh + offset;
+ bus_space_handle_t _port_ = bsh + offset;
#ifdef __GNUC__
__asm __volatile(" \n\
cld \n\
@@ -783,7 +783,7 @@ bus_space_write_region_2(bus_space_tag_t tag, bus_space_handle_t bsh,
else
#endif
{
- int _port_ = bsh + offset;
+ bus_space_handle_t _port_ = bsh + offset;
#ifdef __GNUC__
__asm __volatile(" \n\
cld \n\
@@ -825,7 +825,7 @@ bus_space_write_region_4(bus_space_tag_t tag, bus_space_handle_t bsh,
else
#endif
{
- int _port_ = bsh + offset;
+ bus_space_handle_t _port_ = bsh + offset;
#ifdef __GNUC__
__asm __volatile(" \n\
cld \n\
diff --git a/sys/amd64/include/param.h b/sys/amd64/include/param.h
index 355e95d..9f73c2c 100644
--- a/sys/amd64/include/param.h
+++ b/sys/amd64/include/param.h
@@ -89,34 +89,32 @@
#define ALIGN(p) _ALIGN(p)
#define ALIGNED_POINTER(p,t) _ALIGNED_POINTER((p),(t))
+
/* Size of the level 1 page table units */
#define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t)))
+#define NPTEPGSHIFT 9 /* LOG2(NPTEPG) */
#define PAGE_SHIFT 12 /* LOG2(PAGE_SIZE) */
#define PAGE_SIZE (1<<PAGE_SHIFT) /* bytes/page */
#define PAGE_MASK (PAGE_SIZE-1)
/* Size of the level 2 page directory units */
#define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t)))
+#define NPDEPGSHIFT 9 /* LOG2(NPDEPG) */
#define PDRSHIFT 21 /* LOG2(NBPDR) */
#define NBPDR (1<<PDRSHIFT) /* bytes/page dir */
#define PDRMASK (NBPDR-1)
/* Size of the level 3 page directory pointer table units */
#define NPDPEPG (PAGE_SIZE/(sizeof (pdp_entry_t)))
+#define NPDPEPGSHIFT 9 /* LOG2(NPDPEPG) */
#define PDPSHIFT 30 /* LOG2(NBPDP) */
#define NBPDP (1<<PDPSHIFT) /* bytes/page dir ptr table */
#define PDPMASK (NBPDP-1)
/* Size of the level 4 page-map level-4 table units */
#define NPML4EPG (PAGE_SIZE/(sizeof (pml4_entry_t)))
+#define NPML4EPGSHIFT 9 /* LOG2(NPML4EPG) */
#define PML4SHIFT 39 /* LOG2(NBPML4T) */
#define NBPML4T (1ul<<PML4SHIFT)/* bytes/page map lev4 table */
#define PML4MASK (NBPML4T-1)
-#define NKPML4E 1 /* addressable number of page tables/pde's */
-#define NKPDPE 1 /* addressable number of page tables/pde's */
-#define NPGPTD 4
-
-#define NBPTD (NPGPTD<<PAGE_SHIFT)
-#define NPDEPTD (NBPTD/(sizeof (pd_entry_t)))
-
#define IOPAGES 2 /* pages of i/o permission bitmap */
#define KSTACK_PAGES 4 /* pages of kstack (with pcb) */
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index 5ffa054..ef02ac1 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -83,13 +83,6 @@
#define PGEX_U 0x04 /* access from User mode (UPL) */
/*
- * Size of Kernel address space. This is the number of level 4 (top)
- * entries. We use half of them for the kernel due to the 48 bit
- * virtual address sign extension.
- */
-#define KVA_PAGES 1536
-
-/*
* Pte related macros. This is complicated by having to deal with
* the sign extension of the 48th bit.
*/
@@ -105,15 +98,26 @@
#ifndef NKPT
#define NKPT 120 /* initial number of kernel page tables */
#endif
-#ifndef NKPDE
-#define NKPDE (KVA_PAGES) /* number of page tables/pde's */
-#endif
+
+#define NKPML4E 1 /* number of kernel PML4 slots */
+#define NKPDPE 1 /* number of kernel PDP slots */
+#define NKPDE (NKPDPE*NPDEPG) /* number of kernel PD slots */
+
+#define NUPML4E 1 /* number of userland PML4 pages */
+#define NUPDPE (NUPML4E*NPDPEPG)/* number of userland PDP pages */
+#define NUPDE (NUPDPE*NPDEPG) /* number of userland PD entries */
+
+#define NDMPML4E 1 /* number of dmap PML4 slots */
/*
- * The *PTDI values control the layout of virtual memory
+ * The *PDI values control the layout of virtual memory
*/
-#define KPTDI (NPDEPTD-NKPDE) /* start of kernel virtual pde's */
-#define PTDPTDI (KPTDI-NPGPTD) /* ptd entry that points to ptd! */
+#define PML4PML4I (NPML4EPG/2) /* Index of recursive pml4 mapping */
+
+#define KPML4I (NPML4EPG-1)
+#define DMPML4I (KPML4I-1)
+
+#define KPDPI (NPDPEPG-1)
/*
* XXX doesn't really belong here I guess...
@@ -145,13 +149,18 @@ typedef u_int64_t pml4_entry_t;
* in the page tables and the evil overlapping.
*/
#ifdef _KERNEL
-#define PTmap ((pt_entry_t *)(VADDR(0, 0, PTDPTDI, 0)))
-#define PTD ((pd_entry_t *)(VADDR(0, 0, PTDPTDI, PTDPTDI)))
-#define PTDpde ((pd_entry_t *)(VADDR(0, 0, PTDPTDI, PTDPTDI) + (PTDPTDI * sizeof(pd_entry_t))))
-
-extern u_int64_t IdlePML4; /* physical address of "Idle" state directory */
-extern u_int64_t IdlePDP; /* physical address of "Idle" state directory */
-extern u_int64_t IdlePTD; /* physical address of "Idle" state directory */
+#define addr_PTmap (VADDR(PML4PML4I, 0, 0, 0))
+#define addr_PDmap (VADDR(PML4PML4I, PML4PML4I, 0, 0))
+#define addr_PDPmap (VADDR(PML4PML4I, PML4PML4I, PML4PML4I, 0))
+#define addr_PML4map (VADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I))
+#define addr_PML4pml4e (addr_PML4map + (PML4PML4I * sizeof(pml4_entry_t)))
+#define PTmap ((pt_entry_t *)(addr_PTmap))
+#define PDmap ((pd_entry_t *)(addr_PDmap))
+#define PDPmap ((pd_entry_t *)(addr_PDPmap))
+#define PML4map ((pd_entry_t *)(addr_PML4map))
+#define PML4pml4e ((pd_entry_t *)(addr_PML4pml4e))
+
+extern u_int64_t KPML4phys; /* physical address of kernel level 4 */
#endif
#ifdef _KERNEL
@@ -161,28 +170,8 @@ extern u_int64_t IdlePTD; /* physical address of "Idle" state directory */
* Note: these work recursively, thus vtopte of a pte will give
* the corresponding pde that in turn maps it.
*/
-#define vtopte(va) (PTmap + amd64_btop(va))
-
-/*
- * Routine: pmap_kextract
- * Function:
- * Extract the physical page address associated
- * kernel virtual address.
- */
-static __inline vm_paddr_t
-pmap_kextract(vm_offset_t va)
-{
- vm_paddr_t pa;
-
- pa = PTD[va >> PDRSHIFT];
- if (pa & PG_PS) {
- pa = (pa & ~(NBPDR - 1)) | (va & (NBPDR - 1));
- } else {
- pa = *vtopte(va);
- pa = (pa & PG_FRAME) | (va & PAGE_MASK);
- }
- return pa;
-}
+pt_entry_t *vtopte(vm_offset_t);
+vm_paddr_t pmap_kextract(vm_offset_t);
#define vtophys(va) pmap_kextract(((vm_offset_t) (va)))
@@ -225,14 +214,12 @@ struct md_page {
};
struct pmap {
- pd_entry_t *pm_pdir; /* KVA of page directory */
+ pml4_entry_t *pm_pml4; /* KVA of level 4 page table */
vm_object_t pm_pteobj; /* Container for pte's */
TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */
u_long pm_active; /* active on cpus */
struct pmap_statistics pm_stats; /* pmap statistics */
LIST_ENTRY(pmap) pm_list; /* List of all pmaps */
- pdp_entry_t *pm_pdp; /* KVA of level 3 page table */
- pml4_entry_t *pm_pml4; /* KVA of level 4 page table */
};
#define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list))
@@ -278,7 +265,7 @@ extern char *ptvmmap; /* poor name! */
extern vm_offset_t virtual_avail;
extern vm_offset_t virtual_end;
-void pmap_bootstrap(vm_paddr_t, vm_paddr_t);
+void pmap_bootstrap(vm_paddr_t *);
void pmap_kenter(vm_offset_t va, vm_paddr_t pa);
void pmap_kremove(vm_offset_t);
void *pmap_mapdev(vm_paddr_t, vm_size_t);
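
Aside: vtopte() and pmap_kextract() become real functions in pmap.c, which is not shown in this section. A plausible sketch of vtopte() on top of the recursive PTmap window declared above, under the assumption that it mirrors the old macro plus masking of the table-index bits; the shift constants are the ones added to param.h earlier:

/* Assumed sketch, not copied from pmap.c: with the PML4PML4I slot pointing
 * back at the PML4 itself, every pte in the system becomes visible as a
 * flat array at addr_PTmap, indexed by the 36 table-index bits of va. */
pt_entry_t *
vtopte(vm_offset_t va)
{
	u_int64_t mask = ((1ul << (NPTEPGSHIFT + NPDEPGSHIFT +
	    NPDPEPGSHIFT + NPML4EPGSHIFT)) - 1);

	return (PTmap + ((va >> PAGE_SHIFT) & mask));
}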
diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h
index 0e9697d..91a62ed 100644
--- a/sys/amd64/include/vmparam.h
+++ b/sys/amd64/include/vmparam.h
@@ -41,7 +41,7 @@
#ifndef _MACHINE_VMPARAM_H_
-#define _MACHINE_VMPARAM_H_ 1
+#define _MACHINE_VMPARAM_H_ 1
/*
* Machine dependent constants for AMD64.
@@ -64,7 +64,7 @@
#define MAXSSIZ (64UL*1024*1024) /* max stack size */
#endif
#ifndef SGROWSIZ
-#define SGROWSIZ (128UL*1024) /* amount to grow stack */
+#define SGROWSIZ (128UL*1024) /* amount to grow stack */
#endif
/*
@@ -78,6 +78,12 @@
*/
#define MAXSLP 20
+/*
+ * We provide a machine specific single page allocator through the use
+ * of the direct mapped segment. This uses 2MB pages for reduced
+ * TLB pressure.
+ */
+#define UMA_MD_SMALL_ALLOC
/*
* Virtual addresses of things. Derived from the page directory and
@@ -86,24 +92,30 @@
* messy at times, but hey, we'll do anything to save a page :-)
*/
-#define VM_MAX_KERNEL_ADDRESS VADDR(0, 0, KPTDI+NKPDE-1, NPTEPG-1)
-#define VM_MIN_KERNEL_ADDRESS VADDR(0, 0, PTDPTDI, PTDPTDI)
+#define VM_MAX_KERNEL_ADDRESS VADDR(KPML4I, NPDPEPG-1, NKPDE-1, NPTEPG-1)
+#define VM_MIN_KERNEL_ADDRESS VADDR(KPML4I, KPDPI, 0, 0)
+
+#define DMAP_MIN_ADDRESS VADDR(DMPML4I, 0, 0, 0)
+#define DMAP_MAX_ADDRESS VADDR(DMPML4I+1, 0, 0, 0)
+
+#define KERNBASE VADDR(KPML4I, KPDPI, 0, 0)
-#define KERNBASE VADDR(0, 0, KPTDI, 0)
+#define UPT_MAX_ADDRESS VADDR(PML4PML4I, PML4PML4I, PML4PML4I, PML4PML4I)
+#define UPT_MIN_ADDRESS VADDR(PML4PML4I, 0, 0, 0)
-#define UPT_MAX_ADDRESS VADDR(0, 0, PTDPTDI, PTDPTDI)
-#define UPT_MIN_ADDRESS VADDR(0, 0, PTDPTDI, 0)
+#define VM_MAXUSER_ADDRESS VADDR(NUPML4E, 0, 0, 0)
-#define VM_MAXUSER_ADDRESS UPT_MIN_ADDRESS
+#define USRSTACK VM_MAXUSER_ADDRESS
-#define USRSTACK VM_MAXUSER_ADDRESS
+#define VM_MAX_ADDRESS UPT_MAX_ADDRESS
+#define VM_MIN_ADDRESS (0)
-#define VM_MAX_ADDRESS UPT_MAX_ADDRESS
-#define VM_MIN_ADDRESS (0)
+#define PHYS_TO_DMAP(x) ((x) | DMAP_MIN_ADDRESS)
+#define DMAP_TO_PHYS(x) ((x) & ~DMAP_MIN_ADDRESS)
/* virtual sizes (bytes) for various kernel submaps */
#ifndef VM_KMEM_SIZE
-#define VM_KMEM_SIZE (12 * 1024 * 1024)
+#define VM_KMEM_SIZE (12 * 1024 * 1024)
#endif
/*
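
Aside: because the direct map occupies a single, naturally aligned PML4 slot (DMPML4I) and supported physical addresses lie well below that slot's span, PHYS_TO_DMAP() and DMAP_TO_PHYS() reduce to a constant OR and AND-NOT. A hedged, kernel-style sketch of using them to read a pte through the 2MB-backed direct map; the helper name is made up and not part of the commit:

/* Hypothetical helper, not in the commit: given the physical address of a
 * page-table page and an entry index, read the pte through the direct map
 * instead of setting up a temporary kernel mapping. */
static __inline pt_entry_t
pte_read_via_dmap(vm_paddr_t ptpage_pa, int idx)
{
	pt_entry_t *pt;

	pt = (pt_entry_t *)PHYS_TO_DMAP(ptpage_pa);	/* pa | DMAP_MIN_ADDRESS */
	return (pt[idx]);
}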
diff --git a/sys/conf/kern.mk b/sys/conf/kern.mk
index ba85101..253d267 100644
--- a/sys/conf/kern.mk
+++ b/sys/conf/kern.mk
@@ -57,7 +57,7 @@ CFLAGS+= -mcmodel=medlow -msoft-float
# once pmap is ready. Be excessively careful to not generate FPU code.
#
.if ${MACHINE_ARCH} == "amd64"
-CFLAGS+= -mcmodel=medium -mno-red-zone \
+CFLAGS+= -mcmodel=kernel -mno-red-zone \
-mfpmath=387 -mno-sse -mno-sse2 -mno-mmx -mno-3dnow -msoft-float
.endif