summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
authorjake <jake@FreeBSD.org>2003-03-30 05:24:52 +0000
committerjake <jake@FreeBSD.org>2003-03-30 05:24:52 +0000
commitabd082c83338e1987726a6ef68574de92e2b5ce7 (patch)
tree41325c88af7f4552023a9b5c8da9296a899c3f45 /sys
parent751b8b88db5312dc1394b1ddadf537cbe79765d8 (diff)
downloadFreeBSD-src-abd082c83338e1987726a6ef68574de92e2b5ce7.zip
FreeBSD-src-abd082c83338e1987726a6ef68574de92e2b5ce7.tar.gz
- Add support for PAE and more than 4 gigs of ram on x86, dependent on the
kernel option 'options PAE'. This will only work with device drivers which either use busdma, or are able to handle 64 bit physical addresses. Thanks to Lanny Baron from FreeBSD Systems for the loan of a test machine with 6 gigs of ram. Sponsored by: DARPA, Network Associates Laboratories, FreeBSD Systems
Diffstat (limited to 'sys')
-rw-r--r--sys/amd64/amd64/bios.c8
-rw-r--r--sys/amd64/amd64/locore.S35
-rw-r--r--sys/amd64/amd64/locore.s35
-rw-r--r--sys/amd64/amd64/machdep.c10
-rw-r--r--sys/amd64/amd64/mpboot.S12
-rw-r--r--sys/amd64/amd64/pmap.c60
-rw-r--r--sys/amd64/amd64/vm_machdep.c8
-rw-r--r--sys/amd64/include/bus_amd64.h12
-rw-r--r--sys/amd64/include/bus_at386.h12
-rw-r--r--sys/amd64/include/pmap.h56
-rw-r--r--sys/conf/options.i3863
-rw-r--r--sys/i386/i386/bios.c8
-rw-r--r--sys/i386/i386/locore.s35
-rw-r--r--sys/i386/i386/machdep.c10
-rw-r--r--sys/i386/i386/mpboot.s12
-rw-r--r--sys/i386/i386/pmap.c60
-rw-r--r--sys/i386/i386/vm86bios.s3
-rw-r--r--sys/i386/i386/vm_machdep.c8
-rw-r--r--sys/i386/include/_types.h4
-rw-r--r--sys/i386/include/bus_at386.h12
-rw-r--r--sys/i386/include/param.h5
-rw-r--r--sys/i386/include/pmap.h56
22 files changed, 436 insertions, 28 deletions
diff --git a/sys/amd64/amd64/bios.c b/sys/amd64/amd64/bios.c
index 747a3a1..9357438 100644
--- a/sys/amd64/amd64/bios.c
+++ b/sys/amd64/amd64/bios.c
@@ -384,12 +384,16 @@ bios16(struct bios_args *args, char *fmt, ...)
args->seg.code32.limit = 0xffff;
ptd = (pd_entry_t *)rcr3();
- if (ptd == (u_int *)IdlePTD) {
+#ifdef PAE
+ if (ptd == IdlePDPT) {
+#else
+ if (ptd == IdlePTD) {
+#endif
/*
* no page table, so create one and install it.
*/
pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
- ptd = (pd_entry_t *)((u_int)ptd + KERNBASE);
+ ptd = (pd_entry_t *)((u_int)IdlePTD + KERNBASE);
*ptd = vtophys(pte) | PG_RW | PG_V;
} else {
/*
diff --git a/sys/amd64/amd64/locore.S b/sys/amd64/amd64/locore.S
index d1bac06..12e2e19 100644
--- a/sys/amd64/amd64/locore.S
+++ b/sys/amd64/amd64/locore.S
@@ -138,6 +138,11 @@ SMPpt: .long 0 /* relocated version */
.globl IdlePTD
IdlePTD: .long 0 /* phys addr of kernel PTD */
+#ifdef PAE
+ .globl IdlePDPT
+IdlePDPT: .long 0 /* phys addr of kernel PDPT */
+#endif
+
#ifdef SMP
.globl KPTphys
#endif
@@ -323,8 +328,16 @@ NON_GPROF_ENTRY(btext)
1:
/* Now enable paging */
+#ifdef PAE
+ movl R(IdlePDPT), %eax
+ movl %eax, %cr3
+ movl %cr4, %eax
+ orl $CR4_PAE, %eax
+ movl %eax, %cr4
+#else
movl R(IdlePTD), %eax
movl %eax,%cr3 /* load ptd addr into mmu */
+#endif
movl %cr0,%eax /* get control word */
orl $CR0_PE|CR0_PG,%eax /* enable paging */
movl %eax,%cr0 /* and let's page NOW! */
@@ -341,7 +354,11 @@ begin:
xorl %ebp,%ebp /* mark end of frames */
+#ifdef PAE
+ movl IdlePDPT,%esi
+#else
movl IdlePTD,%esi
+#endif
movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
pushl physfree /* value of first for init386(first) */
@@ -749,6 +766,11 @@ no_kernend:
movl %esi,R(KPTphys)
/* Allocate Page Table Directory */
+#ifdef PAE
+ /* XXX only need 32 bytes (easier for now) */
+ ALLOCPAGES(1)
+ movl %esi,R(IdlePDPT)
+#endif
ALLOCPAGES(NPGPTD)
movl %esi,R(IdlePTD)
@@ -804,6 +826,12 @@ no_kernend:
fillkptphys(%edx)
/* Map page directory. */
+#ifdef PAE
+ movl R(IdlePDPT), %eax
+ movl $1, %ecx
+ fillkptphys($PG_RW)
+#endif
+
movl R(IdlePTD), %eax
movl $NPGPTD, %ecx
fillkptphys($PG_RW)
@@ -889,4 +917,11 @@ no_kernend:
movl $NPGPTD,%ecx
fillkpt(R(IdlePTD), $PG_RW)
+#ifdef PAE
+ movl R(IdlePTD), %eax
+ xorl %ebx, %ebx
+ movl $NPGPTD, %ecx
+ fillkpt(R(IdlePDPT), $0x0)
+#endif
+
ret
diff --git a/sys/amd64/amd64/locore.s b/sys/amd64/amd64/locore.s
index d1bac06..12e2e19 100644
--- a/sys/amd64/amd64/locore.s
+++ b/sys/amd64/amd64/locore.s
@@ -138,6 +138,11 @@ SMPpt: .long 0 /* relocated version */
.globl IdlePTD
IdlePTD: .long 0 /* phys addr of kernel PTD */
+#ifdef PAE
+ .globl IdlePDPT
+IdlePDPT: .long 0 /* phys addr of kernel PDPT */
+#endif
+
#ifdef SMP
.globl KPTphys
#endif
@@ -323,8 +328,16 @@ NON_GPROF_ENTRY(btext)
1:
/* Now enable paging */
+#ifdef PAE
+ movl R(IdlePDPT), %eax
+ movl %eax, %cr3
+ movl %cr4, %eax
+ orl $CR4_PAE, %eax
+ movl %eax, %cr4
+#else
movl R(IdlePTD), %eax
movl %eax,%cr3 /* load ptd addr into mmu */
+#endif
movl %cr0,%eax /* get control word */
orl $CR0_PE|CR0_PG,%eax /* enable paging */
movl %eax,%cr0 /* and let's page NOW! */
@@ -341,7 +354,11 @@ begin:
xorl %ebp,%ebp /* mark end of frames */
+#ifdef PAE
+ movl IdlePDPT,%esi
+#else
movl IdlePTD,%esi
+#endif
movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
pushl physfree /* value of first for init386(first) */
@@ -749,6 +766,11 @@ no_kernend:
movl %esi,R(KPTphys)
/* Allocate Page Table Directory */
+#ifdef PAE
+ /* XXX only need 32 bytes (easier for now) */
+ ALLOCPAGES(1)
+ movl %esi,R(IdlePDPT)
+#endif
ALLOCPAGES(NPGPTD)
movl %esi,R(IdlePTD)
@@ -804,6 +826,12 @@ no_kernend:
fillkptphys(%edx)
/* Map page directory. */
+#ifdef PAE
+ movl R(IdlePDPT), %eax
+ movl $1, %ecx
+ fillkptphys($PG_RW)
+#endif
+
movl R(IdlePTD), %eax
movl $NPGPTD, %ecx
fillkptphys($PG_RW)
@@ -889,4 +917,11 @@ no_kernend:
movl $NPGPTD,%ecx
fillkpt(R(IdlePTD), $PG_RW)
+#ifdef PAE
+ movl R(IdlePTD), %eax
+ xorl %ebx, %ebx
+ movl $NPGPTD, %ecx
+ fillkpt(R(IdlePDPT), $0x0)
+#endif
+
ret
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 80b941e..ac14f72 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -1578,11 +1578,13 @@ int15e820:
if (smap->length == 0)
goto next_run;
+#ifndef PAE
if (smap->base >= 0xffffffff) {
printf("%uK of memory above 4GB ignored\n",
(u_int)(smap->length / 1024));
goto next_run;
}
+#endif
for (i = 0; i <= physmap_idx; i += 2) {
if (smap->base < physmap[i + 1]) {
@@ -2071,7 +2073,11 @@ init386(first)
dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
+#ifdef PAE
+ dblfault_tss.tss_cr3 = (int)IdlePDPT;
+#else
dblfault_tss.tss_cr3 = (int)IdlePTD;
+#endif
dblfault_tss.tss_eip = (int)dblfault_handler;
dblfault_tss.tss_eflags = PSL_KERNEL;
dblfault_tss.tss_ds = dblfault_tss.tss_es =
@@ -2115,7 +2121,11 @@ init386(first)
/* setup proc 0's pcb */
thread0.td_pcb->pcb_flags = 0; /* XXXKSE */
+#ifdef PAE
+ thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
+#else
thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
+#endif
thread0.td_pcb->pcb_ext = 0;
thread0.td_frame = &proc0_tf;
}
diff --git a/sys/amd64/amd64/mpboot.S b/sys/amd64/amd64/mpboot.S
index 5c26a73..e245007 100644
--- a/sys/amd64/amd64/mpboot.S
+++ b/sys/amd64/amd64/mpboot.S
@@ -40,6 +40,8 @@
#include "assym.s"
+#define R(x) ((x)-KERNBASE)
+
/*
* this code MUST be enabled here and in mp_machdep.c
* it follows the very early stages of AP boot by placing values in CMOS ram.
@@ -74,8 +76,16 @@
NON_GPROF_ENTRY(MPentry)
CHECKPOINT(0x36, 3)
/* Now enable paging mode */
- movl IdlePTD-KERNBASE, %eax
+#ifdef PAE
+ movl R(IdlePDPT), %eax
+ movl %eax, %cr3
+ movl %cr4, %eax
+ orl $CR4_PAE, %eax
+ movl %eax, %cr4
+#else
+ movl R(IdlePTD), %eax
movl %eax,%cr3
+#endif
movl %cr0,%eax
orl $CR0_PE|CR0_PG,%eax /* enable paging */
movl %eax,%cr0 /* let the games begin! */
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 059c007..76b45b4 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -197,6 +197,10 @@ static int nkpt;
vm_offset_t kernel_vm_end;
extern u_int32_t KERNend;
+#ifdef PAE
+static uma_zone_t pdptzone;
+#endif
+
/*
* Data for the pv entry allocation mechanism
*/
@@ -248,7 +252,10 @@ static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex);
static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex);
static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t);
static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
-static void *pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
+static void *pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
+#ifdef PAE
+static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
+#endif
static pd_entry_t pdir4mb;
@@ -323,6 +330,9 @@ pmap_bootstrap(firstaddr, loadaddr)
* Initialize the kernel pmap (which is statically allocated).
*/
kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
+#ifdef PAE
+ kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
+#endif
kernel_pmap->pm_active = -1; /* don't allow deactivation */
TAILQ_INIT(&kernel_pmap->pm_pvlist);
LIST_INIT(&allpmaps);
@@ -504,12 +514,21 @@ pmap_set_opt(void)
}
static void *
-pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
*flags = UMA_SLAB_PRIV;
return (void *)kmem_alloc(kernel_map, bytes);
}
+#ifdef PAE
+/*
+ * Backing-page allocator for the PDPT zone (installed on pdptzone with
+ * uma_zone_set_allocf() in pmap_init).
+ *
+ * Tags the slab as UMA_SLAB_PRIV through *flags and returns the result of
+ * contigmalloc() for a single PAGE_SIZE region whose physical address range
+ * is bounded by 0x0 and 0xffffffff, which matches the "pdpt above 4g"
+ * assertion in pmap_pinit.  The zone, bytes and wait arguments are not
+ * consulted; the request size is always PAGE_SIZE.
+ *
+ * NOTE(review): the remaining contigmalloc() arguments (NULL, 0, 1, 0) are
+ * presumably malloc type, flags, alignment and boundary -- confirm against
+ * contigmalloc(9).
+ */
+static void *
+pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+{
+ *flags = UMA_SLAB_PRIV;
+ return (contigmalloc(PAGE_SIZE, NULL, 0, 0x0ULL, 0xffffffffULL, 1, 0));
+}
+#endif
+
/*
* Initialize the pmap module.
* Called by vm_init, to initialize any structures that the pmap
@@ -545,9 +564,15 @@ pmap_init(phys_start, phys_end)
initial_pvs = MINPV;
pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL,
NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
- uma_zone_set_allocf(pvzone, pmap_allocf);
+ uma_zone_set_allocf(pvzone, pmap_pv_allocf);
uma_prealloc(pvzone, initial_pvs);
+#ifdef PAE
+ pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
+ NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, 0);
+ uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf);
+#endif
+
/*
* Now it is safe to enable pv_table recording.
*/
@@ -1241,6 +1266,9 @@ pmap_pinit0(pmap)
{
pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
+#ifdef PAE
+ pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
+#endif
pmap->pm_active = 0;
TAILQ_INIT(&pmap->pm_pvlist);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1265,9 +1293,18 @@ pmap_pinit(pmap)
* No need to allocate page table space yet but we do need a valid
* page directory table.
*/
- if (pmap->pm_pdir == NULL)
+ if (pmap->pm_pdir == NULL) {
pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map,
NBPTD);
+#ifdef PAE
+ pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO);
+ KASSERT(((vm_offset_t)pmap->pm_pdpt &
+ ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0,
+ ("pmap_pinit: pdpt misaligned"));
+ KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30),
+ ("pmap_pinit: pdpt above 4g"));
+#endif
+ }
/*
* allocate object for the ptes
@@ -1310,6 +1347,9 @@ pmap_pinit(pmap)
for (i = 0; i < NPGPTD; i++) {
pa = VM_PAGE_TO_PHYS(ptdpg[i]);
pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M;
+#ifdef PAE
+ pmap->pm_pdpt[i] = pa | PG_V;
+#endif
}
pmap->pm_active = 0;
@@ -1485,6 +1525,10 @@ pmap_release(pmap_t pmap)
vm_page_lock_queues();
for (i = 0; i < NPGPTD; i++) {
m = TAILQ_FIRST(&object->memq);
+#ifdef PAE
+ KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME),
+ ("pmap_release: got wrong ptd page"));
+#endif
m->wire_count--;
atomic_subtract_int(&cnt.v_wire_count, 1);
vm_page_busy(m);
@@ -1680,7 +1724,7 @@ pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va)
pt_entry_t oldpte;
vm_page_t m;
- oldpte = atomic_readandclear_int(ptq);
+ oldpte = pte_load_clear(ptq);
if (oldpte & PG_W)
pmap->pm_stats.wired_count -= 1;
/*
@@ -1846,7 +1890,7 @@ pmap_remove_all(vm_page_t m)
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
pv->pv_pmap->pm_stats.resident_count--;
pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
- tpte = atomic_readandclear_int(pte);
+ tpte = pte_load_clear(pte);
if (tpte & PG_W)
pv->pv_pmap->pm_stats.wired_count--;
if (tpte & PG_A)
@@ -3283,7 +3327,11 @@ pmap_activate(struct thread *td)
#else
pmap->pm_active |= 1;
#endif
+#ifdef PAE
+ cr3 = vtophys(pmap->pm_pdpt);
+#else
cr3 = vtophys(pmap->pm_pdir);
+#endif
/* XXXKSE this is wrong.
* pmap_activate is for the current thread on the current cpu
*/
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index fdd9568..8b395b2 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -170,7 +170,11 @@ cpu_fork(td1, p2, td2, flags)
* Set registers for trampoline to user mode. Leave space for the
* return address on stack. These are the kernel mode register values.
*/
+#ifdef PAE
+ pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt);
+#else
pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir);
+#endif
pcb2->pcb_edi = 0;
pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */
pcb2->pcb_ebp = 0;
@@ -342,7 +346,11 @@ cpu_set_upcall(struct thread *td, void *pcb)
* Set registers for trampoline to user mode. Leave space for the
* return address on stack. These are the kernel mode register values.
*/
+#ifdef PAE
+ pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdpt);
+#else
pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdir);
+#endif
pcb2->pcb_edi = 0;
pcb2->pcb_esi = (int)fork_return; /* trampoline arg */
pcb2->pcb_ebp = 0;
diff --git a/sys/amd64/include/bus_amd64.h b/sys/amd64/include/bus_amd64.h
index 20d21ec..e1f6e8e 100644
--- a/sys/amd64/include/bus_amd64.h
+++ b/sys/amd64/include/bus_amd64.h
@@ -92,15 +92,23 @@
/*
* Bus address and size types
*/
-typedef u_int bus_addr_t;
-typedef u_int bus_size_t;
+#ifdef PAE
+typedef uint64_t bus_addr_t;
+#else
+typedef uint32_t bus_addr_t;
+#endif
+typedef uint32_t bus_size_t;
#define BUS_SPACE_MAXSIZE_24BIT 0xFFFFFF
#define BUS_SPACE_MAXSIZE_32BIT 0xFFFFFFFF
#define BUS_SPACE_MAXSIZE 0xFFFFFFFF
#define BUS_SPACE_MAXADDR_24BIT 0xFFFFFF
#define BUS_SPACE_MAXADDR_32BIT 0xFFFFFFFF
+#ifdef PAE
+#define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL
+#else
#define BUS_SPACE_MAXADDR 0xFFFFFFFF
+#endif
#define BUS_SPACE_UNRESTRICTED (~0)
diff --git a/sys/amd64/include/bus_at386.h b/sys/amd64/include/bus_at386.h
index 20d21ec..e1f6e8e 100644
--- a/sys/amd64/include/bus_at386.h
+++ b/sys/amd64/include/bus_at386.h
@@ -92,15 +92,23 @@
/*
* Bus address and size types
*/
-typedef u_int bus_addr_t;
-typedef u_int bus_size_t;
+#ifdef PAE
+typedef uint64_t bus_addr_t;
+#else
+typedef uint32_t bus_addr_t;
+#endif
+typedef uint32_t bus_size_t;
#define BUS_SPACE_MAXSIZE_24BIT 0xFFFFFF
#define BUS_SPACE_MAXSIZE_32BIT 0xFFFFFFFF
#define BUS_SPACE_MAXSIZE 0xFFFFFFFF
#define BUS_SPACE_MAXADDR_24BIT 0xFFFFFF
#define BUS_SPACE_MAXADDR_32BIT 0xFFFFFFFF
+#ifdef PAE
+#define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL
+#else
#define BUS_SPACE_MAXADDR 0xFFFFFFFF
+#endif
#define BUS_SPACE_UNRESTRICTED (~0)
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index d7f0f66..8fcf2cb 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -88,8 +88,12 @@
* This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc).
*/
#ifndef KVA_PAGES
+#ifdef PAE
+#define KVA_PAGES 512
+#else
#define KVA_PAGES 256
#endif
+#endif
/*
* Pte related macros
@@ -97,8 +101,12 @@
#define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<<PDRSHIFT)|((pti)<<PAGE_SHIFT)))
#ifndef NKPT
+#ifdef PAE
+#define NKPT 120 /* actual number of kernel page tables */
+#else
#define NKPT 30 /* actual number of kernel page tables */
#endif
+#endif
#ifndef NKPDE
#ifdef SMP
#define NKPDE (KVA_PAGES - (NPGPTD + 1)) /* number of page tables/pde's */
@@ -134,12 +142,25 @@
#include <sys/queue.h>
-typedef u_int32_t pd_entry_t;
-typedef u_int32_t pt_entry_t;
+#ifdef PAE
+
+typedef uint64_t pdpt_entry_t;
+typedef uint64_t pd_entry_t;
+typedef uint64_t pt_entry_t;
+
+#define PTESHIFT (3)
+#define PDESHIFT (3)
+
+#else
+
+typedef uint32_t pd_entry_t;
+typedef uint32_t pt_entry_t;
#define PTESHIFT (2)
#define PDESHIFT (2)
+#endif
+
/*
* Address of current and alternate address space page table maps
* and directories.
@@ -149,6 +170,9 @@ extern pt_entry_t PTmap[], APTmap[];
extern pd_entry_t PTD[], APTD[];
extern pd_entry_t PTDpde[], APTDpde[];
+#ifdef PAE
+extern pdpt_entry_t *IdlePDPT;
+#endif
extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */
#endif
@@ -183,6 +207,30 @@ pmap_kextract(vm_offset_t va)
}
#define vtophys(va) pmap_kextract(((vm_offset_t) (va)))
+
+#ifdef PAE
+
+/*
+ * Atomically fetch the 64-bit PAE page table entry at *pte and replace it
+ * with zero, returning the previous contents.
+ *
+ * cmpxchg8b compares %edx:%eax with the memory operand.  On a match it
+ * stores %ecx:%ebx (zero here); otherwise it loads the current memory value
+ * into %edx:%eax and clears ZF, so the loop retries with the refreshed
+ * value until the exchange succeeds.
+ */
+static __inline pt_entry_t
+pte_load_clear(pt_entry_t *pte)
+{
+	pt_entry_t r;
+
+	/* Initial guess only; a stale or torn read is corrected by the loop. */
+	r = *pte;
+	/*
+	 * The lock prefix makes the read-modify-write atomic with respect to
+	 * other processors.  *pte is listed as an output and "memory" is
+	 * clobbered because the instruction writes the entry.
+	 */
+	__asm __volatile(
+	    "1:\n"
+	    "\tlock; cmpxchg8b %1\n"
+	    "\tjnz 1b"
+	    : "+A" (r), "=m" (*pte)
+	    : "m" (*pte), "b" (0), "c" (0)
+	    : "memory", "cc");
+	return (r);
+}
+
+#else
+
+#define pte_load_clear(pte) atomic_readandclear_int(pte)
+
+#endif
+
#endif
/*
@@ -202,6 +250,10 @@ struct pmap {
int pm_active; /* active on cpus */
struct pmap_statistics pm_stats; /* pmap statistics */
LIST_ENTRY(pmap) pm_list; /* List of all pmaps */
+#ifdef PAE
+ pdpt_entry_t *pm_pdpt; /* KVA of page directory pointer
+ table */
+#endif
};
#define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list))
diff --git a/sys/conf/options.i386 b/sys/conf/options.i386
index 81fd7db..868cfbd 100644
--- a/sys/conf/options.i386
+++ b/sys/conf/options.i386
@@ -33,6 +33,9 @@ APIC_IO opt_global.h
# Change KVM size. Changes things all over the kernel.
KVA_PAGES opt_global.h
+# Physical address extensions and support for >4G ram. As above.
+PAE opt_global.h
+
CLK_CALIBRATION_LOOP opt_clock.h
CLK_USE_I8254_CALIBRATION opt_clock.h
CLK_USE_TSC_CALIBRATION opt_clock.h
diff --git a/sys/i386/i386/bios.c b/sys/i386/i386/bios.c
index 747a3a1..9357438 100644
--- a/sys/i386/i386/bios.c
+++ b/sys/i386/i386/bios.c
@@ -384,12 +384,16 @@ bios16(struct bios_args *args, char *fmt, ...)
args->seg.code32.limit = 0xffff;
ptd = (pd_entry_t *)rcr3();
- if (ptd == (u_int *)IdlePTD) {
+#ifdef PAE
+ if (ptd == IdlePDPT) {
+#else
+ if (ptd == IdlePTD) {
+#endif
/*
* no page table, so create one and install it.
*/
pte = (pt_entry_t *)malloc(PAGE_SIZE, M_TEMP, M_WAITOK);
- ptd = (pd_entry_t *)((u_int)ptd + KERNBASE);
+ ptd = (pd_entry_t *)((u_int)IdlePTD + KERNBASE);
*ptd = vtophys(pte) | PG_RW | PG_V;
} else {
/*
diff --git a/sys/i386/i386/locore.s b/sys/i386/i386/locore.s
index d1bac06..12e2e19 100644
--- a/sys/i386/i386/locore.s
+++ b/sys/i386/i386/locore.s
@@ -138,6 +138,11 @@ SMPpt: .long 0 /* relocated version */
.globl IdlePTD
IdlePTD: .long 0 /* phys addr of kernel PTD */
+#ifdef PAE
+ .globl IdlePDPT
+IdlePDPT: .long 0 /* phys addr of kernel PDPT */
+#endif
+
#ifdef SMP
.globl KPTphys
#endif
@@ -323,8 +328,16 @@ NON_GPROF_ENTRY(btext)
1:
/* Now enable paging */
+#ifdef PAE
+ movl R(IdlePDPT), %eax
+ movl %eax, %cr3
+ movl %cr4, %eax
+ orl $CR4_PAE, %eax
+ movl %eax, %cr4
+#else
movl R(IdlePTD), %eax
movl %eax,%cr3 /* load ptd addr into mmu */
+#endif
movl %cr0,%eax /* get control word */
orl $CR0_PE|CR0_PG,%eax /* enable paging */
movl %eax,%cr0 /* and let's page NOW! */
@@ -341,7 +354,11 @@ begin:
xorl %ebp,%ebp /* mark end of frames */
+#ifdef PAE
+ movl IdlePDPT,%esi
+#else
movl IdlePTD,%esi
+#endif
movl %esi,(KSTACK_PAGES*PAGE_SIZE-PCB_SIZE+PCB_CR3)(%eax)
pushl physfree /* value of first for init386(first) */
@@ -749,6 +766,11 @@ no_kernend:
movl %esi,R(KPTphys)
/* Allocate Page Table Directory */
+#ifdef PAE
+ /* XXX only need 32 bytes (easier for now) */
+ ALLOCPAGES(1)
+ movl %esi,R(IdlePDPT)
+#endif
ALLOCPAGES(NPGPTD)
movl %esi,R(IdlePTD)
@@ -804,6 +826,12 @@ no_kernend:
fillkptphys(%edx)
/* Map page directory. */
+#ifdef PAE
+ movl R(IdlePDPT), %eax
+ movl $1, %ecx
+ fillkptphys($PG_RW)
+#endif
+
movl R(IdlePTD), %eax
movl $NPGPTD, %ecx
fillkptphys($PG_RW)
@@ -889,4 +917,11 @@ no_kernend:
movl $NPGPTD,%ecx
fillkpt(R(IdlePTD), $PG_RW)
+#ifdef PAE
+ movl R(IdlePTD), %eax
+ xorl %ebx, %ebx
+ movl $NPGPTD, %ecx
+ fillkpt(R(IdlePDPT), $0x0)
+#endif
+
ret
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index 80b941e..ac14f72 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -1578,11 +1578,13 @@ int15e820:
if (smap->length == 0)
goto next_run;
+#ifndef PAE
if (smap->base >= 0xffffffff) {
printf("%uK of memory above 4GB ignored\n",
(u_int)(smap->length / 1024));
goto next_run;
}
+#endif
for (i = 0; i <= physmap_idx; i += 2) {
if (smap->base < physmap[i + 1]) {
@@ -2071,7 +2073,11 @@ init386(first)
dblfault_tss.tss_esp2 = (int)&dblfault_stack[sizeof(dblfault_stack)];
dblfault_tss.tss_ss = dblfault_tss.tss_ss0 = dblfault_tss.tss_ss1 =
dblfault_tss.tss_ss2 = GSEL(GDATA_SEL, SEL_KPL);
+#ifdef PAE
+ dblfault_tss.tss_cr3 = (int)IdlePDPT;
+#else
dblfault_tss.tss_cr3 = (int)IdlePTD;
+#endif
dblfault_tss.tss_eip = (int)dblfault_handler;
dblfault_tss.tss_eflags = PSL_KERNEL;
dblfault_tss.tss_ds = dblfault_tss.tss_es =
@@ -2115,7 +2121,11 @@ init386(first)
/* setup proc 0's pcb */
thread0.td_pcb->pcb_flags = 0; /* XXXKSE */
+#ifdef PAE
+ thread0.td_pcb->pcb_cr3 = (int)IdlePDPT;
+#else
thread0.td_pcb->pcb_cr3 = (int)IdlePTD;
+#endif
thread0.td_pcb->pcb_ext = 0;
thread0.td_frame = &proc0_tf;
}
diff --git a/sys/i386/i386/mpboot.s b/sys/i386/i386/mpboot.s
index 5c26a73..e245007 100644
--- a/sys/i386/i386/mpboot.s
+++ b/sys/i386/i386/mpboot.s
@@ -40,6 +40,8 @@
#include "assym.s"
+#define R(x) ((x)-KERNBASE)
+
/*
* this code MUST be enabled here and in mp_machdep.c
* it follows the very early stages of AP boot by placing values in CMOS ram.
@@ -74,8 +76,16 @@
NON_GPROF_ENTRY(MPentry)
CHECKPOINT(0x36, 3)
/* Now enable paging mode */
- movl IdlePTD-KERNBASE, %eax
+#ifdef PAE
+ movl R(IdlePDPT), %eax
+ movl %eax, %cr3
+ movl %cr4, %eax
+ orl $CR4_PAE, %eax
+ movl %eax, %cr4
+#else
+ movl R(IdlePTD), %eax
movl %eax,%cr3
+#endif
movl %cr0,%eax
orl $CR0_PE|CR0_PG,%eax /* enable paging */
movl %eax,%cr0 /* let the games begin! */
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 059c007..76b45b4 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -197,6 +197,10 @@ static int nkpt;
vm_offset_t kernel_vm_end;
extern u_int32_t KERNend;
+#ifdef PAE
+static uma_zone_t pdptzone;
+#endif
+
/*
* Data for the pv entry allocation mechanism
*/
@@ -248,7 +252,10 @@ static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex);
static vm_page_t pmap_page_lookup(vm_object_t object, vm_pindex_t pindex);
static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t);
static vm_offset_t pmap_kmem_choose(vm_offset_t addr);
-static void *pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
+static void *pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
+#ifdef PAE
+static void *pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait);
+#endif
static pd_entry_t pdir4mb;
@@ -323,6 +330,9 @@ pmap_bootstrap(firstaddr, loadaddr)
* Initialize the kernel pmap (which is statically allocated).
*/
kernel_pmap->pm_pdir = (pd_entry_t *) (KERNBASE + (u_int)IdlePTD);
+#ifdef PAE
+ kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
+#endif
kernel_pmap->pm_active = -1; /* don't allow deactivation */
TAILQ_INIT(&kernel_pmap->pm_pvlist);
LIST_INIT(&allpmaps);
@@ -504,12 +514,21 @@ pmap_set_opt(void)
}
static void *
-pmap_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+pmap_pv_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
{
*flags = UMA_SLAB_PRIV;
return (void *)kmem_alloc(kernel_map, bytes);
}
+#ifdef PAE
+/*
+ * Backing-page allocator for the PDPT zone (installed on pdptzone with
+ * uma_zone_set_allocf() in pmap_init).
+ *
+ * Tags the slab as UMA_SLAB_PRIV through *flags and returns the result of
+ * contigmalloc() for a single PAGE_SIZE region whose physical address range
+ * is bounded by 0x0 and 0xffffffff, which matches the "pdpt above 4g"
+ * assertion in pmap_pinit.  The zone, bytes and wait arguments are not
+ * consulted; the request size is always PAGE_SIZE.
+ *
+ * NOTE(review): the remaining contigmalloc() arguments (NULL, 0, 1, 0) are
+ * presumably malloc type, flags, alignment and boundary -- confirm against
+ * contigmalloc(9).
+ */
+static void *
+pmap_pdpt_allocf(uma_zone_t zone, int bytes, u_int8_t *flags, int wait)
+{
+ *flags = UMA_SLAB_PRIV;
+ return (contigmalloc(PAGE_SIZE, NULL, 0, 0x0ULL, 0xffffffffULL, 1, 0));
+}
+#endif
+
/*
* Initialize the pmap module.
* Called by vm_init, to initialize any structures that the pmap
@@ -545,9 +564,15 @@ pmap_init(phys_start, phys_end)
initial_pvs = MINPV;
pvzone = uma_zcreate("PV ENTRY", sizeof (struct pv_entry), NULL, NULL,
NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_VM);
- uma_zone_set_allocf(pvzone, pmap_allocf);
+ uma_zone_set_allocf(pvzone, pmap_pv_allocf);
uma_prealloc(pvzone, initial_pvs);
+#ifdef PAE
+ pdptzone = uma_zcreate("PDPT", NPGPTD * sizeof(pdpt_entry_t), NULL,
+ NULL, NULL, NULL, (NPGPTD * sizeof(pdpt_entry_t)) - 1, 0);
+ uma_zone_set_allocf(pdptzone, pmap_pdpt_allocf);
+#endif
+
/*
* Now it is safe to enable pv_table recording.
*/
@@ -1241,6 +1266,9 @@ pmap_pinit0(pmap)
{
pmap->pm_pdir = (pd_entry_t *)(KERNBASE + (vm_offset_t)IdlePTD);
+#ifdef PAE
+ pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
+#endif
pmap->pm_active = 0;
TAILQ_INIT(&pmap->pm_pvlist);
bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1265,9 +1293,18 @@ pmap_pinit(pmap)
* No need to allocate page table space yet but we do need a valid
* page directory table.
*/
- if (pmap->pm_pdir == NULL)
+ if (pmap->pm_pdir == NULL) {
pmap->pm_pdir = (pd_entry_t *)kmem_alloc_pageable(kernel_map,
NBPTD);
+#ifdef PAE
+ pmap->pm_pdpt = uma_zalloc(pdptzone, M_WAITOK | M_ZERO);
+ KASSERT(((vm_offset_t)pmap->pm_pdpt &
+ ((NPGPTD * sizeof(pdpt_entry_t)) - 1)) == 0,
+ ("pmap_pinit: pdpt misaligned"));
+ KASSERT(pmap_kextract((vm_offset_t)pmap->pm_pdpt) < (4ULL<<30),
+ ("pmap_pinit: pdpt above 4g"));
+#endif
+ }
/*
* allocate object for the ptes
@@ -1310,6 +1347,9 @@ pmap_pinit(pmap)
for (i = 0; i < NPGPTD; i++) {
pa = VM_PAGE_TO_PHYS(ptdpg[i]);
pmap->pm_pdir[PTDPTDI + i] = pa | PG_V | PG_RW | PG_A | PG_M;
+#ifdef PAE
+ pmap->pm_pdpt[i] = pa | PG_V;
+#endif
}
pmap->pm_active = 0;
@@ -1485,6 +1525,10 @@ pmap_release(pmap_t pmap)
vm_page_lock_queues();
for (i = 0; i < NPGPTD; i++) {
m = TAILQ_FIRST(&object->memq);
+#ifdef PAE
+ KASSERT(VM_PAGE_TO_PHYS(m) == (pmap->pm_pdpt[i] & PG_FRAME),
+ ("pmap_release: got wrong ptd page"));
+#endif
m->wire_count--;
atomic_subtract_int(&cnt.v_wire_count, 1);
vm_page_busy(m);
@@ -1680,7 +1724,7 @@ pmap_remove_pte(pmap_t pmap, pt_entry_t *ptq, vm_offset_t va)
pt_entry_t oldpte;
vm_page_t m;
- oldpte = atomic_readandclear_int(ptq);
+ oldpte = pte_load_clear(ptq);
if (oldpte & PG_W)
pmap->pm_stats.wired_count -= 1;
/*
@@ -1846,7 +1890,7 @@ pmap_remove_all(vm_page_t m)
while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
pv->pv_pmap->pm_stats.resident_count--;
pte = pmap_pte_quick(pv->pv_pmap, pv->pv_va);
- tpte = atomic_readandclear_int(pte);
+ tpte = pte_load_clear(pte);
if (tpte & PG_W)
pv->pv_pmap->pm_stats.wired_count--;
if (tpte & PG_A)
@@ -3283,7 +3327,11 @@ pmap_activate(struct thread *td)
#else
pmap->pm_active |= 1;
#endif
+#ifdef PAE
+ cr3 = vtophys(pmap->pm_pdpt);
+#else
cr3 = vtophys(pmap->pm_pdir);
+#endif
/* XXXKSE this is wrong.
* pmap_activate is for the current thread on the current cpu
*/
diff --git a/sys/i386/i386/vm86bios.s b/sys/i386/i386/vm86bios.s
index 1f36e0e..68b8b91 100644
--- a/sys/i386/i386/vm86bios.s
+++ b/sys/i386/i386/vm86bios.s
@@ -123,6 +123,9 @@ ENTRY(vm86_bioscall)
movl SCR_NEWPTD(%edx),%eax /* mapping for vm86 page table */
movl %eax,0(%ebx) /* ... install as PTD entry 0 */
+#ifdef PAE
+ movl IdlePDPT,%ecx
+#endif
movl %ecx,%cr3 /* new page tables */
movl SCR_VMFRAME(%edx),%esp /* switch to new stack */
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index fdd9568..8b395b2 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -170,7 +170,11 @@ cpu_fork(td1, p2, td2, flags)
* Set registers for trampoline to user mode. Leave space for the
* return address on stack. These are the kernel mode register values.
*/
+#ifdef PAE
+ pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdpt);
+#else
pcb2->pcb_cr3 = vtophys(vmspace_pmap(p2->p_vmspace)->pm_pdir);
+#endif
pcb2->pcb_edi = 0;
pcb2->pcb_esi = (int)fork_return; /* fork_trampoline argument */
pcb2->pcb_ebp = 0;
@@ -342,7 +346,11 @@ cpu_set_upcall(struct thread *td, void *pcb)
* Set registers for trampoline to user mode. Leave space for the
* return address on stack. These are the kernel mode register values.
*/
+#ifdef PAE
+ pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdpt);
+#else
pcb2->pcb_cr3 = vtophys(vmspace_pmap(td->td_proc->p_vmspace)->pm_pdir);
+#endif
pcb2->pcb_edi = 0;
pcb2->pcb_esi = (int)fork_return; /* trampoline arg */
pcb2->pcb_ebp = 0;
diff --git a/sys/i386/include/_types.h b/sys/i386/include/_types.h
index ad57438..93c2a36 100644
--- a/sys/i386/include/_types.h
+++ b/sys/i386/include/_types.h
@@ -102,7 +102,11 @@ typedef __uint64_t __uint_least64_t;
typedef __uint32_t __u_register_t;
typedef __uint32_t __vm_offset_t;
typedef __int64_t __vm_ooffset_t;
+#ifdef PAE
+typedef __uint64_t __vm_paddr_t;
+#else
typedef __uint32_t __vm_paddr_t;
+#endif
typedef __uint64_t __vm_pindex_t;
typedef __uint32_t __vm_size_t;
diff --git a/sys/i386/include/bus_at386.h b/sys/i386/include/bus_at386.h
index 20d21ec..e1f6e8e 100644
--- a/sys/i386/include/bus_at386.h
+++ b/sys/i386/include/bus_at386.h
@@ -92,15 +92,23 @@
/*
* Bus address and size types
*/
-typedef u_int bus_addr_t;
-typedef u_int bus_size_t;
+#ifdef PAE
+typedef uint64_t bus_addr_t;
+#else
+typedef uint32_t bus_addr_t;
+#endif
+typedef uint32_t bus_size_t;
#define BUS_SPACE_MAXSIZE_24BIT 0xFFFFFF
#define BUS_SPACE_MAXSIZE_32BIT 0xFFFFFFFF
#define BUS_SPACE_MAXSIZE 0xFFFFFFFF
#define BUS_SPACE_MAXADDR_24BIT 0xFFFFFF
#define BUS_SPACE_MAXADDR_32BIT 0xFFFFFFFF
+#ifdef PAE
+#define BUS_SPACE_MAXADDR 0xFFFFFFFFFFFFFFFFULL
+#else
#define BUS_SPACE_MAXADDR 0xFFFFFFFF
+#endif
#define BUS_SPACE_UNRESTRICTED (~0)
diff --git a/sys/i386/include/param.h b/sys/i386/include/param.h
index 4da42a9..c3b9c59 100644
--- a/sys/i386/include/param.h
+++ b/sys/i386/include/param.h
@@ -87,8 +87,13 @@
#define PAGE_MASK (PAGE_SIZE-1)
#define NPTEPG (PAGE_SIZE/(sizeof (pt_entry_t)))
+#ifdef PAE
+#define NPGPTD 4
+#define PDRSHIFT 21 /* LOG2(NBPDR) */
+#else
#define NPGPTD 1
#define PDRSHIFT 22 /* LOG2(NBPDR) */
+#endif
#define NBPTD (NPGPTD<<PAGE_SHIFT)
#define NPDEPTD (NBPTD/(sizeof (pd_entry_t)))
diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h
index d7f0f66..8fcf2cb 100644
--- a/sys/i386/include/pmap.h
+++ b/sys/i386/include/pmap.h
@@ -88,8 +88,12 @@
* This **MUST** be a multiple of 4 (eg: 252, 256, 260, etc).
*/
#ifndef KVA_PAGES
+#ifdef PAE
+#define KVA_PAGES 512
+#else
#define KVA_PAGES 256
#endif
+#endif
/*
* Pte related macros
@@ -97,8 +101,12 @@
#define VADDR(pdi, pti) ((vm_offset_t)(((pdi)<<PDRSHIFT)|((pti)<<PAGE_SHIFT)))
#ifndef NKPT
+#ifdef PAE
+#define NKPT 120 /* actual number of kernel page tables */
+#else
#define NKPT 30 /* actual number of kernel page tables */
#endif
+#endif
#ifndef NKPDE
#ifdef SMP
#define NKPDE (KVA_PAGES - (NPGPTD + 1)) /* number of page tables/pde's */
@@ -134,12 +142,25 @@
#include <sys/queue.h>
-typedef u_int32_t pd_entry_t;
-typedef u_int32_t pt_entry_t;
+#ifdef PAE
+
+typedef uint64_t pdpt_entry_t;
+typedef uint64_t pd_entry_t;
+typedef uint64_t pt_entry_t;
+
+#define PTESHIFT (3)
+#define PDESHIFT (3)
+
+#else
+
+typedef uint32_t pd_entry_t;
+typedef uint32_t pt_entry_t;
#define PTESHIFT (2)
#define PDESHIFT (2)
+#endif
+
/*
* Address of current and alternate address space page table maps
* and directories.
@@ -149,6 +170,9 @@ extern pt_entry_t PTmap[], APTmap[];
extern pd_entry_t PTD[], APTD[];
extern pd_entry_t PTDpde[], APTDpde[];
+#ifdef PAE
+extern pdpt_entry_t *IdlePDPT;
+#endif
extern pd_entry_t *IdlePTD; /* physical address of "Idle" state directory */
#endif
@@ -183,6 +207,30 @@ pmap_kextract(vm_offset_t va)
}
#define vtophys(va) pmap_kextract(((vm_offset_t) (va)))
+
+#ifdef PAE
+
+/*
+ * Atomically fetch the 64-bit PAE page table entry at *pte and replace it
+ * with zero, returning the previous contents.
+ *
+ * cmpxchg8b compares %edx:%eax with the memory operand.  On a match it
+ * stores %ecx:%ebx (zero here); otherwise it loads the current memory value
+ * into %edx:%eax and clears ZF, so the loop retries with the refreshed
+ * value until the exchange succeeds.
+ */
+static __inline pt_entry_t
+pte_load_clear(pt_entry_t *pte)
+{
+	pt_entry_t r;
+
+	/* Initial guess only; a stale or torn read is corrected by the loop. */
+	r = *pte;
+	/*
+	 * The lock prefix makes the read-modify-write atomic with respect to
+	 * other processors.  *pte is listed as an output and "memory" is
+	 * clobbered because the instruction writes the entry.
+	 */
+	__asm __volatile(
+	    "1:\n"
+	    "\tlock; cmpxchg8b %1\n"
+	    "\tjnz 1b"
+	    : "+A" (r), "=m" (*pte)
+	    : "m" (*pte), "b" (0), "c" (0)
+	    : "memory", "cc");
+	return (r);
+}
+
+#else
+
+#define pte_load_clear(pte) atomic_readandclear_int(pte)
+
+#endif
+
#endif
/*
@@ -202,6 +250,10 @@ struct pmap {
int pm_active; /* active on cpus */
struct pmap_statistics pm_stats; /* pmap statistics */
LIST_ENTRY(pmap) pm_list; /* List of all pmaps */
+#ifdef PAE
+ pdpt_entry_t *pm_pdpt; /* KVA of page directory pointer
+ table */
+#endif
};
#define pmap_page_is_mapped(m) (!TAILQ_EMPTY(&(m)->md.pv_list))
OpenPOWER on IntegriCloud