summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
authordillon <dillon@FreeBSD.org>2000-05-29 22:40:54 +0000
committerdillon <dillon@FreeBSD.org>2000-05-29 22:40:54 +0000
commit82627e96a026e438a7251b0e5dc0a5322435693c (patch)
tree30efad495e652bce5e17011dd662ffce150ecd07 /sys
parent2e7f60e876b44bc7a2762fa018f17c30864bc19d (diff)
downloadFreeBSD-src-82627e96a026e438a7251b0e5dc0a5322435693c.zip
FreeBSD-src-82627e96a026e438a7251b0e5dc0a5322435693c.tar.gz
This is a cleanup patch to Peter's new OBJT_PHYS VM object type
and sysv shared memory support for it. It implements a new PG_UNMANAGED flag that has slightly different characteristics from PG_FICTITIOUS. A new sysctl, kern.ipc.shm_use_phys, has been added to enable the use of physically-backed sysv shared memory rather than swap-backed. Physically backed shm segments are not tracked with PV entries, allowing programs which use a large shm segment as a rendezvous point to operate without eating an insane amount of KVM in the PV entry management. Read: Oracle. Peter's OBJT_PHYS object will also allow us to eventually implement page-table sharing and/or 4MB physical page support for such segments. We're halfway there.
Diffstat (limited to 'sys')
-rw-r--r--sys/amd64/amd64/pmap.c11
-rw-r--r--sys/i386/i386/pmap.c11
-rw-r--r--sys/kern/sysv_shm.c17
-rw-r--r--sys/vm/phys_pager.c21
-rw-r--r--sys/vm/vm_fault.c2
-rw-r--r--sys/vm/vm_object.c13
-rw-r--r--sys/vm/vm_object.h2
-rw-r--r--sys/vm/vm_page.c63
-rw-r--r--sys/vm/vm_page.h9
-rw-r--r--sys/vm/vm_pageout.c16
10 files changed, 118 insertions, 47 deletions
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 5e3d6b5..7b27446 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -2095,7 +2095,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
* raise IPL while manipulating pv_table since pmap_enter can be
* called at interrupt time.
*/
- if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
+ if (pmap_initialized &&
+ (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
pmap_insert_entry(pmap, va, mpte, m);
pa |= PG_MANAGED;
}
@@ -2223,7 +2224,8 @@ retry:
* raise IPL while manipulating pv_table since pmap_enter can be
* called at interrupt time.
*/
- pmap_insert_entry(pmap, va, mpte, m);
+ if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
+ pmap_insert_entry(pmap, va, mpte, m);
/*
* Increment counters
@@ -2235,7 +2237,10 @@ retry:
/*
* Now validate mapping with RO protection
*/
- *pte = pa | PG_V | PG_U | PG_MANAGED;
+ if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
+ *pte = pa | PG_V | PG_U;
+ else
+ *pte = pa | PG_V | PG_U | PG_MANAGED;
return mpte;
}
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 5e3d6b5..7b27446 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -2095,7 +2095,8 @@ pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
* raise IPL while manipulating pv_table since pmap_enter can be
* called at interrupt time.
*/
- if (pmap_initialized && (m->flags & PG_FICTITIOUS) == 0) {
+ if (pmap_initialized &&
+ (m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0) {
pmap_insert_entry(pmap, va, mpte, m);
pa |= PG_MANAGED;
}
@@ -2223,7 +2224,8 @@ retry:
* raise IPL while manipulating pv_table since pmap_enter can be
* called at interrupt time.
*/
- pmap_insert_entry(pmap, va, mpte, m);
+ if ((m->flags & (PG_FICTITIOUS|PG_UNMANAGED)) == 0)
+ pmap_insert_entry(pmap, va, mpte, m);
/*
* Increment counters
@@ -2235,7 +2237,10 @@ retry:
/*
* Now validate mapping with RO protection
*/
- *pte = pa | PG_V | PG_U | PG_MANAGED;
+ if (m->flags & (PG_FICTITIOUS|PG_UNMANAGED))
+ *pte = pa | PG_V | PG_U;
+ else
+ *pte = pa | PG_V | PG_U | PG_MANAGED;
return mpte;
}
diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c
index 16019dc..11a5568 100644
--- a/sys/kern/sysv_shm.c
+++ b/sys/kern/sysv_shm.c
@@ -126,12 +126,15 @@ struct shminfo shminfo = {
SHMALL
};
+static int shm_use_phys;
+
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, OID_AUTO, shmmax, CTLFLAG_RW, &shminfo.shmmax, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmmin, CTLFLAG_RW, &shminfo.shmmin, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmmni, CTLFLAG_RD, &shminfo.shmmni, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmseg, CTLFLAG_RW, &shminfo.shmseg, 0, "");
SYSCTL_INT(_kern_ipc, OID_AUTO, shmall, CTLFLAG_RW, &shminfo.shmall, 0, "");
+SYSCTL_INT(_kern_ipc, OID_AUTO, shm_use_phys, CTLFLAG_RW, &shm_use_phys, 0, "");
static int
shm_find_segment_by_key(key)
@@ -528,13 +531,13 @@ shmget_allocate_segment(p, uap, mode)
* We make sure that we have allocated a pager before we need
* to.
*/
-#ifdef SHM_PHYS_BACKED
- shm_handle->shm_object =
- vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
-#else
- shm_handle->shm_object =
- vm_pager_allocate(OBJT_SWAP, 0, size, VM_PROT_DEFAULT, 0);
-#endif
+ if (shm_use_phys) {
+ shm_handle->shm_object =
+ vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
+ } else {
+ shm_handle->shm_object =
+ vm_pager_allocate(OBJT_SWAP, 0, size, VM_PROT_DEFAULT, 0);
+ }
vm_object_clear_flag(shm_handle->shm_object, OBJ_ONEMAPPING);
vm_object_set_flag(shm_handle->shm_object, OBJ_NOSPLIT);
diff --git a/sys/vm/phys_pager.c b/sys/vm/phys_pager.c
index a48126c..586844f 100644
--- a/sys/vm/phys_pager.c
+++ b/sys/vm/phys_pager.c
@@ -104,7 +104,9 @@ phys_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
object = vm_object_allocate(OBJT_PHYS,
OFF_TO_IDX(foff + size));
object->handle = handle;
+#if 0
TAILQ_INIT(&object->un_pager.physp.physp_pglist);
+#endif
TAILQ_INSERT_TAIL(&phys_pager_object_list, object,
pager_object_list);
} else {
@@ -131,20 +133,6 @@ phys_pager_dealloc(object)
int s;
TAILQ_REMOVE(&phys_pager_object_list, object, pager_object_list);
- /*
- * Free up our fake pages.
- */
- s = splvm();
- while ((m = TAILQ_FIRST(&object->un_pager.physp.physp_pglist)) != 0) {
- TAILQ_REMOVE(&object->un_pager.physp.physp_pglist, m, pageq);
- /* return the page back to normal */
- m->flags &= ~PG_FICTITIOUS;
- m->dirty = 0;
- vm_page_unwire(m, 0);
- vm_page_flag_clear(m, PG_ZERO);
- vm_page_free(m);
- }
- splx(s);
}
static int
@@ -165,8 +153,7 @@ phys_pager_getpages(object, m, count, reqpage)
vm_page_zero_fill(m[i]);
vm_page_flag_set(m[i], PG_ZERO);
/* Switch off pv_entries */
- vm_page_wire(m[i]);
- vm_page_flag_set(m[i], PG_FICTITIOUS);
+ vm_page_unmanage(m[i]);
m[i]->valid = VM_PAGE_BITS_ALL;
m[i]->dirty = 0;
/* The requested page must remain busy, the others not. */
@@ -174,8 +161,6 @@ phys_pager_getpages(object, m, count, reqpage)
vm_page_flag_clear(m[i], PG_BUSY);
m[i]->busy = 0;
}
- TAILQ_INSERT_TAIL(&object->un_pager.physp.physp_pglist, m[i],
- pageq);
}
splx(s);
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 14133fa..af4fe35 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -423,7 +423,7 @@ readrest:
if (mt == NULL || (mt->valid != VM_PAGE_BITS_ALL))
break;
if (mt->busy ||
- (mt->flags & (PG_BUSY | PG_FICTITIOUS)) ||
+ (mt->flags & (PG_BUSY | PG_FICTITIOUS | PG_UNMANAGED)) ||
mt->hold_count ||
mt->wire_count)
continue;
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 1b33f78..e5403d1 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -833,12 +833,14 @@ shadowlookup:
/*
* If the page is busy or not in a normal active state,
- * we skip it. Things can break if we mess with pages
- * in any of the below states.
+ * we skip it. If the page is not managed there are no
+ * page queues to mess with. Things can break if we mess
+ * with pages in any of the below states.
*/
if (
m->hold_count ||
m->wire_count ||
+ (m->flags & PG_UNMANAGED) ||
m->valid != VM_PAGE_BITS_ALL
) {
continue;
@@ -1394,6 +1396,13 @@ vm_object_page_remove(object, start, end, clean_only)
all = ((end == 0) && (start == 0));
+ /*
+ * Since physically-backed objects do not use managed pages, we can't
+ * remove pages from the object (we must instead remove the page
+ * references, and then destroy the object).
+ */
+ KASSERT(object->type != OBJT_PHYS, ("attempt to remove pages from a physical object"));
+
vm_object_pip_add(object, 1);
again:
size = end - start;
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index 62a3bbe..c9f239c 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -123,6 +123,7 @@ struct vm_object {
TAILQ_HEAD(, vm_page) devp_pglist;
} devp;
+#if 0
/*
* Physmem pager
*
@@ -131,6 +132,7 @@ struct vm_object {
struct {
TAILQ_HEAD(, vm_page) physp_pglist;
} physp;
+#endif
/*
* Swap pager
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 6b2b320..9701150 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -688,7 +688,7 @@ vm_page_select_cache(object, pindex)
(pindex + object->pg_color) & PQ_L2_MASK,
FALSE
);
- if (m && ((m->flags & PG_BUSY) || m->busy ||
+ if (m && ((m->flags & (PG_BUSY|PG_UNMANAGED)) || m->busy ||
m->hold_count || m->wire_count)) {
vm_page_deactivate(m);
continue;
@@ -997,7 +997,7 @@ vm_page_activate(m)
vm_page_unqueue(m);
- if (m->wire_count == 0) {
+ if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
m->queue = PQ_ACTIVE;
vm_page_queues[PQ_ACTIVE].lcnt++;
TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
@@ -1128,9 +1128,17 @@ vm_page_free_toq(vm_page_t m)
}
}
+ /*
+ * Clear the UNMANAGED flag when freeing an unmanaged page.
+ */
+
+ if (m->flags & PG_UNMANAGED) {
+ m->flags &= ~PG_UNMANAGED;
+ } else {
#ifdef __alpha__
- pmap_page_is_free(m);
+ pmap_page_is_free(m);
#endif
+ }
m->queue = PQ_FREE + m->pc;
pq = &vm_page_queues[m->queue];
@@ -1155,6 +1163,39 @@ vm_page_free_toq(vm_page_t m)
}
/*
+ * vm_page_unmanage:
+ *
+ * Prevent PV management from being done on the page. The page is
+ * removed from the paging queues as if it were wired, and as a
+ * consequence of no longer being managed the pageout daemon will not
+ * touch it (since there is no way to locate the pte mappings for the
+ * page). madvise() calls that mess with the pmap will also no longer
+ * operate on the page.
+ *
+ * Beyond that the page is still reasonably 'normal'. Freeing the page
+ * will clear the flag.
+ *
+ * This routine is used by OBJT_PHYS objects - objects using unswappable
+ * physical memory as backing store rather then swap-backed memory and
+ * will eventually be extended to support 4MB unmanaged physical
+ * mappings.
+ */
+
+void
+vm_page_unmanage(vm_page_t m)
+{
+ int s;
+
+ s = splvm();
+ if ((m->flags & PG_UNMANAGED) == 0) {
+ if (m->wire_count == 0)
+ vm_page_unqueue(m);
+ }
+ vm_page_flag_set(m, PG_UNMANAGED);
+ splx(s);
+}
+
+/*
* vm_page_wire:
*
* Mark this page as wired down by yet
@@ -1170,9 +1211,15 @@ vm_page_wire(m)
{
int s;
+ /*
+ * Only bump the wire statistics if the page is not already wired,
+ * and only unqueue the page if it is on some queue (if it is unmanaged
+ * it is already off the queues).
+ */
s = splvm();
if (m->wire_count == 0) {
- vm_page_unqueue(m);
+ if ((m->flags & PG_UNMANAGED) == 0)
+ vm_page_unqueue(m);
cnt.v_wire_count++;
}
m->wire_count++;
@@ -1218,7 +1265,9 @@ vm_page_unwire(m, activate)
m->wire_count--;
if (m->wire_count == 0) {
cnt.v_wire_count--;
- if (activate) {
+ if (m->flags & PG_UNMANAGED) {
+ ;
+ } else if (activate) {
TAILQ_INSERT_TAIL(&vm_page_queues[PQ_ACTIVE].pl, m, pageq);
m->queue = PQ_ACTIVE;
vm_page_queues[PQ_ACTIVE].lcnt++;
@@ -1259,7 +1308,7 @@ _vm_page_deactivate(vm_page_t m, int athead)
return;
s = splvm();
- if (m->wire_count == 0) {
+ if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
if ((m->queue - m->pc) == PQ_CACHE)
cnt.v_reactivated++;
vm_page_unqueue(m);
@@ -1293,7 +1342,7 @@ vm_page_cache(m)
{
int s;
- if ((m->flags & PG_BUSY) || m->busy || m->wire_count) {
+ if ((m->flags & (PG_BUSY|PG_UNMANAGED)) || m->busy || m->wire_count) {
printf("vm_page_cache: attempting to cache busy page\n");
return;
}
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index e61be7f..a25c6ac 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -225,6 +225,13 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT];
* These are the flags defined for vm_page.
*
* Note: PG_FILLED and PG_DIRTY are added for the filesystems.
+ *
+ * Note: PG_UNMANAGED (used by OBJT_PHYS) indicates that the page is
+ * not under PV management but otherwise should be treated as a
+ * normal page. Pages not under PV management cannot be paged out
+ * via the object/vm_page_t because there is no knowledge of their
+ * pte mappings, nor can they be removed from their objects via
+ * the object, and such pages are also not on any PQ queue.
*/
#define PG_BUSY 0x0001 /* page is in transit (O) */
#define PG_WANTED 0x0002 /* someone is waiting for page (O) */
@@ -236,6 +243,7 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT];
#define PG_CLEANCHK 0x0100 /* page will be checked for cleaning */
#define PG_SWAPINPROG 0x0200 /* swap I/O in progress on page */
#define PG_NOSYNC 0x0400 /* do not collect for syncer */
+#define PG_UNMANAGED 0x0800 /* No PV management for page */
/*
* Misc constants.
@@ -399,6 +407,7 @@ void vm_page_remove __P((vm_page_t));
void vm_page_rename __P((vm_page_t, vm_object_t, vm_pindex_t));
vm_offset_t vm_page_startup __P((vm_offset_t, vm_offset_t, vm_offset_t));
vm_page_t vm_add_new_page __P((vm_offset_t pa));
+void vm_page_unmanage __P((vm_page_t));
void vm_page_unwire __P((vm_page_t, int));
void vm_page_wire __P((vm_page_t));
void vm_page_unqueue __P((vm_page_t));
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 1627118..307dd0b 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -233,11 +233,12 @@ vm_pageout_clean(m)
*/
/*
- * Don't mess with the page if it's busy.
+ * Don't mess with the page if it's busy, held, or special
*/
if ((m->hold_count != 0) ||
- ((m->busy != 0) || (m->flags & PG_BUSY)))
+ ((m->busy != 0) || (m->flags & (PG_BUSY|PG_UNMANAGED)))) {
return 0;
+ }
mc[vm_pageout_page_count] = m;
pageout_count = 1;
@@ -279,7 +280,7 @@ more:
break;
}
if (((p->queue - p->pc) == PQ_CACHE) ||
- (p->flags & PG_BUSY) || p->busy) {
+ (p->flags & (PG_BUSY|PG_UNMANAGED)) || p->busy) {
ib = 0;
break;
}
@@ -309,7 +310,7 @@ more:
if ((p = vm_page_lookup(object, pindex + is)) == NULL)
break;
if (((p->queue - p->pc) == PQ_CACHE) ||
- (p->flags & PG_BUSY) || p->busy) {
+ (p->flags & (PG_BUSY|PG_UNMANAGED)) || p->busy) {
break;
}
vm_page_test_dirty(p);
@@ -474,7 +475,7 @@ vm_pageout_object_deactivate_pages(map, object, desired, map_remove_only)
if (p->wire_count != 0 ||
p->hold_count != 0 ||
p->busy != 0 ||
- (p->flags & PG_BUSY) ||
+ (p->flags & (PG_BUSY|PG_UNMANAGED)) ||
!pmap_page_exists(vm_map_pmap(map), p)) {
p = next;
continue;
@@ -1047,7 +1048,10 @@ rescan0:
m = vm_page_list_find(PQ_CACHE, cache_rover, FALSE);
if (!m)
break;
- if ((m->flags & PG_BUSY) || m->busy || m->hold_count || m->wire_count) {
+ if ((m->flags & (PG_BUSY|PG_UNMANAGED)) ||
+ m->busy ||
+ m->hold_count ||
+ m->wire_count) {
#ifdef INVARIANTS
printf("Warning: busy page %p found in cache\n", m);
#endif
OpenPOWER on IntegriCloud