path: root/sys/vm
author	kib <kib@FreeBSD.org>	2011-09-06 10:30:11 +0000
committer	kib <kib@FreeBSD.org>	2011-09-06 10:30:11 +0000
commit	a9d505a22a9d9d343bf6874e995b921ad977453c (patch)
tree	608b3b06589b15335451f37a8c8b11d1779e9a72 /sys/vm
parent	a6bb123606f7afa6fb3342b35dad217c76951ee3 (diff)
download	FreeBSD-src-a9d505a22a9d9d343bf6874e995b921ad977453c.zip
	FreeBSD-src-a9d505a22a9d9d343bf6874e995b921ad977453c.tar.gz
Split the vm_page flags PG_WRITEABLE and PG_REFERENCED into an atomic
flags field. Updates to the atomic flags are performed using atomic ops
on the containing word, do not require any vm lock to be held, and are
non-blocking. The vm_page_aflag_set(9) and vm_page_aflag_clear(9)
functions are provided to modify the aflags.

Document that changes to the flags field only require the page lock.

Introduce the vm_page_reference(9) function to provide a stable KPI and
KBI for filesystems like tmpfs and zfs which need to mark a page as
referenced.

Reviewed by:	alc, attilio
Tested by:	marius, flo (sparc64); andreast (powerpc, powerpc64)
Approved by:	re (bz)
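A minimal standalone sketch of the word-wide atomic update described above, assuming a
userland build with the GCC/Clang __atomic builtins; the struct layout and the names
page and page_aflag_set are illustrative only, not kernel code. The kernel itself uses
atomic_set_32()/atomic_clear_32(), as the vm_page.c hunk below shows.

#include <assert.h>
#include <stdint.h>

struct page {
	uint32_t	pad;		/* stand-in for the preceding members */
	uint8_t		aflags;		/* byte-wide field at a 32-bit aligned offset */
	uint8_t		flags;
	uint16_t	oflags;
};

static void
page_aflag_set(struct page *m, uint8_t bits)
{
	uint32_t *addr, val;

	/*
	 * Not every architecture can update a single byte atomically,
	 * so OR the bits into the aligned 32-bit word containing it,
	 * mirroring the technique in vm_page_aflag_set().
	 */
	addr = (uint32_t *)(void *)&m->aflags;
	assert(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0);
	val = bits;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	val <<= 24;		/* aflags occupies the most significant byte */
#endif
	__atomic_fetch_or(addr, val, __ATOMIC_RELAXED);
}

With this scheme a caller such as tmpfs or zfs can mark a page referenced with a plain
vm_page_reference(m) call while holding no vm lock, which is what the new
vm_page_reference(9) wrapper added below provides.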
Diffstat (limited to 'sys/vm')
-rw-r--r--sys/vm/swap_pager.c2
-rw-r--r--sys/vm/vm_fault.c4
-rw-r--r--sys/vm/vm_mmap.c8
-rw-r--r--sys/vm/vm_object.c4
-rw-r--r--sys/vm/vm_page.c107
-rw-r--r--sys/vm/vm_page.h40
-rw-r--r--sys/vm/vm_pageout.c28
-rw-r--r--sys/vm/vnode_pager.c2
8 files changed, 106 insertions(+), 89 deletions(-)
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index f491adc..d7da4f9 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -1593,7 +1593,7 @@ swp_pager_async_iodone(struct buf *bp)
* status, then finish the I/O ( which decrements the
* busy count and possibly wakes waiter's up ).
*/
- KASSERT((m->flags & PG_WRITEABLE) == 0,
+ KASSERT((m->aflags & PGA_WRITEABLE) == 0,
("swp_pager_async_iodone: page %p is not write"
" protected", m));
vm_page_undirty(m);
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index eeb10a4..1b8ac2f 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -345,9 +345,7 @@ RetryFault:;
* sleeping so that the page daemon is less
* likely to reclaim it.
*/
- vm_page_lock_queues();
- vm_page_flag_set(fs.m, PG_REFERENCED);
- vm_page_unlock_queues();
+ vm_page_aflag_set(fs.m, PGA_REFERENCED);
vm_page_unlock(fs.m);
if (fs.object != fs.first_object) {
if (!VM_OBJECT_TRYLOCK(
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index a46d6b5..ce899e9 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -901,16 +901,16 @@ RestartScan:
if (m->dirty != 0)
mincoreinfo |= MINCORE_MODIFIED_OTHER;
/*
- * The first test for PG_REFERENCED is an
+ * The first test for PGA_REFERENCED is an
* optimization. The second test is
* required because a concurrent pmap
* operation could clear the last reference
- * and set PG_REFERENCED before the call to
+ * and set PGA_REFERENCED before the call to
* pmap_is_referenced().
*/
- if ((m->flags & PG_REFERENCED) != 0 ||
+ if ((m->aflags & PGA_REFERENCED) != 0 ||
pmap_is_referenced(m) ||
- (m->flags & PG_REFERENCED) != 0)
+ (m->aflags & PGA_REFERENCED) != 0)
mincoreinfo |= MINCORE_REFERENCED_OTHER;
}
if (object != NULL)
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 602d99e..3de793b 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -1098,9 +1098,7 @@ shadowlookup:
* sleeping so that the page daemon is less
* likely to reclaim it.
*/
- vm_page_lock_queues();
- vm_page_flag_set(m, PG_REFERENCED);
- vm_page_unlock_queues();
+ vm_page_aflag_set(m, PGA_REFERENCED);
}
vm_page_unlock(m);
if (object != tobject)
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 6d55892..341c238 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -67,30 +67,9 @@
* page queue (vm_page_queue[]), regardless of other mutexes or the
* busy state of a page.
*
- * - a hash chain mutex is required when associating or disassociating
- * a page from the VM PAGE CACHE hash table (vm_page_buckets),
- * regardless of other mutexes or the busy state of a page.
- *
- * - either a hash chain mutex OR a busied page is required in order
- * to modify the page flags. A hash chain mutex must be obtained in
- * order to busy a page. A page's flags cannot be modified by a
- * hash chain mutex if the page is marked busy.
- *
- * - The object memq mutex is held when inserting or removing
- * pages from an object (vm_page_insert() or vm_page_remove()). This
- * is different from the object's main mutex.
- *
- * Generally speaking, you have to be aware of side effects when running
- * vm_page ops. A vm_page_lookup() will return with the hash chain
- * locked, whether it was able to lookup the page or not. vm_page_free(),
- * vm_page_cache(), vm_page_activate(), and a number of other routines
- * will release the hash chain mutex for you. Intermediate manipulation
- * routines such as vm_page_flag_set() expect the hash chain to be held
- * on entry and the hash chain will remain held on return.
- *
- * pageq scanning can only occur with the pageq in question locked.
- * We have a known bottleneck with the active queue, but the cache
- * and free queues are actually arrays already.
+ * - The object mutex is held when inserting or removing
+ * pages from an object (vm_page_insert() or vm_page_remove()).
+ *
*/
/*
@@ -473,33 +452,68 @@ vm_page_startup(vm_offset_t vaddr)
return (vaddr);
}
+
+CTASSERT(offsetof(struct vm_page, aflags) % sizeof(uint32_t) == 0);
+
void
-vm_page_flag_set(vm_page_t m, unsigned short bits)
+vm_page_aflag_set(vm_page_t m, uint8_t bits)
{
+ uint32_t *addr, val;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
/*
- * The PG_WRITEABLE flag can only be set if the page is managed and
+ * The PGA_WRITEABLE flag can only be set if the page is managed and
* VPO_BUSY. Currently, this flag is only set by pmap_enter().
*/
- KASSERT((bits & PG_WRITEABLE) == 0 ||
+ KASSERT((bits & PGA_WRITEABLE) == 0 ||
(m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == VPO_BUSY,
- ("PG_WRITEABLE and !VPO_BUSY"));
- m->flags |= bits;
+ ("PGA_WRITEABLE and !VPO_BUSY"));
+
+ /*
+ * We want to use atomic updates for m->aflags, which is a
+ * byte wide. Not all architectures provide atomic operations
+ * on the single-byte destination. Punt and access the whole
+ * 4-byte word with an atomic update. Parallel non-atomic
+ * updates to the fields included in the update by proximity
+ * are handled properly by atomics.
+ */
+ addr = (void *)&m->aflags;
+ MPASS(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0);
+ val = bits;
+#if BYTE_ORDER == BIG_ENDIAN
+ val <<= 24;
+#endif
+ atomic_set_32(addr, val);
}
void
-vm_page_flag_clear(vm_page_t m, unsigned short bits)
+vm_page_aflag_clear(vm_page_t m, uint8_t bits)
{
+ uint32_t *addr, val;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
/*
- * The PG_REFERENCED flag can only be cleared if the object
+ * The PGA_REFERENCED flag can only be cleared if the object
* containing the page is locked.
*/
- KASSERT((bits & PG_REFERENCED) == 0 || VM_OBJECT_LOCKED(m->object),
- ("PG_REFERENCED and !VM_OBJECT_LOCKED"));
- m->flags &= ~bits;
+ KASSERT((bits & PGA_REFERENCED) == 0 || VM_OBJECT_LOCKED(m->object),
+ ("PGA_REFERENCED and !VM_OBJECT_LOCKED"));
+
+ /*
+ * See the comment in vm_page_aflag_set().
+ */
+ addr = (void *)&m->aflags;
+ MPASS(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0);
+ val = bits;
+#if BYTE_ORDER == BIG_ENDIAN
+ val <<= 24;
+#endif
+ atomic_clear_32(addr, val);
+}
+
+void
+vm_page_reference(vm_page_t m)
+{
+
+ vm_page_aflag_set(m, PGA_REFERENCED);
}
void
@@ -874,7 +888,7 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
* Since we are inserting a new and possibly dirty page,
* update the object's OBJ_MIGHTBEDIRTY flag.
*/
- if (m->flags & PG_WRITEABLE)
+ if (m->aflags & PGA_WRITEABLE)
vm_object_set_writeable_dirty(object);
}
@@ -1390,6 +1404,7 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
}
m->flags = flags;
mtx_unlock(&vm_page_queue_free_mtx);
+ m->aflags = 0;
if (object == NULL || object->type == OBJT_PHYS)
m->oflags = VPO_UNMANAGED;
else
@@ -1480,6 +1495,7 @@ vm_page_alloc_init(vm_page_t m)
vm_page_zero_count--;
/* Don't clear the PG_ZERO flag; we'll need it later. */
m->flags &= PG_ZERO;
+ m->aflags = 0;
m->oflags = VPO_UNMANAGED;
/* Unmanaged pages don't use "act_count". */
return (drop);
@@ -1880,7 +1896,7 @@ vm_page_unwire(vm_page_t m, int activate)
if (activate)
vm_page_enqueue(PQ_ACTIVE, m);
else {
- vm_page_flag_clear(m, PG_WINATCFLS);
+ m->flags &= ~PG_WINATCFLS;
vm_page_enqueue(PQ_INACTIVE, m);
}
vm_page_unlock_queues();
@@ -1923,7 +1939,7 @@ _vm_page_deactivate(vm_page_t m, int athead)
return;
if (m->wire_count == 0 && (m->oflags & VPO_UNMANAGED) == 0) {
vm_page_lock_queues();
- vm_page_flag_clear(m, PG_WINATCFLS);
+ m->flags &= ~PG_WINATCFLS;
if (queue != PQ_NONE)
vm_page_queue_remove(queue, m);
if (athead)
@@ -2156,15 +2172,13 @@ vm_page_dontneed(vm_page_t m)
*
* Perform the pmap_clear_reference() first. Otherwise, a concurrent
* pmap operation, such as pmap_remove(), could clear a reference in
- * the pmap and set PG_REFERENCED on the page before the
+ * the pmap and set PGA_REFERENCED on the page before the
* pmap_clear_reference() had completed. Consequently, the page would
* appear referenced based upon an old reference that occurred before
* this function ran.
*/
pmap_clear_reference(m);
- vm_page_lock_queues();
- vm_page_flag_clear(m, PG_REFERENCED);
- vm_page_unlock_queues();
+ vm_page_aflag_clear(m, PGA_REFERENCED);
if (m->dirty == 0 && pmap_is_modified(m))
vm_page_dirty(m);
@@ -2213,8 +2227,7 @@ retrylookup:
* sleeping so that the page daemon is less
* likely to reclaim it.
*/
- vm_page_lock_queues();
- vm_page_flag_set(m, PG_REFERENCED);
+ vm_page_aflag_set(m, PGA_REFERENCED);
vm_page_sleep(m, "pgrbwt");
goto retrylookup;
} else {
@@ -2329,11 +2342,11 @@ vm_page_clear_dirty_mask(vm_page_t m, int pagebits)
/*
* If the object is locked and the page is neither VPO_BUSY nor
- * PG_WRITEABLE, then the page's dirty field cannot possibly be
+ * PGA_WRITEABLE, then the page's dirty field cannot possibly be
* set by a concurrent pmap operation.
*/
VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
- if ((m->oflags & VPO_BUSY) == 0 && (m->flags & PG_WRITEABLE) == 0)
+ if ((m->oflags & VPO_BUSY) == 0 && (m->aflags & PGA_WRITEABLE) == 0)
m->dirty &= ~pagebits;
else {
#if defined(__amd64__) || defined(__i386__) || defined(__ia64__)
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 1dda1e2..5431d79 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -125,12 +125,13 @@ struct vm_page {
struct md_page md; /* machine dependant stuff */
uint8_t queue; /* page queue index (P,Q) */
int8_t segind;
- u_short flags; /* see below */
+ short hold_count; /* page hold count (P) */
uint8_t order; /* index of the buddy queue */
uint8_t pool;
u_short cow; /* page cow mapping count (P) */
u_int wire_count; /* wired down maps refs (P) */
- short hold_count; /* page hold count (P) */
+ uint8_t aflags; /* access is atomic */
+ uint8_t flags; /* see below, often immutable after alloc */
u_short oflags; /* page flags (O) */
u_char act_count; /* page usage count (O) */
u_char busy; /* page busy count (O) */
@@ -225,21 +226,29 @@ extern struct vpglocks pa_lock[];
/*
* These are the flags defined for vm_page.
*
- * PG_REFERENCED may be cleared only if the object containing the page is
+ * aflags are updated by atomic accesses. Use the vm_page_aflag_set()
+ * and vm_page_aflag_clear() functions to set and clear the flags.
+ *
+ * PGA_REFERENCED may be cleared only if the object containing the page is
* locked.
*
- * PG_WRITEABLE is set exclusively on managed pages by pmap_enter(). When it
+ * PGA_WRITEABLE is set exclusively on managed pages by pmap_enter(). When it
* does so, the page must be VPO_BUSY.
*/
-#define PG_CACHED 0x0001 /* page is cached */
-#define PG_FREE 0x0002 /* page is free */
-#define PG_WINATCFLS 0x0004 /* flush dirty page on inactive q */
-#define PG_FICTITIOUS 0x0008 /* physical page doesn't exist (O) */
-#define PG_WRITEABLE 0x0010 /* page is mapped writeable */
-#define PG_ZERO 0x0040 /* page is zeroed */
-#define PG_REFERENCED 0x0080 /* page has been referenced */
-#define PG_MARKER 0x1000 /* special queue marker page */
-#define PG_SLAB 0x2000 /* object pointer is actually a slab */
+#define PGA_WRITEABLE 0x01 /* page may be mapped writeable */
+#define PGA_REFERENCED 0x02 /* page has been referenced */
+
+/*
+ * Page flags. If changed at any other time than page allocation or
+ * freeing, the modification must be protected by the vm_page lock.
+ */
+#define PG_CACHED 0x01 /* page is cached */
+#define PG_FREE 0x02 /* page is free */
+#define PG_FICTITIOUS 0x04 /* physical page doesn't exist (O) */
+#define PG_ZERO 0x08 /* page is zeroed */
+#define PG_MARKER 0x10 /* special queue marker page */
+#define PG_SLAB 0x20 /* object pointer is actually a slab */
+#define PG_WINATCFLS 0x40 /* flush dirty page on inactive q */
/*
* Misc constants.
@@ -341,8 +350,8 @@ extern struct vpglocks vm_page_queue_lock;
#define VM_ALLOC_COUNT_SHIFT 16
#define VM_ALLOC_COUNT(count) ((count) << VM_ALLOC_COUNT_SHIFT)
-void vm_page_flag_set(vm_page_t m, unsigned short bits);
-void vm_page_flag_clear(vm_page_t m, unsigned short bits);
+void vm_page_aflag_set(vm_page_t m, uint8_t bits);
+void vm_page_aflag_clear(vm_page_t m, uint8_t bits);
void vm_page_busy(vm_page_t m);
void vm_page_flash(vm_page_t m);
void vm_page_io_start(vm_page_t m);
@@ -377,6 +386,7 @@ vm_page_t vm_page_next(vm_page_t m);
int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *);
vm_page_t vm_page_prev(vm_page_t m);
void vm_page_putfake(vm_page_t m);
+void vm_page_reference(vm_page_t m);
void vm_page_remove (vm_page_t);
void vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t);
void vm_page_requeue(vm_page_t m);
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index e9c9927..5dd450e 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -497,7 +497,7 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen)
vm_page_t mt = mc[i];
KASSERT(pageout_status[i] == VM_PAGER_PEND ||
- (mt->flags & PG_WRITEABLE) == 0,
+ (mt->aflags & PGA_WRITEABLE) == 0,
("vm_pageout_flush: page %p is not write protected", mt));
switch (pageout_status[i]) {
case VM_PAGER_OK:
@@ -597,12 +597,10 @@ vm_pageout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object,
continue;
}
actcount = pmap_ts_referenced(p);
- if ((p->flags & PG_REFERENCED) != 0) {
+ if ((p->aflags & PGA_REFERENCED) != 0) {
if (actcount == 0)
actcount = 1;
- vm_page_lock_queues();
- vm_page_flag_clear(p, PG_REFERENCED);
- vm_page_unlock_queues();
+ vm_page_aflag_clear(p, PGA_REFERENCED);
}
if (p->queue != PQ_ACTIVE && actcount != 0) {
vm_page_activate(p);
@@ -846,7 +844,7 @@ rescan0:
* references.
*/
if (object->ref_count == 0) {
- vm_page_flag_clear(m, PG_REFERENCED);
+ vm_page_aflag_clear(m, PGA_REFERENCED);
KASSERT(!pmap_page_is_mapped(m),
("vm_pageout_scan: page %p is mapped", m));
@@ -859,7 +857,7 @@ rescan0:
* level VM system not knowing anything about existing
* references.
*/
- } else if (((m->flags & PG_REFERENCED) == 0) &&
+ } else if (((m->aflags & PGA_REFERENCED) == 0) &&
(actcount = pmap_ts_referenced(m))) {
vm_page_activate(m);
vm_page_unlock(m);
@@ -874,8 +872,8 @@ rescan0:
* "activation count" higher than normal so that we will less
* likely place pages back onto the inactive queue again.
*/
- if ((m->flags & PG_REFERENCED) != 0) {
- vm_page_flag_clear(m, PG_REFERENCED);
+ if ((m->aflags & PGA_REFERENCED) != 0) {
+ vm_page_aflag_clear(m, PGA_REFERENCED);
actcount = pmap_ts_referenced(m);
vm_page_activate(m);
vm_page_unlock(m);
@@ -891,7 +889,7 @@ rescan0:
* be updated.
*/
if (m->dirty != VM_PAGE_BITS_ALL &&
- (m->flags & PG_WRITEABLE) != 0) {
+ (m->aflags & PGA_WRITEABLE) != 0) {
/*
* Avoid a race condition: Unless write access is
* removed from the page, another processor could
@@ -938,7 +936,7 @@ rescan0:
* before being freed. This significantly extends
* the thrash point for a heavily loaded machine.
*/
- vm_page_flag_set(m, PG_WINATCFLS);
+ m->flags |= PG_WINATCFLS;
vm_page_requeue(m);
} else if (maxlaunder > 0) {
/*
@@ -1178,7 +1176,7 @@ unlock_and_continue:
*/
actcount = 0;
if (object->ref_count != 0) {
- if (m->flags & PG_REFERENCED) {
+ if (m->aflags & PGA_REFERENCED) {
actcount += 1;
}
actcount += pmap_ts_referenced(m);
@@ -1192,7 +1190,7 @@ unlock_and_continue:
/*
* Since we have "tested" this bit, we need to clear it now.
*/
- vm_page_flag_clear(m, PG_REFERENCED);
+ vm_page_aflag_clear(m, PGA_REFERENCED);
/*
* Only if an object is currently being used, do we use the
@@ -1435,8 +1433,8 @@ vm_pageout_page_stats()
}
actcount = 0;
- if (m->flags & PG_REFERENCED) {
- vm_page_flag_clear(m, PG_REFERENCED);
+ if (m->aflags & PGA_REFERENCED) {
+ vm_page_aflag_clear(m, PGA_REFERENCED);
actcount += 1;
}
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index 23ade63..cb652f7 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -1132,7 +1132,7 @@ vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount,
m = ma[ncount - 1];
KASSERT(m->busy > 0,
("vnode_pager_generic_putpages: page %p is not busy", m));
- KASSERT((m->flags & PG_WRITEABLE) == 0,
+ KASSERT((m->aflags & PGA_WRITEABLE) == 0,
("vnode_pager_generic_putpages: page %p is not read-only", m));
vm_page_clear_dirty(m, pgoff, PAGE_SIZE -
pgoff);