summary refs log tree commit diff stats
path: root/sys/vm
diff options
context:
space:
mode:
authortegge <tegge@FreeBSD.org>2005-08-10 00:17:36 +0000
committertegge <tegge@FreeBSD.org>2005-08-10 00:17:36 +0000
commita73af1b81d7cd48bf2240a18ec324de4fd37cb27 (patch)
tree7e3a066c1d3dab26c83129720740e8344543d624 /sys/vm
parent1739e5021692bfaef8f06397985c7f7606a38e0d (diff)
downloadFreeBSD-src-a73af1b81d7cd48bf2240a18ec324de4fd37cb27.zip
FreeBSD-src-a73af1b81d7cd48bf2240a18ec324de4fd37cb27.tar.gz
Don't allow pagedaemon to skip pages while scanning PQ_ACTIVE or PQ_INACTIVE
due to the vm object being locked. When a process writes large amounts of data to a file, the vm object associated with that file can contain most of the physical pages on the machine. If the process is preempted while holding the lock on the vm object, pagedaemon would be able to move very few pages from PQ_INACTIVE to PQ_CACHE or from PQ_ACTIVE to PQ_INACTIVE, resulting in unlimited cleaning of dirty pages belonging to other vm objects. Temporarily unlock the page queues lock while locking vm objects to avoid lock order violation. Detect and handle relevant page queue changes. This change depends on both the lock portion of struct vm_object and normal struct vm_page being type stable. Reviewed by: alc
Diffstat (limited to 'sys/vm')
-rw-r--r--sys/vm/vm_object.c5
-rw-r--r--sys/vm/vm_pageout.c75
2 files changed, 75 insertions, 5 deletions
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 213501c..949ffda 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -250,6 +250,11 @@ vm_object_init(void)
_vm_object_allocate(OBJT_DEFAULT, OFF_TO_IDX(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS),
kmem_object);
+ /*
+ * The lock portion of struct vm_object must be type stable due
+ * to vm_pageout_fallback_object_lock locking a vm object
+ * without holding any references to it.
+ */
obj_zone = uma_zcreate("VM OBJECT", sizeof (struct vm_object), NULL,
#ifdef INVARIANTS
vm_object_zdtor,
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 73562db..6860dd6 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -5,6 +5,8 @@
* All rights reserved.
* Copyright (c) 1994 David Greenman
* All rights reserved.
+ * Copyright (c) 2005 Yahoo! Technologies Norway AS
+ * All rights reserved.
*
* This code is derived from software contributed to Berkeley by
* The Mach Operating System project at Carnegie-Mellon University.
@@ -210,6 +212,55 @@ static void vm_req_vmdaemon(void);
static void vm_pageout_page_stats(void);
/*
+ * vm_pageout_fallback_object_lock:
+ *
+ * Lock vm object currently associated with `m'. VM_OBJECT_TRYLOCK is
+ * known to have failed and page queue must be either PQ_ACTIVE or
+ * PQ_INACTIVE. To avoid lock order violation, unlock the page queues
+ * while locking the vm object. Use marker page to detect page queue
+ * changes and maintain notion of next page on page queue. Return
+ * TRUE if no changes were detected, FALSE otherwise. vm object is
+ * locked on return.
+ *
+ * This function depends on both the lock portion of struct vm_object
+ * and normal struct vm_page being type stable.
+ */
+static boolean_t
+vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next)
+{
+ struct vm_page marker;
+ boolean_t unchanged;
+ u_short queue;
+ vm_object_t object;
+
+ /*
+ * Initialize our marker
+ */
+ bzero(&marker, sizeof(marker));
+ marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
+ marker.queue = m->queue;
+ marker.wire_count = 1;
+
+ queue = m->queue;
+ object = m->object;
+
+ TAILQ_INSERT_AFTER(&vm_page_queues[queue].pl,
+ m, &marker, pageq);
+ vm_page_unlock_queues();
+ VM_OBJECT_LOCK(object);
+ vm_page_lock_queues();
+
+ /* Page queue might have changed. */
+ *next = TAILQ_NEXT(&marker, pageq);
+ unchanged = (m->queue == queue &&
+ m->object == object &&
+ &marker == TAILQ_NEXT(m, pageq));
+ TAILQ_REMOVE(&vm_page_queues[queue].pl,
+ &marker, pageq);
+ return (unchanged);
+}
+
+/*
* vm_pageout_clean:
*
* Clean the page and remove it from the laundry.
@@ -749,7 +800,10 @@ rescan0:
* Don't mess with busy pages, keep in the front of the
* queue, most likely are being paged out.
*/
- if (!VM_OBJECT_TRYLOCK(object)) {
+ if (!VM_OBJECT_TRYLOCK(object) &&
+ (!vm_pageout_fallback_object_lock(m, &next) ||
+ m->hold_count != 0)) {
+ VM_OBJECT_UNLOCK(object);
addl_page_shortage++;
continue;
}
@@ -1024,8 +1078,13 @@ unlock_and_continue:
next = TAILQ_NEXT(m, pageq);
object = m->object;
- if (!VM_OBJECT_TRYLOCK(object)) {
- vm_pageq_requeue(m);
+ if ((m->flags & PG_MARKER) != 0) {
+ m = next;
+ continue;
+ }
+ if (!VM_OBJECT_TRYLOCK(object) &&
+ !vm_pageout_fallback_object_lock(m, &next)) {
+ VM_OBJECT_UNLOCK(object);
m = next;
continue;
}
@@ -1296,8 +1355,14 @@ vm_pageout_page_stats()
next = TAILQ_NEXT(m, pageq);
object = m->object;
- if (!VM_OBJECT_TRYLOCK(object)) {
- vm_pageq_requeue(m);
+
+ if ((m->flags & PG_MARKER) != 0) {
+ m = next;
+ continue;
+ }
+ if (!VM_OBJECT_TRYLOCK(object) &&
+ !vm_pageout_fallback_object_lock(m, &next)) {
+ VM_OBJECT_UNLOCK(object);
m = next;
continue;
}
OpenPOWER on IntegriCloud