author	kib <kib@FreeBSD.org>	2009-06-23 20:45:22 +0000
committer	kib <kib@FreeBSD.org>	2009-06-23 20:45:22 +0000
commit	fa686c638eece83a18de058d1934f4722487818b (patch)
tree	efadbd0bda4d9f0ec36869d4d465b2cabf2dcd1b /sys/kern
parent	39fa9f1c9918ad9bb25af4f1bbce28c34cb2cd65 (diff)
Implement global and per-uid accounting of the anonymous memory. Add
rlimit RLIMIT_SWAP that limits the amount of swap that may be reserved
for the uid.

The accounting information (charge) is associated with either map entry,
or vm object backing the entry, assuming the object is the first one
in the shadow chain and entry does not require COW. Charge is moved
from entry to object on allocation of the object, e.g. during the mmap,
assuming the object is allocated, or on the first page fault on the
entry. It moves back to the entry on forks due to COW setup.

The per-entry granularity of accounting makes the charge process fair
for processes that change uid during lifetime, and decrements charge
for proper uid when region is unmapped.

The interface of vm_pager_allocate(9) is extended by adding struct ucred *,
that is used to charge appropriate uid when allocation is performed by
kernel, e.g. md(4).

Several syscalls, among them is fork(2), may now return ENOMEM when
global or per-uid limits are enforced.

In collaboration with:	pho
Reviewed by:	alc
Approved by:	re (kensmith)
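[Editor's sketch] To make the reservation discipline concrete, the following
stand-alone C program models the semantics described above: a reservation
succeeds only when both the global pool and the per-uid limit allow it, and
every release mirrors an earlier successful reserve. The function names echo
swap_reserve_by_uid()/swap_release_by_uid() from the diff below, but the
types, the limit values, and the absence of locking (the kernel protects
ui_vmsize with the new ui_vmsize_mtx) are illustrative assumptions, not the
kernel implementation.

#include <sys/types.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct uidinfo {
	uid_t	ui_uid;
	int64_t	ui_vmsize;	/* per-uid reserved swap, as in the diff */
	int64_t	ui_swap_limit;	/* stand-in for RLIMIT_SWAP */
};

static int64_t swap_total = 8 * 1024 * 1024;	/* hypothetical global pool */
static int64_t swap_reserved;			/* global reservation count */

/*
 * Reserve incr bytes of swap for uip.  Fails when either the global
 * pool or the per-uid limit would be exceeded.
 */
static bool
swap_reserve_by_uid(int64_t incr, struct uidinfo *uip)
{
	if (swap_reserved + incr > swap_total ||
	    uip->ui_vmsize + incr > uip->ui_swap_limit)
		return (false);
	swap_reserved += incr;
	uip->ui_vmsize += incr;
	return (true);
}

/* A release must exactly mirror an earlier successful reserve. */
static void
swap_release_by_uid(int64_t decr, struct uidinfo *uip)
{
	swap_reserved -= decr;
	uip->ui_vmsize -= decr;
}

int
main(void)
{
	struct uidinfo u = { .ui_uid = 1001, .ui_swap_limit = 1024 * 1024 };

	/*
	 * Grow by 64 pages worth of swap, then shrink again: the same
	 * reserve/release pairing shm_dotruncate() performs below.
	 */
	if (!swap_reserve_by_uid(64 * 4096, &u))
		return (1);	/* would surface to callers as ENOMEM */
	swap_release_by_uid(64 * 4096, &u);
	printf("uid %ld reserved: %lld\n", (long)u.ui_uid,
	    (long long)u.ui_vmsize);
	return (0);
}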
Diffstat (limited to 'sys/kern')
-rw-r--r--	sys/kern/kern_fork.c	18
-rw-r--r--	sys/kern/kern_resource.c	6
-rw-r--r--	sys/kern/sys_process.c	6
-rw-r--r--	sys/kern/sysv_shm.c	11
-rw-r--r--	sys/kern/uipc_shm.c	31
5 files changed, 54 insertions, 18 deletions
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index 721d48d..bfa43a9 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -214,6 +214,7 @@ fork1(td, flags, pages, procp)
struct thread *td2;
struct sigacts *newsigacts;
struct vmspace *vm2;
+ vm_ooffset_t mem_charged;
int error;
/* Can't copy and clear. */
@@ -274,6 +275,7 @@ norfproc_fail:
* however it proved un-needed and caused problems
*/
+ mem_charged = 0;
vm2 = NULL;
/* Allocate new proc. */
newproc = uma_zalloc(proc_zone, M_WAITOK);
@@ -295,12 +297,24 @@ norfproc_fail:
}
}
if ((flags & RFMEM) == 0) {
- vm2 = vmspace_fork(p1->p_vmspace);
+ vm2 = vmspace_fork(p1->p_vmspace, &mem_charged);
if (vm2 == NULL) {
error = ENOMEM;
goto fail1;
}
- }
+ if (!swap_reserve(mem_charged)) {
+ /*
+ * The swap reservation failed. The accounting
+ * from the entries of the copied vm2 will be
+ * subtracted in vmspace_free(), so force the
+ * reservation there.
+ */
+ swap_reserve_force(mem_charged);
+ error = ENOMEM;
+ goto fail1;
+ }
+ } else
+ vm2 = NULL;
#ifdef MAC
mac_proc_init(newproc);
#endif
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index c0b9ebd..81a03ef 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -1213,6 +1213,8 @@ uifind(uid)
} else {
refcount_init(&uip->ui_ref, 0);
uip->ui_uid = uid;
+ mtx_init(&uip->ui_vmsize_mtx, "ui_vmsize", NULL,
+ MTX_DEF);
LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
}
}
@@ -1269,6 +1271,10 @@ uifree(uip)
if (uip->ui_proccnt != 0)
printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
uip->ui_uid, uip->ui_proccnt);
+ if (uip->ui_vmsize != 0)
+ printf("freeing uidinfo: uid = %d, swapuse = %lld\n",
+ uip->ui_uid, (unsigned long long)uip->ui_vmsize);
+ mtx_destroy(&uip->ui_vmsize_mtx);
free(uip, M_UIDINFO);
return;
}
diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c
index 0ed7ce4..0cce905 100644
--- a/sys/kern/sys_process.c
+++ b/sys/kern/sys_process.c
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_kern.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
+#include <vm/vm_param.h>
#ifdef COMPAT_IA32
#include <sys/procfs.h>
@@ -270,7 +271,10 @@ proc_rwmem(struct proc *p, struct uio *uio)
*/
error = vm_fault(map, pageno, reqprot, fault_flags);
if (error) {
- error = EFAULT;
+ if (error == KERN_RESOURCE_SHORTAGE)
+ error = ENOMEM;
+ else
+ error = EFAULT;
break;
}
diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c
index dd9e302..16316b4 100644
--- a/sys/kern/sysv_shm.c
+++ b/sys/kern/sysv_shm.c
@@ -770,13 +770,10 @@ shmget_allocate_segment(td, uap, mode)
* We make sure that we have allocated a pager before we need
* to.
*/
- if (shm_use_phys) {
- shm_object =
- vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
- } else {
- shm_object =
- vm_pager_allocate(OBJT_SWAP, 0, size, VM_PROT_DEFAULT, 0);
- }
+ shm_object = vm_pager_allocate(shm_use_phys ? OBJT_PHYS : OBJT_SWAP,
+ 0, size, VM_PROT_DEFAULT, 0, cred);
+ if (shm_object == NULL)
+ return (ENOMEM);
VM_OBJECT_LOCK(shm_object);
vm_object_clear_flag(shm_object, OBJ_ONEMAPPING);
vm_object_set_flag(shm_object, OBJ_NOSPLIT);
diff --git a/sys/kern/uipc_shm.c b/sys/kern/uipc_shm.c
index 3819154..32bfd2d 100644
--- a/sys/kern/uipc_shm.c
+++ b/sys/kern/uipc_shm.c
@@ -110,7 +110,7 @@ static struct shmfd *shm_hold(struct shmfd *shmfd);
static void shm_insert(char *path, Fnv32_t fnv, struct shmfd *shmfd);
static struct shmfd *shm_lookup(char *path, Fnv32_t fnv);
static int shm_remove(char *path, Fnv32_t fnv, struct ucred *ucred);
-static void shm_dotruncate(struct shmfd *shmfd, off_t length);
+static int shm_dotruncate(struct shmfd *shmfd, off_t length);
static fo_rdwr_t shm_read;
static fo_rdwr_t shm_write;
@@ -167,8 +167,7 @@ shm_truncate(struct file *fp, off_t length, struct ucred *active_cred,
if (error)
return (error);
#endif
- shm_dotruncate(shmfd, length);
- return (0);
+ return (shm_dotruncate(shmfd, length));
}
static int
@@ -242,23 +241,26 @@ shm_close(struct file *fp, struct thread *td)
return (0);
}
-static void
+static int
shm_dotruncate(struct shmfd *shmfd, off_t length)
{
vm_object_t object;
vm_page_t m;
vm_pindex_t nobjsize;
+ vm_ooffset_t delta;
object = shmfd->shm_object;
VM_OBJECT_LOCK(object);
if (length == shmfd->shm_size) {
VM_OBJECT_UNLOCK(object);
- return;
+ return (0);
}
nobjsize = OFF_TO_IDX(length + PAGE_MASK);
/* Are we shrinking? If so, trim the end. */
if (length < shmfd->shm_size) {
+ delta = ptoa(object->size - nobjsize);
+
/* Toss in memory pages. */
if (nobjsize < object->size)
vm_object_page_remove(object, nobjsize, object->size,
@@ -266,8 +268,11 @@ shm_dotruncate(struct shmfd *shmfd, off_t length)
/* Toss pages from swap. */
if (object->type == OBJT_SWAP)
- swap_pager_freespace(object, nobjsize,
- object->size - nobjsize);
+ swap_pager_freespace(object, nobjsize, delta);
+
+ /* Free the swap accounted for shm */
+ swap_release_by_uid(delta, object->uip);
+ object->charge -= delta;
/*
* If the last page is partially mapped, then zero out
@@ -307,6 +312,15 @@ shm_dotruncate(struct shmfd *shmfd, off_t length)
vm_page_cache_free(object, OFF_TO_IDX(length),
nobjsize);
}
+ } else {
+
+ /* Attempt to reserve the swap */
+ delta = ptoa(nobjsize - object->size);
+ if (!swap_reserve_by_uid(delta, object->uip)) {
+ VM_OBJECT_UNLOCK(object);
+ return (ENOMEM);
+ }
+ object->charge += delta;
}
shmfd->shm_size = length;
mtx_lock(&shm_timestamp_lock);
@@ -315,6 +329,7 @@ shm_dotruncate(struct shmfd *shmfd, off_t length)
mtx_unlock(&shm_timestamp_lock);
object->size = nobjsize;
VM_OBJECT_UNLOCK(object);
+ return (0);
}
/*
@@ -332,7 +347,7 @@ shm_alloc(struct ucred *ucred, mode_t mode)
shmfd->shm_gid = ucred->cr_gid;
shmfd->shm_mode = mode;
shmfd->shm_object = vm_pager_allocate(OBJT_DEFAULT, NULL,
- shmfd->shm_size, VM_PROT_DEFAULT, 0);
+ shmfd->shm_size, VM_PROT_DEFAULT, 0, ucred);
KASSERT(shmfd->shm_object != NULL, ("shm_create: vm_pager_allocate"));
VM_OBJECT_LOCK(shmfd->shm_object);
vm_object_clear_flag(shmfd->shm_object, OBJ_ONEMAPPING);
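[Editor's sketch] Because shm_dotruncate() now returns an error, growing a
POSIX shared memory object can fail with ENOMEM once the global or per-uid
swap limit is reached, and the error propagates to userland through
ftruncate(2). A minimal usage sketch follows; the object name "/swap_demo"
and the 16 MB size are made up for illustration.

#include <sys/mman.h>
#include <err.h>
#include <fcntl.h>
#include <unistd.h>

int
main(void)
{
	int fd;

	fd = shm_open("/swap_demo", O_RDWR | O_CREAT, 0600);
	if (fd == -1)
		err(1, "shm_open");
	/*
	 * Growing the object reserves swap up front; ENOMEM here means
	 * the reservation was denied by the global or per-uid limit.
	 */
	if (ftruncate(fd, 16 * 1024 * 1024) == -1)
		err(1, "ftruncate");
	shm_unlink("/swap_demo");
	close(fd);
	return (0);
}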