summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- sys/kern/subr_param.c 29
-rw-r--r-- sys/kern/sys_pipe.c 85
-rw-r--r-- sys/sys/pipe.h 11
3 files changed, 108 insertions, 17 deletions
diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c
index 7d048a4..2b7a66f 100644
--- a/sys/kern/subr_param.c
+++ b/sys/kern/subr_param.c
@@ -77,6 +77,9 @@ int nbuf;
int nswbuf;
int maxswzone; /* max swmeta KVA storage */
int maxbcache; /* max buffer cache KVA storage */
+int maxpipes; /* Limit on # of pipes */
+int maxpipekva; /* Limit on pipe KVA */
+int maxpipekvawired; /* Limit on wired pipe KVA */
u_quad_t maxtsiz; /* max text size */
u_quad_t dfldsiz; /* initial data size limit */
u_quad_t maxdsiz; /* max data size */
@@ -132,6 +135,15 @@ void
init_param2(long physpages)
{
+ /* Kernel map size */
+ int kmempages, kmemtunable;
+ kmemtunable = 0;
+ TUNABLE_INT_FETCH("kern.vm.kmem.size", &kmemtunable);
+ if (kmemtunable != 0)
+ kmempages = kmemtunable / PAGE_SIZE;
+ else
+ kmempages = VM_KMEM_SIZE_MAX / PAGE_SIZE;
+ kmempages = min(physpages, kmempages);
/* Base parameters */
maxusers = MAXUSERS;
TUNABLE_INT_FETCH("kern.maxusers", &maxusers);
@@ -161,6 +173,23 @@ init_param2(long physpages)
maxfilesperproc = (maxfiles * 9) / 10;
/*
+ * Limit the number of pipes to a reasonable fraction of kmap entries
+ * (one pipe per 20 pages of the kernel map), pageable pipe memory
+ * usage to 2.5% of the kernel map, and wired pipe memory usage to 1%
+ * of the same. Each limit has a floor: 128 pipes, and 512KB for each
+ * of the two memory limits. (See sys_pipe.c for more info.)
+ */
+ maxpipes = kmempages / 20;
+ maxpipekva = (kmempages / 40) * PAGE_SIZE;
+ maxpipekvawired = (kmempages / 100) * PAGE_SIZE;
+
+ if (maxpipes < 128)
+ maxpipes = 128;
+ if (maxpipekva < 512 * 1024)
+ maxpipekva = 512 * 1024;
+ if (maxpipekvawired < 512 * 1024)
+ maxpipekvawired = 512 * 1024;
+
+ /*
* Cannot be changed after boot.
*/
nbuf = NBUF;
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index 5782b60..838ff80 100644
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -45,6 +45,28 @@
* happen for small transfers so that the system will not spend all of
* its time context switching. PIPE_SIZE is constrained by the
* amount of kernel virtual memory.
+ *
+ * In order to limit the resource use of pipes, three sysctls exist:
+ *
+ * kern.ipc.maxpipes - A limit on the total number of pipes in the system.
+ * Note that since pipes are bidirectional, the effective value is this
+ * number divided by two.
+ *
+ * kern.ipc.maxpipekva - This value limits the amount of pageable memory that
+ * can be used by pipes. Whenever the amount in use exceeds this value,
+ * all new pipes will be SMALL_PIPE_SIZE in size, rather than PIPE_SIZE.
+ * Big pipe creation will be limited as well.
+ *
+ * kern.ipc.maxpipekvawired - This value limits the amount of memory that may
+ * be wired in order to facilitate direct copies using page flipping.
+ * Whenever this value is exceeded, pipes will fall back to using regular
+ * copies.
+ *
+ * These values are autotuned in subr_param.c.
+ *
+ * Resource usage may be monitored through the read-only sysctls
+ * kern.ipc.pipes (pipe count), kern.ipc.pipekva and kern.ipc.pipekvawired.
+ *
*/
#include <sys/cdefs.h>
@@ -68,6 +90,7 @@ __FBSDID("$FreeBSD$");
#include <sys/poll.h>
#include <sys/selinfo.h>
#include <sys/signalvar.h>
+#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/pipe.h>
#include <sys/proc.h>
@@ -148,24 +171,29 @@ static struct filterops pipe_wfiltops =
#define MAXPIPESIZE (2*PIPE_SIZE/3)
/*
- * Maximum amount of kva for pipes -- this is kind-of a soft limit, but
- * is there so that on large systems, we don't exhaust it.
- */
-#define MAXPIPEKVA (8*1024*1024)
-
-/*
- * Limit for direct transfers, we cannot, of course limit
- * the amount of kva for pipes in general though.
- */
-#define LIMITPIPEKVA (16*1024*1024)
-
-/*
* Limit the number of "big" pipes
*/
#define LIMITBIGPIPES 32
static int nbigpipe;
+static int amountpipes;
static int amountpipekva;
+static int amountpipekvawired;
+
+SYSCTL_DECL(_kern_ipc);
+
+SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipes, CTLFLAG_RW,
+ &maxpipes, 0, "");
+SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RW,
+ &maxpipekva, 0, "Pipe KVA limit");
+SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW,
+ &maxpipekvawired, 0, "Pipe KVA wired limit");
+SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD,
+ &amountpipes, 0, "");
+SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
+ &amountpipekva, 0, "Pipe KVA usage");
+SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD,
+ &amountpipekvawired, 0, "Pipe wired KVA usage");
static void pipeinit(void *dummy __unused);
static void pipeclose(struct pipe *cpipe);
@@ -303,11 +331,19 @@ pipespace(cpipe, size)
struct vm_object *object;
caddr_t buffer;
int npages, error;
+ static int curfail = 0;
+ static struct timeval lastfail;
GIANT_REQUIRED;
KASSERT(cpipe->pipe_mtxp == NULL || !mtx_owned(PIPE_MTX(cpipe)),
("pipespace: pipe mutex locked"));
+ if (amountpipes > maxpipes) {
+ if (ppsratecheck(&lastfail, &curfail, 1))
+ printf("kern.maxpipes exceeded, please see tuning(7).\n");
+ return (ENOMEM);
+ }
+
npages = round_page(size)/PAGE_SIZE;
/*
* Create an object, I don't like the idea of paging to/from
@@ -339,6 +375,7 @@ pipespace(cpipe, size)
cpipe->pipe_buffer.in = 0;
cpipe->pipe_buffer.out = 0;
cpipe->pipe_buffer.cnt = 0;
+ atomic_add_int(&amountpipes, 1);
atomic_add_int(&amountpipekva, cpipe->pipe_buffer.size);
return (0);
}
@@ -385,7 +422,13 @@ pipe_create(cpipep)
#endif
cpipe->pipe_mtxp = NULL; /* avoid pipespace assertion */
- error = pipespace(cpipe, PIPE_SIZE);
+ /*
+ * Fall back to a SMALL_PIPE_SIZE buffer once the pageable pipe KVA
+ * in use exceeds maxpipekva.
+ */
+ if (amountpipekva > maxpipekva)
+ error = pipespace(cpipe, SMALL_PIPE_SIZE);
+ else
+ error = pipespace(cpipe, PIPE_SIZE);
if (error)
return (error);
@@ -654,8 +697,11 @@ pipe_build_write_buffer(wpipe, uio)
int j;
vm_page_lock_queues();
- for (j = 0; j < i; j++)
+ for (j = 0; j < i; j++) {
vm_page_unwire(wpipe->pipe_map.ms[j], 1);
+ atomic_subtract_int(&amountpipekvawired,
+ PAGE_SIZE);
+ }
vm_page_unlock_queues();
return (EFAULT);
}
@@ -663,6 +709,7 @@ pipe_build_write_buffer(wpipe, uio)
m = PHYS_TO_VM_PAGE(paddr);
vm_page_lock_queues();
vm_page_wire(m);
+ atomic_add_int(&amountpipekvawired, PAGE_SIZE);
vm_page_unlock_queues();
wpipe->pipe_map.ms[i] = m;
}
@@ -719,7 +766,7 @@ pipe_destroy_write_buffer(wpipe)
if (wpipe->pipe_map.kva) {
pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);
- if (amountpipekva > MAXPIPEKVA) {
+ if (amountpipekva > maxpipekva) {
vm_offset_t kva = wpipe->pipe_map.kva;
wpipe->pipe_map.kva = 0;
kmem_free(kernel_map, kva,
@@ -729,8 +776,10 @@ pipe_destroy_write_buffer(wpipe)
}
}
vm_page_lock_queues();
- for (i = 0; i < wpipe->pipe_map.npages; i++)
+ for (i = 0; i < wpipe->pipe_map.npages; i++) {
vm_page_unwire(wpipe->pipe_map.ms[i], 1);
+ atomic_subtract_int(&amountpipekvawired, PAGE_SIZE);
+ }
vm_page_unlock_queues();
wpipe->pipe_map.npages = 0;
}
@@ -904,6 +953,7 @@ pipe_write(fp, uio, active_cred, flags, td)
* so.
*/
if ((uio->uio_resid > PIPE_SIZE) &&
+ (amountpipekva < maxpipekva) &&
(nbigpipe < LIMITBIGPIPES) &&
(wpipe->pipe_state & PIPE_DIRECTW) == 0 &&
(wpipe->pipe_buffer.size <= PIPE_SIZE) &&
@@ -950,7 +1000,7 @@ pipe_write(fp, uio, active_cred, flags, td)
*/
if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
(fp->f_flag & FNONBLOCK) == 0 &&
- (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA)) &&
+ amountpipekvawired < maxpipekvawired &&
(uio->uio_iov->iov_len >= PIPE_MINDIRECT)) {
error = pipe_direct_write(wpipe, uio);
if (error)
@@ -1357,6 +1407,7 @@ pipe_free_kmem(cpipe)
if (cpipe->pipe_buffer.size > PIPE_SIZE)
--nbigpipe;
atomic_subtract_int(&amountpipekva, cpipe->pipe_buffer.size);
+ atomic_subtract_int(&amountpipes, 1);
kmem_free(kernel_map,
(vm_offset_t)cpipe->pipe_buffer.buffer,
cpipe->pipe_buffer.size);
diff --git a/sys/sys/pipe.h b/sys/sys/pipe.h
index edbb388..f28d4ed 100644
--- a/sys/sys/pipe.h
+++ b/sys/sys/pipe.h
@@ -43,6 +43,10 @@
#define BIG_PIPE_SIZE (64*1024)
#endif
+#ifndef SMALL_PIPE_SIZE
+#define SMALL_PIPE_SIZE 4096
+#endif
+
/*
* PIPE_MINDIRECT MUST be smaller than PIPE_SIZE and MUST be bigger
* than PIPE_BUF.
@@ -54,6 +58,13 @@
#define PIPENPAGES (BIG_PIPE_SIZE / PAGE_SIZE + 1)
/*
+ * See sys_pipe.c for info on what these limits mean.
+ */
+extern int maxpipes;
+extern int maxpipekva;
+extern int maxpipekvawired;
+
+/*
* Pipe buffer information.
* Separate in, out, cnt are used to simplify calculations.
* Buffered write is active when the buffer.cnt field is set.
OpenPOWER on IntegriCloud