summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoralc <alc@FreeBSD.org>2004-03-27 19:50:23 +0000
committeralc <alc@FreeBSD.org>2004-03-27 19:50:23 +0000
commit521fa573648e77e065d6d7c23fce9ecb40142021 (patch)
treeb2239967d5e44caa884de7b4dff736d434584c9d
parenta0a0eebb33c39a00b7c2782c988328a830e43cd0 (diff)
downloadFreeBSD-src-521fa573648e77e065d6d7c23fce9ecb40142021.zip
FreeBSD-src-521fa573648e77e065d6d7c23fce9ecb40142021.tar.gz
Revise the direct or optimized case to use uiomove_fromphys() by the reader
instead of ephemeral mappings using pmap_qenter() by the writer. The writer is still, however, responsible for wiring the pages, just not mapping them. Consequently, the allocation of KVA for the direct case is unnecessary. Remove it and the sysctls limiting it, i.e., kern.ipc.maxpipekvawired and kern.ipc.amountpipekvawired. The number of temporarily wired pages is still, however, limited by kern.ipc.maxpipekva. Note: On platforms lacking a direct virtual-to-physical mapping, uiomove_fromphys() uses sf_bufs to cache ephemeral mappings. Thus, the number of available sf_bufs can influence the performance of pipes on platforms such as i386. Surprisingly, I saw the greatest gain from this change on such a machine: lmbench's pipe bandwidth result increased from ~1050MB/s to ~1850MB/s on my 2.4GHz, 400MHz FSB P4 Xeon.
-rw-r--r--sys/kern/subr_param.c7
-rw-r--r--sys/kern/sys_pipe.c78
-rw-r--r--sys/sys/pipe.h2
3 files changed, 19 insertions, 68 deletions
diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c
index 9eca45f..e7421b7 100644
--- a/sys/kern/subr_param.c
+++ b/sys/kern/subr_param.c
@@ -82,7 +82,6 @@ int nswbuf;
int maxswzone; /* max swmeta KVA storage */
int maxbcache; /* max buffer cache KVA storage */
int maxpipekva; /* Limit on pipe KVA */
-int maxpipekvawired; /* Limit on wired pipe KVA */
u_quad_t maxtsiz; /* max text size */
u_quad_t dfldsiz; /* initial data size limit */
u_quad_t maxdsiz; /* max data size */
@@ -184,17 +183,13 @@ init_param3(long kmempages)
{
/*
* Limit pageable pipe memory usage to 5% of the kernel map
- * (via pipe_map), and nonpageable pipe memory usage to 2.5%
- * of the same. Ensure that all have reasonable floors.
+ * (via pipe_map). Ensure that all have reasonable floors.
* (See sys_pipe.c for more info.)
*/
maxpipekva = (kmempages / 20) * PAGE_SIZE;
- maxpipekvawired = (kmempages / 40) * PAGE_SIZE;
if (maxpipekva < 512 * 1024)
maxpipekva = 512 * 1024;
- if (maxpipekvawired < 512 * 1024)
- maxpipekvawired = 512 * 1024;
TUNABLE_INT_FETCH("kern.ipc.maxpipekva", &maxpipekva);
}
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index 3dfd6e1..aa0445e 100644
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -53,11 +53,6 @@
* SMALL_PIPE_SIZE, rather than PIPE_SIZE. Big pipe creation will be limited
* as well. This value is loader tunable only.
*
- * kern.ipc.maxpipekvawired - This value limits the amount of memory that may
- * be wired in order to facilitate direct copies using page flipping.
- * Whenever this value is exceeded, pipes will fall back to using regular
- * copies. This value is sysctl controllable at all times.
- *
* These values are autotuned in subr_param.c.
*
* Memory usage may be monitored through the sysctls
@@ -159,22 +154,17 @@ static int nbigpipe;
static int amountpipes;
static int amountpipekva;
-static int amountpipekvawired;
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekva, CTLFLAG_RDTUN,
&maxpipekva, 0, "Pipe KVA limit");
-SYSCTL_INT(_kern_ipc, OID_AUTO, maxpipekvawired, CTLFLAG_RW,
- &maxpipekvawired, 0, "Pipe KVA wired limit");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipes, CTLFLAG_RD,
&amountpipes, 0, "Current # of pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, bigpipes, CTLFLAG_RD,
&nbigpipe, 0, "Current # of big pipes");
SYSCTL_INT(_kern_ipc, OID_AUTO, pipekva, CTLFLAG_RD,
&amountpipekva, 0, "Pipe KVA usage");
-SYSCTL_INT(_kern_ipc, OID_AUTO, pipekvawired, CTLFLAG_RD,
- &amountpipekvawired, 0, "Pipe wired KVA usage");
static void pipeinit(void *dummy __unused);
static void pipeclose(struct pipe *cpipe);
@@ -584,14 +574,12 @@ pipe_read(fp, uio, active_cred, flags, td)
*/
} else if ((size = rpipe->pipe_map.cnt) &&
(rpipe->pipe_state & PIPE_DIRECTW)) {
- caddr_t va;
if (size > (u_int) uio->uio_resid)
size = (u_int) uio->uio_resid;
- va = (caddr_t) rpipe->pipe_map.kva +
- rpipe->pipe_map.pos;
PIPE_UNLOCK(rpipe);
- error = uiomove(va, size, uio);
+ error = uiomove_fromphys(rpipe->pipe_map.ms,
+ rpipe->pipe_map.pos, size, uio);
PIPE_LOCK(rpipe);
if (error)
break;
@@ -736,22 +724,6 @@ pipe_build_write_buffer(wpipe, uio)
wpipe->pipe_map.cnt = size;
/*
- * and map the buffer
- */
- if (wpipe->pipe_map.kva == 0) {
- /*
- * We need to allocate space for an extra page because the
- * address range might (will) span pages at times.
- */
- wpipe->pipe_map.kva = kmem_alloc_nofault(kernel_map,
- wpipe->pipe_buffer.size + PAGE_SIZE);
- atomic_add_int(&amountpipekvawired,
- wpipe->pipe_buffer.size + PAGE_SIZE);
- }
- pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
- wpipe->pipe_map.npages);
-
-/*
* and update the uio data
*/
@@ -773,20 +745,7 @@ pipe_destroy_write_buffer(wpipe)
{
int i;
- PIPE_LOCK_ASSERT(wpipe, MA_NOTOWNED);
- if (wpipe->pipe_map.kva) {
- pmap_qremove(wpipe->pipe_map.kva, wpipe->pipe_map.npages);
-
- if (amountpipekvawired > maxpipekvawired / 2) {
- /* Conserve address space */
- vm_offset_t kva = wpipe->pipe_map.kva;
- wpipe->pipe_map.kva = 0;
- kmem_free(kernel_map, kva,
- wpipe->pipe_buffer.size + PAGE_SIZE);
- atomic_subtract_int(&amountpipekvawired,
- wpipe->pipe_buffer.size + PAGE_SIZE);
- }
- }
+ PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
vm_page_lock_queues();
for (i = 0; i < wpipe->pipe_map.npages; i++) {
vm_page_unhold(wpipe->pipe_map.ms[i]);
@@ -804,6 +763,8 @@ static void
pipe_clone_write_buffer(wpipe)
struct pipe *wpipe;
{
+ struct uio uio;
+ struct iovec iov;
int size;
int pos;
@@ -817,10 +778,18 @@ pipe_clone_write_buffer(wpipe)
wpipe->pipe_state &= ~PIPE_DIRECTW;
PIPE_UNLOCK(wpipe);
- bcopy((caddr_t) wpipe->pipe_map.kva + pos,
- wpipe->pipe_buffer.buffer, size);
- pipe_destroy_write_buffer(wpipe);
+ iov.iov_base = wpipe->pipe_buffer.buffer;
+ iov.iov_len = size;
+ uio.uio_iov = &iov;
+ uio.uio_iovcnt = 1;
+ uio.uio_offset = 0;
+ uio.uio_resid = size;
+ uio.uio_segflg = UIO_SYSSPACE;
+ uio.uio_rw = UIO_READ;
+ uio.uio_td = curthread;
+ uiomove_fromphys(wpipe->pipe_map.ms, pos, size, &uio);
PIPE_LOCK(wpipe);
+ pipe_destroy_write_buffer(wpipe);
}
/*
@@ -893,9 +862,7 @@ retry:
while (!error && (wpipe->pipe_state & PIPE_DIRECTW)) {
if (wpipe->pipe_state & PIPE_EOF) {
pipelock(wpipe, 0);
- PIPE_UNLOCK(wpipe);
pipe_destroy_write_buffer(wpipe);
- PIPE_LOCK(wpipe);
pipeselwakeup(wpipe);
pipeunlock(wpipe);
error = EPIPE;
@@ -920,9 +887,7 @@ retry:
*/
pipe_clone_write_buffer(wpipe);
} else {
- PIPE_UNLOCK(wpipe);
pipe_destroy_write_buffer(wpipe);
- PIPE_LOCK(wpipe);
}
error2:
pipeunlock(wpipe);
@@ -1021,8 +986,7 @@ pipe_write(fp, uio, active_cred, flags, td)
* away on us.
*/
if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) &&
- (fp->f_flag & FNONBLOCK) == 0 &&
- amountpipekvawired + uio->uio_resid < maxpipekvawired) {
+ (fp->f_flag & FNONBLOCK) == 0) {
error = pipe_direct_write(wpipe, uio);
if (error)
break;
@@ -1436,14 +1400,8 @@ pipe_free_kmem(cpipe)
cpipe->pipe_buffer.buffer = NULL;
}
#ifndef PIPE_NODIRECT
- if (cpipe->pipe_map.kva != 0) {
- atomic_subtract_int(&amountpipekvawired,
- cpipe->pipe_buffer.size + PAGE_SIZE);
- kmem_free(kernel_map,
- cpipe->pipe_map.kva,
- cpipe->pipe_buffer.size + PAGE_SIZE);
+ {
cpipe->pipe_map.cnt = 0;
- cpipe->pipe_map.kva = 0;
cpipe->pipe_map.pos = 0;
cpipe->pipe_map.npages = 0;
}
diff --git a/sys/sys/pipe.h b/sys/sys/pipe.h
index 84464bd..76b6d37 100644
--- a/sys/sys/pipe.h
+++ b/sys/sys/pipe.h
@@ -60,7 +60,6 @@
* See sys_pipe.c for info on what these limits mean.
*/
extern int maxpipekva;
-extern int maxpipekvawired;
/*
* Pipe buffer information.
@@ -79,7 +78,6 @@ struct pipebuf {
* Information to support direct transfers between processes for pipes.
*/
struct pipemapping {
- vm_offset_t kva; /* kernel virtual address */
vm_size_t cnt; /* number of chars in buffer */
vm_size_t pos; /* current position of transfer */
int npages; /* number of pages */
OpenPOWER on IntegriCloud