summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- sys/amd64/include/sf_buf.h      |  23
-rw-r--r-- sys/arm/arm/vm_machdep.c        | 137
-rw-r--r-- sys/arm/include/sf_buf.h        |  29
-rw-r--r-- sys/arm/include/vmparam.h       |   3
-rw-r--r-- sys/conf/files.arm              |   1
-rw-r--r-- sys/conf/files.i386             |   1
-rw-r--r-- sys/conf/files.mips             |   1
-rw-r--r-- sys/conf/files.pc98             |   1
-rw-r--r-- sys/conf/files.powerpc          |   1
-rw-r--r-- sys/conf/files.sparc64          |   1
-rw-r--r-- sys/i386/i386/vm_machdep.c      | 233
-rw-r--r-- sys/i386/include/sf_buf.h       |  39
-rw-r--r-- sys/i386/include/vmparam.h      |   5
-rw-r--r-- sys/kern/subr_sfbuf.c           | 226
-rw-r--r-- sys/mips/include/sf_buf.h       |  54
-rw-r--r-- sys/mips/include/vmparam.h      |   4
-rw-r--r-- sys/mips/mips/vm_machdep.c      | 113
-rw-r--r-- sys/powerpc/include/sf_buf.h    |  80
-rw-r--r-- sys/powerpc/include/vmparam.h   |  14
-rw-r--r-- sys/powerpc/powerpc/vm_machdep.c| 160
-rw-r--r-- sys/sparc64/include/sf_buf.h    |  59
-rw-r--r-- sys/sparc64/include/vmparam.h   |   3
-rw-r--r-- sys/sparc64/sparc64/vm_machdep.c| 107
-rw-r--r-- sys/sys/sf_buf.h                | 154
24 files changed, 475 insertions(+), 974 deletions(-)
diff --git a/sys/amd64/include/sf_buf.h b/sys/amd64/include/sf_buf.h
index 729e8e5..00b3085 100644
--- a/sys/amd64/include/sf_buf.h
+++ b/sys/amd64/include/sf_buf.h
@@ -29,42 +29,23 @@
#ifndef _MACHINE_SF_BUF_H_
#define _MACHINE_SF_BUF_H_
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/vm_page.h>
-
/*
* On this machine, the only purpose for which sf_buf is used is to implement
* an opaque pointer required by the machine-independent parts of the kernel.
* That pointer references the vm_page that is "mapped" by the sf_buf. The
* actual mapping is provided by the direct virtual-to-physical mapping.
*/
-struct sf_buf;
-
-static inline struct sf_buf *
-sf_buf_alloc(struct vm_page *m, int pri)
-{
-
- return ((struct sf_buf *)m);
-}
-
-static inline void
-sf_buf_free(struct sf_buf *sf)
-{
-}
-
-static __inline vm_offset_t
+static inline vm_offset_t
sf_buf_kva(struct sf_buf *sf)
{
return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS((vm_page_t)sf)));
}
-static __inline vm_page_t
+static inline vm_page_t
sf_buf_page(struct sf_buf *sf)
{
return ((vm_page_t)sf);
}
-
#endif /* !_MACHINE_SF_BUF_H_ */
diff --git a/sys/arm/arm/vm_machdep.c b/sys/arm/arm/vm_machdep.c
index df764e8..4a9cf3f 100644
--- a/sys/arm/arm/vm_machdep.c
+++ b/sys/arm/arm/vm_machdep.c
@@ -50,7 +50,6 @@ __FBSDID("$FreeBSD$");
#include <sys/mbuf.h>
#include <sys/proc.h>
#include <sys/socketvar.h>
-#include <sys/sf_buf.h>
#include <sys/syscall.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
@@ -83,42 +82,6 @@ __FBSDID("$FreeBSD$");
CTASSERT(sizeof(struct switchframe) == 24);
CTASSERT(sizeof(struct trapframe) == 80);
-#ifndef NSFBUFS
-#define NSFBUFS (512 + maxusers * 16)
-#endif
-
-static int nsfbufs;
-static int nsfbufspeak;
-static int nsfbufsused;
-
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
- "Maximum number of sendfile(2) sf_bufs available");
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
- "Number of sendfile(2) sf_bufs at peak usage");
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
- "Number of sendfile(2) sf_bufs in use");
-
-static void sf_buf_init(void *arg);
-SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
-
-LIST_HEAD(sf_head, sf_buf);
-
-/*
- * A hash table of active sendfile(2) buffers
- */
-static struct sf_head *sf_buf_active;
-static u_long sf_buf_hashmask;
-
-#define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask)
-
-static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
-static u_int sf_buf_alloc_want;
-
-/*
- * A lock used to synchronize access to the hash table and free list
- */
-static struct mtx sf_buf_lock;
-
/*
* Finish a fork operation, with process p2 nearly set up.
* Copy and update the pcb, set up the stack so that the child
@@ -184,106 +147,6 @@ cpu_thread_swapout(struct thread *td)
{
}
-/*
- * Detatch mapped page and release resources back to the system.
- */
-void
-sf_buf_free(struct sf_buf *sf)
-{
-
- mtx_lock(&sf_buf_lock);
- sf->ref_count--;
- if (sf->ref_count == 0) {
- TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
- nsfbufsused--;
- pmap_kremove(sf->kva);
- sf->m = NULL;
- LIST_REMOVE(sf, list_entry);
- if (sf_buf_alloc_want > 0)
- wakeup(&sf_buf_freelist);
- }
- mtx_unlock(&sf_buf_lock);
-}
-
-/*
- * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
- */
-static void
-sf_buf_init(void *arg)
-{
- struct sf_buf *sf_bufs;
- vm_offset_t sf_base;
- int i;
-
- nsfbufs = NSFBUFS;
- TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
-
- sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
- TAILQ_INIT(&sf_buf_freelist);
- sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
- sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
- M_NOWAIT | M_ZERO);
- for (i = 0; i < nsfbufs; i++) {
- sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
- TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
- }
- sf_buf_alloc_want = 0;
- mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
-}
-
-/*
- * Get an sf_buf from the freelist. Will block if none are available.
- */
-struct sf_buf *
-sf_buf_alloc(struct vm_page *m, int flags)
-{
- struct sf_head *hash_list;
- struct sf_buf *sf;
- int error;
-
- hash_list = &sf_buf_active[SF_BUF_HASH(m)];
- mtx_lock(&sf_buf_lock);
- LIST_FOREACH(sf, hash_list, list_entry) {
- if (sf->m == m) {
- sf->ref_count++;
- if (sf->ref_count == 1) {
- TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
- nsfbufsused++;
- nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
- }
- goto done;
- }
- }
- while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
- if (flags & SFB_NOWAIT)
- goto done;
- sf_buf_alloc_want++;
- SFSTAT_INC(sf_allocwait);
- error = msleep(&sf_buf_freelist, &sf_buf_lock,
- (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
- sf_buf_alloc_want--;
-
-
- /*
- * If we got a signal, don't risk going back to sleep.
- */
- if (error)
- goto done;
- }
- TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
- if (sf->m != NULL)
- LIST_REMOVE(sf, list_entry);
- LIST_INSERT_HEAD(hash_list, sf, list_entry);
- sf->ref_count = 1;
- sf->m = m;
- nsfbufsused++;
- nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
- pmap_kenter(sf->kva, VM_PAGE_TO_PHYS(sf->m));
-done:
- mtx_unlock(&sf_buf_lock);
- return (sf);
-}
-
void
cpu_set_syscall_retval(struct thread *td, int error)
{
diff --git a/sys/arm/include/sf_buf.h b/sys/arm/include/sf_buf.h
index 2ec07de..b761cc7 100644
--- a/sys/arm/include/sf_buf.h
+++ b/sys/arm/include/sf_buf.h
@@ -29,33 +29,18 @@
#ifndef _MACHINE_SF_BUF_H_
#define _MACHINE_SF_BUF_H_
-#include <sys/queue.h>
-
-struct vm_page;
-
-struct sf_buf {
- LIST_ENTRY(sf_buf) list_entry; /* list of buffers */
- TAILQ_ENTRY(sf_buf) free_entry; /* list of buffers */
- struct vm_page *m; /* currently mapped page */
- vm_offset_t kva; /* va of mapping */
- int ref_count; /* usage of this mapping */
-};
-
-static __inline vm_offset_t
-sf_buf_kva(struct sf_buf *sf)
+static inline void
+sf_buf_map(struct sf_buf *sf, int flags)
{
- return (sf->kva);
+ pmap_kenter(sf->kva, VM_PAGE_TO_PHYS(sf->m));
}
-static __inline struct vm_page *
-sf_buf_page(struct sf_buf *sf)
+static inline int
+sf_buf_unmap(struct sf_buf *sf)
{
- return (sf->m);
+ pmap_kremove(sf->kva);
+ return (1);
}
-
-struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
-void sf_buf_free(struct sf_buf *sf);
-
#endif /* !_MACHINE_SF_BUF_H_ */
diff --git a/sys/arm/include/vmparam.h b/sys/arm/include/vmparam.h
index 1c6085e..9bfd358 100644
--- a/sys/arm/include/vmparam.h
+++ b/sys/arm/include/vmparam.h
@@ -170,4 +170,7 @@ extern vm_offset_t vm_max_kernel_address;
#define VM_MAX_AUTOTUNE_MAXUSERS 384
#endif
+#define SFBUF
+#define SFBUF_MAP
+
#endif /* _MACHINE_VMPARAM_H_ */
diff --git a/sys/conf/files.arm b/sys/conf/files.arm
index 7243c17..1f7c71d 100644
--- a/sys/conf/files.arm
+++ b/sys/conf/files.arm
@@ -77,6 +77,7 @@ font.h optional sc \
clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8"
kern/subr_busdma_bufalloc.c standard
kern/subr_dummy_vdso_tc.c standard
+kern/subr_sfbuf.c standard
libkern/arm/aeabi_unwind.c standard
libkern/arm/divsi3.S standard
libkern/arm/ffs.S standard
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index 58ae57b..ebc81fc 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -520,6 +520,7 @@ isa/vga_isa.c optional vga
kern/kern_clocksource.c standard
kern/imgact_aout.c optional compat_aout
kern/imgact_gzip.c optional gzip
+kern/subr_sfbuf.c standard
libkern/divdi3.c standard
libkern/flsll.c standard
libkern/memmove.c standard
diff --git a/sys/conf/files.mips b/sys/conf/files.mips
index 2df6844..3677de4 100644
--- a/sys/conf/files.mips
+++ b/sys/conf/files.mips
@@ -51,6 +51,7 @@ mips/mips/vm_machdep.c standard
kern/kern_clocksource.c standard
kern/link_elf_obj.c standard
kern/subr_dummy_vdso_tc.c standard
+kern/subr_sfbuf.c optional mips | mipsel | mipsn32
# gcc/clang runtime
libkern/ffsl.c standard
diff --git a/sys/conf/files.pc98 b/sys/conf/files.pc98
index fff7b3c..3142223 100644
--- a/sys/conf/files.pc98
+++ b/sys/conf/files.pc98
@@ -205,6 +205,7 @@ i386/svr4/svr4_machdep.c optional compat_svr4
kern/kern_clocksource.c standard
kern/imgact_aout.c optional compat_aout
kern/imgact_gzip.c optional gzip
+kern/subr_sfbuf.c standard
libkern/divdi3.c standard
libkern/flsll.c standard
libkern/memmove.c standard
diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc
index 8aed60a..10515a5 100644
--- a/sys/conf/files.powerpc
+++ b/sys/conf/files.powerpc
@@ -71,6 +71,7 @@ dev/vt/hw/ofwfb/ofwfb.c optional vt aim
kern/kern_clocksource.c standard
kern/subr_dummy_vdso_tc.c standard
kern/syscalls.c optional ktr
+kern/subr_sfbuf.c standard
libkern/ashldi3.c optional powerpc
libkern/ashrdi3.c optional powerpc
libkern/bcmp.c standard
diff --git a/sys/conf/files.sparc64 b/sys/conf/files.sparc64
index ccee247..53b0920 100644
--- a/sys/conf/files.sparc64
+++ b/sys/conf/files.sparc64
@@ -64,6 +64,7 @@ dev/vt/hw/ofwfb/ofwfb.c optional vt
kern/kern_clocksource.c standard
kern/subr_dummy_vdso_tc.c standard
kern/syscalls.c optional ktr
+kern/subr_sfbuf.c standard
libkern/ffs.c standard
libkern/ffsl.c standard
libkern/fls.c standard
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index 78cc9c9..9c87548 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -118,38 +118,6 @@ static u_int cpu_reset_proxyid;
static volatile u_int cpu_reset_proxy_active;
#endif
-static int nsfbufs;
-static int nsfbufspeak;
-static int nsfbufsused;
-
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
- "Maximum number of sendfile(2) sf_bufs available");
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
- "Number of sendfile(2) sf_bufs at peak usage");
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
- "Number of sendfile(2) sf_bufs in use");
-
-static void sf_buf_init(void *arg);
-SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
-
-LIST_HEAD(sf_head, sf_buf);
-
-/*
- * A hash table of active sendfile(2) buffers
- */
-static struct sf_head *sf_buf_active;
-static u_long sf_buf_hashmask;
-
-#define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask)
-
-static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
-static u_int sf_buf_alloc_want;
-
-/*
- * A lock used to synchronize access to the hash table and free list
- */
-static struct mtx sf_buf_lock;
-
extern int _ucodesel, _udatasel;
/*
@@ -750,121 +718,12 @@ cpu_reset_real()
}
/*
- * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
- */
-static void
-sf_buf_init(void *arg)
-{
- struct sf_buf *sf_bufs;
- vm_offset_t sf_base;
- int i;
-
- nsfbufs = NSFBUFS;
- TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
-
- sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
- TAILQ_INIT(&sf_buf_freelist);
- sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
- sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
- M_NOWAIT | M_ZERO);
- for (i = 0; i < nsfbufs; i++) {
- sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
- TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
- }
- sf_buf_alloc_want = 0;
- mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
-}
-
-/*
- * Invalidate the cache lines that may belong to the page, if
- * (possibly old) mapping of the page by sf buffer exists. Returns
- * TRUE when mapping was found and cache invalidated.
- */
-boolean_t
-sf_buf_invalidate_cache(vm_page_t m)
-{
- struct sf_head *hash_list;
- struct sf_buf *sf;
- boolean_t ret;
-
- hash_list = &sf_buf_active[SF_BUF_HASH(m)];
- ret = FALSE;
- mtx_lock(&sf_buf_lock);
- LIST_FOREACH(sf, hash_list, list_entry) {
- if (sf->m == m) {
- /*
- * Use pmap_qenter to update the pte for
- * existing mapping, in particular, the PAT
- * settings are recalculated.
- */
- pmap_qenter(sf->kva, &m, 1);
- pmap_invalidate_cache_range(sf->kva, sf->kva +
- PAGE_SIZE);
- ret = TRUE;
- break;
- }
- }
- mtx_unlock(&sf_buf_lock);
- return (ret);
-}
-
-/*
* Get an sf_buf from the freelist. May block if none are available.
*/
-struct sf_buf *
-sf_buf_alloc(struct vm_page *m, int flags)
+void
+sf_buf_map(struct sf_buf *sf, int flags)
{
pt_entry_t opte, *ptep;
- struct sf_head *hash_list;
- struct sf_buf *sf;
-#ifdef SMP
- cpuset_t other_cpus;
- u_int cpuid;
-#endif
- int error;
-
- KASSERT(curthread->td_pinned > 0 || (flags & SFB_CPUPRIVATE) == 0,
- ("sf_buf_alloc(SFB_CPUPRIVATE): curthread not pinned"));
- hash_list = &sf_buf_active[SF_BUF_HASH(m)];
- mtx_lock(&sf_buf_lock);
- LIST_FOREACH(sf, hash_list, list_entry) {
- if (sf->m == m) {
- sf->ref_count++;
- if (sf->ref_count == 1) {
- TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
- nsfbufsused++;
- nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
- }
-#ifdef SMP
- goto shootdown;
-#else
- goto done;
-#endif
- }
- }
- while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
- if (flags & SFB_NOWAIT)
- goto done;
- sf_buf_alloc_want++;
- SFSTAT_INC(sf_allocwait);
- error = msleep(&sf_buf_freelist, &sf_buf_lock,
- (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
- sf_buf_alloc_want--;
-
- /*
- * If we got a signal, don't risk going back to sleep.
- */
- if (error)
- goto done;
- }
- TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
- if (sf->m != NULL)
- LIST_REMOVE(sf, list_entry);
- LIST_INSERT_HEAD(hash_list, sf, list_entry);
- sf->ref_count = 1;
- sf->m = m;
- nsfbufsused++;
- nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
/*
* Update the sf_buf's virtual-to-physical mapping, flushing the
@@ -876,11 +735,11 @@ sf_buf_alloc(struct vm_page *m, int flags)
ptep = vtopte(sf->kva);
opte = *ptep;
#ifdef XEN
- PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(m)) | pgeflag
- | PG_RW | PG_V | pmap_cache_bits(m->md.pat_mode, 0));
+ PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(sf->m)) | pgeflag
+ | PG_RW | PG_V | pmap_cache_bits(sf->m->md.pat_mode, 0));
#else
- *ptep = VM_PAGE_TO_PHYS(m) | pgeflag | PG_RW | PG_V |
- pmap_cache_bits(m->md.pat_mode, 0);
+ *ptep = VM_PAGE_TO_PHYS(sf->m) | pgeflag | PG_RW | PG_V |
+ pmap_cache_bits(sf->m->md.pat_mode, 0);
#endif
/*
@@ -892,7 +751,21 @@ sf_buf_alloc(struct vm_page *m, int flags)
#ifdef SMP
if ((opte & (PG_V | PG_A)) == (PG_V | PG_A))
CPU_ZERO(&sf->cpumask);
-shootdown:
+
+ sf_buf_shootdown(sf, flags);
+#else
+ if ((opte & (PG_V | PG_A)) == (PG_V | PG_A))
+ pmap_invalidate_page(kernel_pmap, sf->kva);
+#endif
+}
+
+#ifdef SMP
+void
+sf_buf_shootdown(struct sf_buf *sf, int flags)
+{
+ cpuset_t other_cpus;
+ u_int cpuid;
+
sched_pin();
cpuid = PCPU_GET(cpuid);
if (!CPU_ISSET(cpuid, &sf->cpumask)) {
@@ -909,42 +782,50 @@ shootdown:
}
}
sched_unpin();
-#else
- if ((opte & (PG_V | PG_A)) == (PG_V | PG_A))
- pmap_invalidate_page(kernel_pmap, sf->kva);
-#endif
-done:
- mtx_unlock(&sf_buf_lock);
- return (sf);
}
+#endif
/*
- * Remove a reference from the given sf_buf, adding it to the free
- * list when its reference count reaches zero. A freed sf_buf still,
- * however, retains its virtual-to-physical mapping until it is
- * recycled or reactivated by sf_buf_alloc(9).
+ * MD part of sf_buf_free().
*/
-void
-sf_buf_free(struct sf_buf *sf)
+int
+sf_buf_unmap(struct sf_buf *sf)
{
-
- mtx_lock(&sf_buf_lock);
- sf->ref_count--;
- if (sf->ref_count == 0) {
- TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
- nsfbufsused--;
#ifdef XEN
+ /*
+ * Xen doesn't like having dangling R/W mappings
+ */
+ pmap_qremove(sf->kva, 1);
+ return (1);
+#else
+ return (0);
+#endif
+}
+
+static void
+sf_buf_invalidate(struct sf_buf *sf)
+{
+ vm_page_t m = sf->m;
+
+ /*
+ * Use pmap_qenter to update the pte for
+ * existing mapping, in particular, the PAT
+ * settings are recalculated.
+ */
+ pmap_qenter(sf->kva, &m, 1);
+ pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE);
+}
+
/*
- * Xen doesn't like having dangling R/W mappings
+ * Invalidate the cache lines that may belong to the page, if
+ * (possibly old) mapping of the page by sf buffer exists. Returns
+ * TRUE when mapping was found and cache invalidated.
*/
- pmap_qremove(sf->kva, 1);
- sf->m = NULL;
- LIST_REMOVE(sf, list_entry);
-#endif
- if (sf_buf_alloc_want > 0)
- wakeup(&sf_buf_freelist);
- }
- mtx_unlock(&sf_buf_lock);
+boolean_t
+sf_buf_invalidate_cache(vm_page_t m)
+{
+
+ return (sf_buf_process_page(m, sf_buf_invalidate));
}
/*
diff --git a/sys/i386/include/sf_buf.h b/sys/i386/include/sf_buf.h
index 20296b3..f1f1cd3 100644
--- a/sys/i386/include/sf_buf.h
+++ b/sys/i386/include/sf_buf.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2003, 2005 Alan L. Cox <alc@cs.rice.edu>
+ * Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,39 +29,8 @@
#ifndef _MACHINE_SF_BUF_H_
#define _MACHINE_SF_BUF_H_
-#include <sys/_cpuset.h>
-#include <sys/queue.h>
-
-struct vm_page;
-
-struct sf_buf {
- LIST_ENTRY(sf_buf) list_entry; /* list of buffers */
- TAILQ_ENTRY(sf_buf) free_entry; /* list of buffers */
- struct vm_page *m; /* currently mapped page */
- vm_offset_t kva; /* va of mapping */
- int ref_count; /* usage of this mapping */
-#ifdef SMP
- cpuset_t cpumask; /* cpus on which mapping is valid */
-#endif
-};
-
-struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
-void sf_buf_free(struct sf_buf *sf);
-
-static __inline vm_offset_t
-sf_buf_kva(struct sf_buf *sf)
-{
-
- return (sf->kva);
-}
-
-static __inline struct vm_page *
-sf_buf_page(struct sf_buf *sf)
-{
-
- return (sf->m);
-}
-
-boolean_t sf_buf_invalidate_cache(vm_page_t m);
+void sf_buf_map(struct sf_buf *, int);
+int sf_buf_unmap(struct sf_buf *);
+boolean_t sf_buf_invalidate_cache(vm_page_t);
#endif /* !_MACHINE_SF_BUF_H_ */
diff --git a/sys/i386/include/vmparam.h b/sys/i386/include/vmparam.h
index c086d76..975b302 100644
--- a/sys/i386/include/vmparam.h
+++ b/sys/i386/include/vmparam.h
@@ -198,4 +198,9 @@
#define VM_MAX_AUTOTUNE_MAXUSERS 384
#endif
+#define SFBUF
+#define SFBUF_MAP
+#define SFBUF_CPUSET
+#define SFBUF_PROCESS_PAGE
+
#endif /* _MACHINE_VMPARAM_H_ */
diff --git a/sys/kern/subr_sfbuf.c b/sys/kern/subr_sfbuf.c
new file mode 100644
index 0000000..e4309d1
--- /dev/null
+++ b/sys/kern/subr_sfbuf.c
@@ -0,0 +1,226 @@
+/*-
+ * Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org>
+ * Copyright (c) 2003, 2005 Alan L. Cox <alc@cs.rice.edu>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mutex.h>
+#include <sys/sf_buf.h>
+#include <sys/smp.h>
+#include <sys/sysctl.h>
+
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_page.h>
+
+#ifndef NSFBUFS
+#define NSFBUFS (512 + maxusers * 16)
+#endif
+
+static int nsfbufs;
+static int nsfbufspeak;
+static int nsfbufsused;
+
+SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
+ "Maximum number of sendfile(2) sf_bufs available");
+SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
+ "Number of sendfile(2) sf_bufs at peak usage");
+SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
+ "Number of sendfile(2) sf_bufs in use");
+
+static void sf_buf_init(void *arg);
+SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
+
+LIST_HEAD(sf_head, sf_buf);
+
+/*
+ * A hash table of active sendfile(2) buffers
+ */
+static struct sf_head *sf_buf_active;
+static u_long sf_buf_hashmask;
+
+#define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask)
+
+static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
+static u_int sf_buf_alloc_want;
+
+/*
+ * A lock used to synchronize access to the hash table and free list
+ */
+static struct mtx sf_buf_lock;
+
+/*
+ * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
+ */
+static void
+sf_buf_init(void *arg)
+{
+ struct sf_buf *sf_bufs;
+ vm_offset_t sf_base;
+ int i;
+
+#ifdef SFBUF_OPTIONAL_DIRECT_MAP
+ if (SFBUF_OPTIONAL_DIRECT_MAP)
+ return;
+#endif
+
+ nsfbufs = NSFBUFS;
+ TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
+
+ sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
+ TAILQ_INIT(&sf_buf_freelist);
+ sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
+ sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
+ M_NOWAIT | M_ZERO);
+ KASSERT(sf_bufs, ("%s: malloc failure", __func__));
+ for (i = 0; i < nsfbufs; i++) {
+ sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
+ TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
+ }
+ sf_buf_alloc_want = 0;
+ mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
+}
+
+/*
+ * Get an sf_buf from the freelist. May block if none are available.
+ */
+struct sf_buf *
+sf_buf_alloc(struct vm_page *m, int flags)
+{
+ struct sf_head *hash_list;
+ struct sf_buf *sf;
+ int error;
+
+#ifdef SFBUF_OPTIONAL_DIRECT_MAP
+ if (SFBUF_OPTIONAL_DIRECT_MAP)
+ return ((struct sf_buf *)m);
+#endif
+
+ KASSERT(curthread->td_pinned > 0 || (flags & SFB_CPUPRIVATE) == 0,
+ ("sf_buf_alloc(SFB_CPUPRIVATE): curthread not pinned"));
+ hash_list = &sf_buf_active[SF_BUF_HASH(m)];
+ mtx_lock(&sf_buf_lock);
+ LIST_FOREACH(sf, hash_list, list_entry) {
+ if (sf->m == m) {
+ sf->ref_count++;
+ if (sf->ref_count == 1) {
+ TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
+ nsfbufsused++;
+ nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
+ }
+#if defined(SMP) && defined(SFBUF_CPUSET)
+ sf_buf_shootdown(sf, flags);
+#endif
+ goto done;
+ }
+ }
+ while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
+ if (flags & SFB_NOWAIT)
+ goto done;
+ sf_buf_alloc_want++;
+ SFSTAT_INC(sf_allocwait);
+ error = msleep(&sf_buf_freelist, &sf_buf_lock,
+ (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
+ sf_buf_alloc_want--;
+
+ /*
+ * If we got a signal, don't risk going back to sleep.
+ */
+ if (error)
+ goto done;
+ }
+ TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
+ if (sf->m != NULL)
+ LIST_REMOVE(sf, list_entry);
+ LIST_INSERT_HEAD(hash_list, sf, list_entry);
+ sf->ref_count = 1;
+ sf->m = m;
+ nsfbufsused++;
+ nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
+ sf_buf_map(sf, flags);
+done:
+ mtx_unlock(&sf_buf_lock);
+ return (sf);
+}
+
+/*
+ * Remove a reference from the given sf_buf, adding it to the free
+ * list when its reference count reaches zero. A freed sf_buf still,
+ * however, retains its virtual-to-physical mapping until it is
+ * recycled or reactivated by sf_buf_alloc(9).
+ */
+void
+sf_buf_free(struct sf_buf *sf)
+{
+
+#ifdef SFBUF_OPTIONAL_DIRECT_MAP
+ if (SFBUF_OPTIONAL_DIRECT_MAP)
+ return;
+#endif
+
+ mtx_lock(&sf_buf_lock);
+ sf->ref_count--;
+ if (sf->ref_count == 0) {
+ TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
+ nsfbufsused--;
+ if (sf_buf_unmap(sf)) {
+ sf->m = NULL;
+ LIST_REMOVE(sf, list_entry);
+ }
+ if (sf_buf_alloc_want > 0)
+ wakeup(&sf_buf_freelist);
+ }
+ mtx_unlock(&sf_buf_lock);
+}
+
+#ifdef SFBUF_PROCESS_PAGE
+/*
+ * Run callback function on sf_buf that holds a certain page.
+ */
+boolean_t
+sf_buf_process_page(vm_page_t m, void (*cb)(struct sf_buf *))
+{
+ struct sf_head *hash_list;
+ struct sf_buf *sf;
+
+ hash_list = &sf_buf_active[SF_BUF_HASH(m)];
+ mtx_lock(&sf_buf_lock);
+ LIST_FOREACH(sf, hash_list, list_entry) {
+ if (sf->m == m) {
+ cb(sf);
+ mtx_unlock(&sf_buf_lock);
+ return (TRUE);
+ }
+ }
+ mtx_unlock(&sf_buf_lock);
+ return (FALSE);
+}
+#endif /* SFBUF_PROCESS_PAGE */
diff --git a/sys/mips/include/sf_buf.h b/sys/mips/include/sf_buf.h
index e5d981f..76fb993 100644
--- a/sys/mips/include/sf_buf.h
+++ b/sys/mips/include/sf_buf.h
@@ -29,31 +29,9 @@
#ifndef _MACHINE_SF_BUF_H_
#define _MACHINE_SF_BUF_H_
-#ifdef __mips_n64
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/vm_page.h>
-#else
-#include <sys/queue.h>
-#endif
+#ifdef __mips_n64 /* In 64 bit the whole memory is directly mapped */
-#ifdef __mips_n64
-/* In 64 bit the whole memory is directly mapped */
-struct sf_buf;
-
-static inline struct sf_buf *
-sf_buf_alloc(struct vm_page *m, int pri)
-{
-
- return ((struct sf_buf *)m);
-}
-
-static inline void
-sf_buf_free(struct sf_buf *sf)
-{
-}
-
-static __inline vm_offset_t
+static inline vm_offset_t
sf_buf_kva(struct sf_buf *sf)
{
vm_page_t m;
@@ -62,38 +40,12 @@ sf_buf_kva(struct sf_buf *sf)
return (MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m)));
}
-static __inline struct vm_page *
+static inline struct vm_page *
sf_buf_page(struct sf_buf *sf)
{
return ((vm_page_t)sf);
}
-#else /* ! __mips_n64 */
-struct vm_page;
-
-struct sf_buf {
- SLIST_ENTRY(sf_buf) free_list; /* list of free buffer slots */
- struct vm_page *m; /* currently mapped page */
- vm_offset_t kva; /* va of mapping */
-};
-
-struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
-void sf_buf_free(struct sf_buf *sf);
-
-static __inline vm_offset_t
-sf_buf_kva(struct sf_buf *sf)
-{
-
- return (sf->kva);
-}
-
-static __inline struct vm_page *
-sf_buf_page(struct sf_buf *sf)
-{
-
- return (sf->m);
-}
#endif /* __mips_n64 */
-
#endif /* !_MACHINE_SF_BUF_H_ */
diff --git a/sys/mips/include/vmparam.h b/sys/mips/include/vmparam.h
index 8922924..d3b833e 100644
--- a/sys/mips/include/vmparam.h
+++ b/sys/mips/include/vmparam.h
@@ -187,4 +187,8 @@
#define ZERO_REGION_SIZE (64 * 1024) /* 64KB */
+#ifndef __mips_n64
+#define SFBUF
+#endif
+
#endif /* !_MACHINE_VMPARAM_H_ */
diff --git a/sys/mips/mips/vm_machdep.c b/sys/mips/mips/vm_machdep.c
index dd294e9..26cdbff 100644
--- a/sys/mips/mips/vm_machdep.c
+++ b/sys/mips/mips/vm_machdep.c
@@ -76,9 +76,6 @@ __FBSDID("$FreeBSD$");
#include <sys/user.h>
#include <sys/mbuf.h>
-#ifndef __mips_n64
-#include <sys/sf_buf.h>
-#endif
/* Duplicated from asm.h */
#if defined(__mips_o32)
@@ -92,38 +89,6 @@ __FBSDID("$FreeBSD$");
#define CALLFRAME_SIZ (SZREG * 4)
#endif
-#ifndef __mips_n64
-
-#ifndef NSFBUFS
-#define NSFBUFS (512 + maxusers * 16)
-#endif
-
-static int nsfbufs;
-static int nsfbufspeak;
-static int nsfbufsused;
-
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
- "Maximum number of sendfile(2) sf_bufs available");
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
- "Number of sendfile(2) sf_bufs at peak usage");
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
- "Number of sendfile(2) sf_bufs in use");
-
-static void sf_buf_init(void *arg);
-SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
-
-/*
- * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the
- * sf_freelist head with the sf_lock mutex.
- */
-static struct {
- SLIST_HEAD(, sf_buf) sf_head;
- struct mtx sf_lock;
-} sf_freelist;
-
-static u_int sf_buf_alloc_want;
-#endif /* !__mips_n64 */
-
/*
* Finish a fork operation, with process p2 nearly set up.
* Copy and update the pcb, set up the stack so that the child
@@ -513,84 +478,6 @@ cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg,
#define ZIDLE_HI(v) ((v) * 4 / 5)
/*
- * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
- */
-#ifndef __mips_n64
-static void
-sf_buf_init(void *arg)
-{
- struct sf_buf *sf_bufs;
- vm_offset_t sf_base;
- int i;
-
- nsfbufs = NSFBUFS;
- TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
-
- mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
- SLIST_INIT(&sf_freelist.sf_head);
- sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
- sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
- M_NOWAIT | M_ZERO);
- for (i = 0; i < nsfbufs; i++) {
- sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
- SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
- }
- sf_buf_alloc_want = 0;
-}
-
-/*
- * Get an sf_buf from the freelist. Will block if none are available.
- */
-struct sf_buf *
-sf_buf_alloc(struct vm_page *m, int flags)
-{
- struct sf_buf *sf;
- int error;
-
- mtx_lock(&sf_freelist.sf_lock);
- while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
- if (flags & SFB_NOWAIT)
- break;
- sf_buf_alloc_want++;
- SFSTAT_INC(sf_allocwait);
- error = msleep(&sf_freelist, &sf_freelist.sf_lock,
- (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
- sf_buf_alloc_want--;
-
- /*
- * If we got a signal, don't risk going back to sleep.
- */
- if (error)
- break;
- }
- if (sf != NULL) {
- SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
- sf->m = m;
- nsfbufsused++;
- nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
- pmap_qenter(sf->kva, &sf->m, 1);
- }
- mtx_unlock(&sf_freelist.sf_lock);
- return (sf);
-}
-
-/*
- * Release resources back to the system.
- */
-void
-sf_buf_free(struct sf_buf *sf)
-{
- pmap_qremove(sf->kva, 1);
- mtx_lock(&sf_freelist.sf_lock);
- SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
- nsfbufsused--;
- if (sf_buf_alloc_want > 0)
- wakeup(&sf_freelist);
- mtx_unlock(&sf_freelist.sf_lock);
-}
-#endif /* !__mips_n64 */
-
-/*
* Software interrupt handler for queued VM system processing.
*/
void
diff --git a/sys/powerpc/include/sf_buf.h b/sys/powerpc/include/sf_buf.h
deleted file mode 100644
index f8a5936..0000000
--- a/sys/powerpc/include/sf_buf.h
+++ /dev/null
@@ -1,80 +0,0 @@
-/*-
- * Copyright (c) 2003 Alan L. Cox <alc@cs.rice.edu>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _MACHINE_SF_BUF_H_
-#define _MACHINE_SF_BUF_H_
-
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/vm_page.h>
-#include <machine/md_var.h>
-#include <sys/queue.h>
-
-struct vm_page;
-
-struct sf_buf {
- LIST_ENTRY(sf_buf) list_entry; /* list of buffers */
- TAILQ_ENTRY(sf_buf) free_entry; /* list of buffers */
- struct vm_page *m; /* currently mapped page */
- vm_offset_t kva; /* va of mapping */
- int ref_count; /* usage of this mapping */
-};
-
-struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
-void sf_buf_free(struct sf_buf *sf);
-
-/*
- * On 32-bit OEA, the only purpose for which sf_buf is used is to implement
- * an opaque pointer required by the machine-independent parts of the kernel.
- * That pointer references the vm_page that is "mapped" by the sf_buf. The
- * actual mapping is provided by the direct virtual-to-physical mapping.
- *
- * On OEA64 and Book-E, we need to do something a little more complicated. Use
- * the runtime-detected hw_direct_map to pick between the two cases. Our
- * friends in vm_machdep.c will do the same to ensure nothing gets confused.
- */
-
-static __inline vm_offset_t
-sf_buf_kva(struct sf_buf *sf)
-{
- if (hw_direct_map)
- return (VM_PAGE_TO_PHYS((vm_page_t)sf));
-
- return (sf->kva);
-}
-
-static __inline struct vm_page *
-sf_buf_page(struct sf_buf *sf)
-{
- if (hw_direct_map)
- return ((vm_page_t)sf);
-
- return (sf->m);
-}
-
-#endif /* !_MACHINE_SF_BUF_H_ */
diff --git a/sys/powerpc/include/vmparam.h b/sys/powerpc/include/vmparam.h
index 2d50f94..c46f395 100644
--- a/sys/powerpc/include/vmparam.h
+++ b/sys/powerpc/include/vmparam.h
@@ -197,4 +197,18 @@ struct pmap_physseg {
#define ZERO_REGION_SIZE (64 * 1024) /* 64KB */
+/*
+ * On 32-bit OEA, the only purpose for which sf_buf is used is to implement
+ * an opaque pointer required by the machine-independent parts of the kernel.
+ * That pointer references the vm_page that is "mapped" by the sf_buf. The
+ * actual mapping is provided by the direct virtual-to-physical mapping.
+ *
+ * On OEA64 and Book-E, we need to do something a little more complicated. Use
+ * the runtime-detected hw_direct_map to pick between the two cases. Our
+ * friends in vm_machdep.c will do the same to ensure nothing gets confused.
+ */
+#define SFBUF
+#define SFBUF_NOMD
+#define SFBUF_OPTIONAL_DIRECT_MAP hw_direct_map
+
#endif /* _MACHINE_VMPARAM_H_ */
diff --git a/sys/powerpc/powerpc/vm_machdep.c b/sys/powerpc/powerpc/vm_machdep.c
index d9d3987..d684094 100644
--- a/sys/powerpc/powerpc/vm_machdep.c
+++ b/sys/powerpc/powerpc/vm_machdep.c
@@ -80,7 +80,6 @@
#include <sys/vmmeter.h>
#include <sys/kernel.h>
#include <sys/mbuf.h>
-#include <sys/sf_buf.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/unistd.h>
@@ -100,47 +99,6 @@
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
-/*
- * On systems without a direct mapped region (e.g. PPC64),
- * we use the same code as the Book E implementation. Since
- * we need to have runtime detection of this, define some machinery
- * for sf_bufs in this case, and ignore it on systems with direct maps.
- */
-
-#ifndef NSFBUFS
-#define NSFBUFS (512 + maxusers * 16)
-#endif
-
-static int nsfbufs;
-static int nsfbufspeak;
-static int nsfbufsused;
-
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
- "Maximum number of sendfile(2) sf_bufs available");
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
- "Number of sendfile(2) sf_bufs at peak usage");
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
- "Number of sendfile(2) sf_bufs in use");
-
-static void sf_buf_init(void *arg);
-SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
-
-LIST_HEAD(sf_head, sf_buf);
-
-/* A hash table of active sendfile(2) buffers */
-static struct sf_head *sf_buf_active;
-static u_long sf_buf_hashmask;
-
-#define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask)
-
-static TAILQ_HEAD(, sf_buf) sf_buf_freelist;
-static u_int sf_buf_alloc_want;
-
-/*
- * A lock used to synchronize access to the hash table and free list
- */
-static struct mtx sf_buf_lock;
-
#ifdef __powerpc64__
extern uintptr_t tocbase;
#endif
@@ -245,124 +203,6 @@ cpu_exit(struct thread *td)
}
/*
- * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
- */
-static void
-sf_buf_init(void *arg)
-{
- struct sf_buf *sf_bufs;
- vm_offset_t sf_base;
- int i;
-
- /* Don't bother on systems with a direct map */
- if (hw_direct_map)
- return;
-
- nsfbufs = NSFBUFS;
- TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
-
- sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask);
- TAILQ_INIT(&sf_buf_freelist);
- sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
- sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
- M_NOWAIT | M_ZERO);
-
- for (i = 0; i < nsfbufs; i++) {
- sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
- TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry);
- }
- sf_buf_alloc_want = 0;
- mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF);
-}
-
-/*
- * Get an sf_buf from the freelist. Will block if none are available.
- */
-struct sf_buf *
-sf_buf_alloc(struct vm_page *m, int flags)
-{
- struct sf_head *hash_list;
- struct sf_buf *sf;
- int error;
-
- if (hw_direct_map) {
- /* Shortcut the direct mapped case */
- return ((struct sf_buf *)m);
- }
-
- hash_list = &sf_buf_active[SF_BUF_HASH(m)];
- mtx_lock(&sf_buf_lock);
- LIST_FOREACH(sf, hash_list, list_entry) {
- if (sf->m == m) {
- sf->ref_count++;
- if (sf->ref_count == 1) {
- TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
- nsfbufsused++;
- nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
- }
- goto done;
- }
- }
-
- while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) {
- if (flags & SFB_NOWAIT)
- goto done;
-
- sf_buf_alloc_want++;
- SFSTAT_INC(sf_allocwait);
- error = msleep(&sf_buf_freelist, &sf_buf_lock,
- (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
- sf_buf_alloc_want--;
-
- /*
- * If we got a signal, don't risk going back to sleep.
- */
- if (error)
- goto done;
- }
-
- TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry);
- if (sf->m != NULL)
- LIST_REMOVE(sf, list_entry);
-
- LIST_INSERT_HEAD(hash_list, sf, list_entry);
- sf->ref_count = 1;
- sf->m = m;
- nsfbufsused++;
- nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
- pmap_qenter(sf->kva, &sf->m, 1);
-done:
- mtx_unlock(&sf_buf_lock);
- return (sf);
-}
-
-/*
- * Detach mapped page and release resources back to the system.
- *
- * Remove a reference from the given sf_buf, adding it to the free
- * list when its reference count reaches zero. A freed sf_buf still,
- * however, retains its virtual-to-physical mapping until it is
- * recycled or reactivated by sf_buf_alloc(9).
- */
-void
-sf_buf_free(struct sf_buf *sf)
-{
- if (hw_direct_map)
- return;
-
- mtx_lock(&sf_buf_lock);
- sf->ref_count--;
- if (sf->ref_count == 0) {
- TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry);
- nsfbufsused--;
-
- if (sf_buf_alloc_want > 0)
- wakeup(&sf_buf_freelist);
- }
- mtx_unlock(&sf_buf_lock);
-}
-
-/*
* Software interrupt handler for queued VM system processing.
*/
void
diff --git a/sys/sparc64/include/sf_buf.h b/sys/sparc64/include/sf_buf.h
deleted file mode 100644
index ebbbea8..0000000
--- a/sys/sparc64/include/sf_buf.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/*-
- * Copyright (c) 2003 Alan L. Cox <alc@cs.rice.edu>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * $FreeBSD$
- */
-
-#ifndef _MACHINE_SF_BUF_H_
-#define _MACHINE_SF_BUF_H_
-
-#include <sys/queue.h>
-
-struct vm_page;
-
-struct sf_buf {
- SLIST_ENTRY(sf_buf) free_list; /* list of free buffer slots */
- struct vm_page *m; /* currently mapped page */
- vm_offset_t kva; /* va of mapping */
-};
-
-struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags);
-void sf_buf_free(struct sf_buf *sf);
-
-static __inline vm_offset_t
-sf_buf_kva(struct sf_buf *sf)
-{
-
- return (sf->kva);
-}
-
-static __inline struct vm_page *
-sf_buf_page(struct sf_buf *sf)
-{
-
- return (sf->m);
-}
-
-#endif /* !_MACHINE_SF_BUF_H_ */
diff --git a/sys/sparc64/include/vmparam.h b/sys/sparc64/include/vmparam.h
index b1c90d2..10976f1 100644
--- a/sys/sparc64/include/vmparam.h
+++ b/sys/sparc64/include/vmparam.h
@@ -239,4 +239,7 @@ extern vm_offset_t vm_max_kernel_address;
*/
#define ZERO_REGION_SIZE PAGE_SIZE
+#define SFBUF
+#define SFBUF_NOMD
+
#endif /* !_MACHINE_VMPARAM_H_ */
diff --git a/sys/sparc64/sparc64/vm_machdep.c b/sys/sparc64/sparc64/vm_machdep.c
index 8615aa2..96353eb 100644
--- a/sys/sparc64/sparc64/vm_machdep.c
+++ b/sys/sparc64/sparc64/vm_machdep.c
@@ -53,7 +53,6 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysent.h>
-#include <sys/sf_buf.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/unistd.h>
@@ -84,35 +83,6 @@ __FBSDID("$FreeBSD$");
#include <machine/tlb.h>
#include <machine/tstate.h>
-#ifndef NSFBUFS
-#define NSFBUFS (512 + maxusers * 16)
-#endif
-
-static int nsfbufs;
-static int nsfbufspeak;
-static int nsfbufsused;
-
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
- "Maximum number of sendfile(2) sf_bufs available");
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
- "Number of sendfile(2) sf_bufs at peak usage");
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
- "Number of sendfile(2) sf_bufs in use");
-
-static void sf_buf_init(void *arg);
-SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL);
-
-/*
- * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the
- * sf_freelist head with the sf_lock mutex.
- */
-static struct {
- SLIST_HEAD(, sf_buf) sf_head;
- struct mtx sf_lock;
-} sf_freelist;
-
-static u_int sf_buf_alloc_want;
-
PMAP_STATS_VAR(uma_nsmall_alloc);
PMAP_STATS_VAR(uma_nsmall_alloc_oc);
PMAP_STATS_VAR(uma_nsmall_free);
@@ -417,83 +387,6 @@ is_physical_memory(vm_paddr_t addr)
return (0);
}
-/*
- * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
- */
-static void
-sf_buf_init(void *arg)
-{
- struct sf_buf *sf_bufs;
- vm_offset_t sf_base;
- int i;
-
- nsfbufs = NSFBUFS;
- TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs);
-
- mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
- SLIST_INIT(&sf_freelist.sf_head);
- sf_base = kva_alloc(nsfbufs * PAGE_SIZE);
- sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
- M_NOWAIT | M_ZERO);
- for (i = 0; i < nsfbufs; i++) {
- sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
- SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
- }
- sf_buf_alloc_want = 0;
-}
-
-/*
- * Get an sf_buf from the freelist. Will block if none are available.
- */
-struct sf_buf *
-sf_buf_alloc(struct vm_page *m, int flags)
-{
- struct sf_buf *sf;
- int error;
-
- mtx_lock(&sf_freelist.sf_lock);
- while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
- if (flags & SFB_NOWAIT)
- break;
- sf_buf_alloc_want++;
- SFSTAT_INC(sf_allocwait);
- error = msleep(&sf_freelist, &sf_freelist.sf_lock,
- (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0);
- sf_buf_alloc_want--;
-
- /*
- * If we got a signal, don't risk going back to sleep.
- */
- if (error)
- break;
- }
- if (sf != NULL) {
- SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
- sf->m = m;
- nsfbufsused++;
- nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
- pmap_qenter(sf->kva, &sf->m, 1);
- }
- mtx_unlock(&sf_freelist.sf_lock);
- return (sf);
-}
-
-/*
- * Release resources back to the system.
- */
-void
-sf_buf_free(struct sf_buf *sf)
-{
-
- pmap_qremove(sf->kva, 1);
- mtx_lock(&sf_freelist.sf_lock);
- SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
- nsfbufsused--;
- if (sf_buf_alloc_want > 0)
- wakeup(&sf_freelist);
- mtx_unlock(&sf_freelist.sf_lock);
-}
-
void
swi_vm(void *v)
{
diff --git a/sys/sys/sf_buf.h b/sys/sys/sf_buf.h
index 07871dd..8a6c56f 100644
--- a/sys/sys/sf_buf.h
+++ b/sys/sys/sf_buf.h
@@ -1,4 +1,5 @@
/*-
+ * Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org>
* Copyright (c) 2003-2004 Alan L. Cox <alc@cs.rice.edu>
* All rights reserved.
*
@@ -29,6 +30,146 @@
#ifndef _SYS_SF_BUF_H_
#define _SYS_SF_BUF_H_
+struct sfstat { /* sendfile statistics */
+ uint64_t sf_iocnt; /* times sendfile had to do disk I/O */
+ uint64_t sf_allocfail; /* times sfbuf allocation failed */
+ uint64_t sf_allocwait; /* times sfbuf allocation had to wait */
+};
+
+#ifdef _KERNEL
+#include <sys/types.h>
+#include <sys/systm.h>
+#include <sys/counter.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/vm_page.h>
+
+/*
+ * Sf_bufs, or sendfile(2) buffers provide a vm_page that is mapped
+ * into kernel address space. Note, that they aren't used only
+ * by sendfile(2)!
+ *
+ * Sf_bufs could be implemented as a feature of vm_page_t, but that
+ * would require growth of the structure. That's why they are implemented
+ * as a separate hash indexed by vm_page address. Implementation lives in
+ * kern/subr_sfbuf.c. Meanwhile, most 64-bit machines have a physical map,
+ * so they don't require this hash at all, thus ignore subr_sfbuf.c.
+ *
+ * Different 32-bit architectures demand different requirements on sf_buf
+ * hash and functions. They request features in machine/vmparam.h, which
+ * enable parts of this file. They can also optionally provide helpers in
+ * machine/sf_buf.h
+ *
+ * Defines are:
+ * SFBUF This machine requires sf_buf hash.
+ * subr_sfbuf.c should be compiled.
+ * SFBUF_CPUSET This machine can perform SFB_CPUPRIVATE mappings,
+ * that do no invalidate cache on the rest of CPUs.
+ * SFBUF_NOMD This machine doesn't have machine/sf_buf.h
+ *
+ * SFBUF_OPTIONAL_DIRECT_MAP Value of this define is used as boolean
+ * variable that tells whether machine is
+ * capable of direct map or not at runtime.
+ * SFBUF_MAP This machine provides its own sf_buf_map() and
+ * sf_buf_unmap().
+ * SFBUF_PROCESS_PAGE This machine provides sf_buf_process_page()
+ * function.
+ */
+
+#ifdef SFBUF
+#if defined(SMP) && defined(SFBUF_CPUSET)
+#include <sys/_cpuset.h>
+#endif
+#include <sys/queue.h>
+
+struct sf_buf {
+ LIST_ENTRY(sf_buf) list_entry; /* list of buffers */
+ TAILQ_ENTRY(sf_buf) free_entry; /* list of buffers */
+ vm_page_t m; /* currently mapped page */
+ vm_offset_t kva; /* va of mapping */
+ int ref_count; /* usage of this mapping */
+#if defined(SMP) && defined(SFBUF_CPUSET)
+ cpuset_t cpumask; /* where mapping is valid */
+#endif
+};
+#else /* ! SFBUF */
+struct sf_buf;
+#endif /* SFBUF */
+
+#ifndef SFBUF_NOMD
+#include <machine/sf_buf.h>
+#endif
+#ifdef SFBUF_OPTIONAL_DIRECT_MAP
+#include <machine/md_var.h>
+#endif
+
+#ifdef SFBUF
+struct sf_buf *sf_buf_alloc(struct vm_page *, int);
+void sf_buf_free(struct sf_buf *);
+
+static inline vm_offset_t
+sf_buf_kva(struct sf_buf *sf)
+{
+#ifdef SFBUF_OPTIONAL_DIRECT_MAP
+ if (SFBUF_OPTIONAL_DIRECT_MAP)
+ return (VM_PAGE_TO_PHYS((vm_page_t)sf));
+#endif
+
+ return (sf->kva);
+}
+
+static inline vm_page_t
+sf_buf_page(struct sf_buf *sf)
+{
+#ifdef SFBUF_OPTIONAL_DIRECT_MAP
+ if (SFBUF_OPTIONAL_DIRECT_MAP)
+ return ((vm_page_t)sf);
+#endif
+
+ return (sf->m);
+}
+
+#ifndef SFBUF_MAP
+#include <vm/pmap.h>
+
+static inline void
+sf_buf_map(struct sf_buf *sf, int flags)
+{
+
+ pmap_qenter(sf->kva, &sf->m, 1);
+}
+
+static inline int
+sf_buf_unmap(struct sf_buf *sf)
+{
+
+ return (0);
+}
+#endif /* SFBUF_MAP */
+
+#if defined(SMP) && defined(SFBUF_CPUSET)
+void sf_buf_shootdown(struct sf_buf *, int);
+#endif
+
+#ifdef SFBUF_PROCESS_PAGE
+boolean_t sf_buf_process_page(vm_page_t, void (*)(struct sf_buf *));
+#endif
+
+#else /* ! SFBUF */
+
+static inline struct sf_buf *
+sf_buf_alloc(struct vm_page *m, int pri)
+{
+
+ return ((struct sf_buf *)m);
+}
+
+static inline void
+sf_buf_free(struct sf_buf *sf)
+{
+}
+#endif /* SFBUF */
+
/*
* Options to sf_buf_alloc() are specified through its flags argument. This
* argument's value should be the result of a bitwise or'ing of one or more
@@ -40,19 +181,6 @@
#define SFB_DEFAULT 0
#define SFB_NOWAIT 4 /* Return NULL if all bufs are used. */
-struct vm_page;
-
-struct sfstat { /* sendfile statistics */
- uint64_t sf_iocnt; /* times sendfile had to do disk I/O */
- uint64_t sf_allocfail; /* times sfbuf allocation failed */
- uint64_t sf_allocwait; /* times sfbuf allocation had to wait */
-};
-
-#ifdef _KERNEL
-#include <machine/sf_buf.h>
-#include <sys/systm.h>
-#include <sys/counter.h>
-
extern counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
#define SFSTAT_ADD(name, val) \
counter_u64_add(sfstat[offsetof(struct sfstat, name) / sizeof(uint64_t)],\
OpenPOWER on IntegriCloud