diff options
-rw-r--r-- | sys/amd64/include/sf_buf.h | 23 | ||||
-rw-r--r-- | sys/arm/arm/vm_machdep.c | 137 | ||||
-rw-r--r-- | sys/arm/include/sf_buf.h | 29 | ||||
-rw-r--r-- | sys/arm/include/vmparam.h | 3 | ||||
-rw-r--r-- | sys/conf/files.arm | 1 | ||||
-rw-r--r-- | sys/conf/files.i386 | 1 | ||||
-rw-r--r-- | sys/conf/files.mips | 1 | ||||
-rw-r--r-- | sys/conf/files.pc98 | 1 | ||||
-rw-r--r-- | sys/conf/files.powerpc | 1 | ||||
-rw-r--r-- | sys/conf/files.sparc64 | 1 | ||||
-rw-r--r-- | sys/i386/i386/vm_machdep.c | 233 | ||||
-rw-r--r-- | sys/i386/include/sf_buf.h | 39 | ||||
-rw-r--r-- | sys/i386/include/vmparam.h | 5 | ||||
-rw-r--r-- | sys/kern/subr_sfbuf.c | 226 | ||||
-rw-r--r-- | sys/mips/include/sf_buf.h | 54 | ||||
-rw-r--r-- | sys/mips/include/vmparam.h | 4 | ||||
-rw-r--r-- | sys/mips/mips/vm_machdep.c | 113 | ||||
-rw-r--r-- | sys/powerpc/include/sf_buf.h | 80 | ||||
-rw-r--r-- | sys/powerpc/include/vmparam.h | 14 | ||||
-rw-r--r-- | sys/powerpc/powerpc/vm_machdep.c | 160 | ||||
-rw-r--r-- | sys/sparc64/include/sf_buf.h | 59 | ||||
-rw-r--r-- | sys/sparc64/include/vmparam.h | 3 | ||||
-rw-r--r-- | sys/sparc64/sparc64/vm_machdep.c | 107 | ||||
-rw-r--r-- | sys/sys/sf_buf.h | 154 |
24 files changed, 475 insertions, 974 deletions
diff --git a/sys/amd64/include/sf_buf.h b/sys/amd64/include/sf_buf.h index 729e8e5..00b3085 100644 --- a/sys/amd64/include/sf_buf.h +++ b/sys/amd64/include/sf_buf.h @@ -29,42 +29,23 @@ #ifndef _MACHINE_SF_BUF_H_ #define _MACHINE_SF_BUF_H_ -#include <vm/vm.h> -#include <vm/vm_param.h> -#include <vm/vm_page.h> - /* * On this machine, the only purpose for which sf_buf is used is to implement * an opaque pointer required by the machine-independent parts of the kernel. * That pointer references the vm_page that is "mapped" by the sf_buf. The * actual mapping is provided by the direct virtual-to-physical mapping. */ -struct sf_buf; - -static inline struct sf_buf * -sf_buf_alloc(struct vm_page *m, int pri) -{ - - return ((struct sf_buf *)m); -} - -static inline void -sf_buf_free(struct sf_buf *sf) -{ -} - -static __inline vm_offset_t +static inline vm_offset_t sf_buf_kva(struct sf_buf *sf) { return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS((vm_page_t)sf))); } -static __inline vm_page_t +static inline vm_page_t sf_buf_page(struct sf_buf *sf) { return ((vm_page_t)sf); } - #endif /* !_MACHINE_SF_BUF_H_ */ diff --git a/sys/arm/arm/vm_machdep.c b/sys/arm/arm/vm_machdep.c index df764e8..4a9cf3f 100644 --- a/sys/arm/arm/vm_machdep.c +++ b/sys/arm/arm/vm_machdep.c @@ -50,7 +50,6 @@ __FBSDID("$FreeBSD$"); #include <sys/mbuf.h> #include <sys/proc.h> #include <sys/socketvar.h> -#include <sys/sf_buf.h> #include <sys/syscall.h> #include <sys/sysctl.h> #include <sys/sysent.h> @@ -83,42 +82,6 @@ __FBSDID("$FreeBSD$"); CTASSERT(sizeof(struct switchframe) == 24); CTASSERT(sizeof(struct trapframe) == 80); -#ifndef NSFBUFS -#define NSFBUFS (512 + maxusers * 16) -#endif - -static int nsfbufs; -static int nsfbufspeak; -static int nsfbufsused; - -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, - "Maximum number of sendfile(2) sf_bufs available"); -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, - "Number of sendfile(2) sf_bufs at peak usage"); 
-SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, - "Number of sendfile(2) sf_bufs in use"); - -static void sf_buf_init(void *arg); -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL); - -LIST_HEAD(sf_head, sf_buf); - -/* - * A hash table of active sendfile(2) buffers - */ -static struct sf_head *sf_buf_active; -static u_long sf_buf_hashmask; - -#define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask) - -static TAILQ_HEAD(, sf_buf) sf_buf_freelist; -static u_int sf_buf_alloc_want; - -/* - * A lock used to synchronize access to the hash table and free list - */ -static struct mtx sf_buf_lock; - /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child @@ -184,106 +147,6 @@ cpu_thread_swapout(struct thread *td) { } -/* - * Detatch mapped page and release resources back to the system. - */ -void -sf_buf_free(struct sf_buf *sf) -{ - - mtx_lock(&sf_buf_lock); - sf->ref_count--; - if (sf->ref_count == 0) { - TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry); - nsfbufsused--; - pmap_kremove(sf->kva); - sf->m = NULL; - LIST_REMOVE(sf, list_entry); - if (sf_buf_alloc_want > 0) - wakeup(&sf_buf_freelist); - } - mtx_unlock(&sf_buf_lock); -} - -/* - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. 
:-)) - */ -static void -sf_buf_init(void *arg) -{ - struct sf_buf *sf_bufs; - vm_offset_t sf_base; - int i; - - nsfbufs = NSFBUFS; - TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs); - - sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask); - TAILQ_INIT(&sf_buf_freelist); - sf_base = kva_alloc(nsfbufs * PAGE_SIZE); - sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, - M_NOWAIT | M_ZERO); - for (i = 0; i < nsfbufs; i++) { - sf_bufs[i].kva = sf_base + i * PAGE_SIZE; - TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry); - } - sf_buf_alloc_want = 0; - mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF); -} - -/* - * Get an sf_buf from the freelist. Will block if none are available. - */ -struct sf_buf * -sf_buf_alloc(struct vm_page *m, int flags) -{ - struct sf_head *hash_list; - struct sf_buf *sf; - int error; - - hash_list = &sf_buf_active[SF_BUF_HASH(m)]; - mtx_lock(&sf_buf_lock); - LIST_FOREACH(sf, hash_list, list_entry) { - if (sf->m == m) { - sf->ref_count++; - if (sf->ref_count == 1) { - TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry); - nsfbufsused++; - nsfbufspeak = imax(nsfbufspeak, nsfbufsused); - } - goto done; - } - } - while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) { - if (flags & SFB_NOWAIT) - goto done; - sf_buf_alloc_want++; - SFSTAT_INC(sf_allocwait); - error = msleep(&sf_buf_freelist, &sf_buf_lock, - (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0); - sf_buf_alloc_want--; - - - /* - * If we got a signal, don't risk going back to sleep. 
- */ - if (error) - goto done; - } - TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry); - if (sf->m != NULL) - LIST_REMOVE(sf, list_entry); - LIST_INSERT_HEAD(hash_list, sf, list_entry); - sf->ref_count = 1; - sf->m = m; - nsfbufsused++; - nsfbufspeak = imax(nsfbufspeak, nsfbufsused); - pmap_kenter(sf->kva, VM_PAGE_TO_PHYS(sf->m)); -done: - mtx_unlock(&sf_buf_lock); - return (sf); -} - void cpu_set_syscall_retval(struct thread *td, int error) { diff --git a/sys/arm/include/sf_buf.h b/sys/arm/include/sf_buf.h index 2ec07de..b761cc7 100644 --- a/sys/arm/include/sf_buf.h +++ b/sys/arm/include/sf_buf.h @@ -29,33 +29,18 @@ #ifndef _MACHINE_SF_BUF_H_ #define _MACHINE_SF_BUF_H_ -#include <sys/queue.h> - -struct vm_page; - -struct sf_buf { - LIST_ENTRY(sf_buf) list_entry; /* list of buffers */ - TAILQ_ENTRY(sf_buf) free_entry; /* list of buffers */ - struct vm_page *m; /* currently mapped page */ - vm_offset_t kva; /* va of mapping */ - int ref_count; /* usage of this mapping */ -}; - -static __inline vm_offset_t -sf_buf_kva(struct sf_buf *sf) +static inline void +sf_buf_map(struct sf_buf *sf, int flags) { - return (sf->kva); + pmap_kenter(sf->kva, VM_PAGE_TO_PHYS(sf->m)); } -static __inline struct vm_page * -sf_buf_page(struct sf_buf *sf) +static inline int +sf_buf_unmap(struct sf_buf *sf) { - return (sf->m); + pmap_kremove(sf->kva); + return (1); } - -struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags); -void sf_buf_free(struct sf_buf *sf); - #endif /* !_MACHINE_SF_BUF_H_ */ diff --git a/sys/arm/include/vmparam.h b/sys/arm/include/vmparam.h index 1c6085e..9bfd358 100644 --- a/sys/arm/include/vmparam.h +++ b/sys/arm/include/vmparam.h @@ -170,4 +170,7 @@ extern vm_offset_t vm_max_kernel_address; #define VM_MAX_AUTOTUNE_MAXUSERS 384 #endif +#define SFBUF +#define SFBUF_MAP + #endif /* _MACHINE_VMPARAM_H_ */ diff --git a/sys/conf/files.arm b/sys/conf/files.arm index 7243c17..1f7c71d 100644 --- a/sys/conf/files.arm +++ b/sys/conf/files.arm @@ -77,6 +77,7 @@ font.h 
optional sc \ clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8" kern/subr_busdma_bufalloc.c standard kern/subr_dummy_vdso_tc.c standard +kern/subr_sfbuf.c standard libkern/arm/aeabi_unwind.c standard libkern/arm/divsi3.S standard libkern/arm/ffs.S standard diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index 58ae57b..ebc81fc 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -520,6 +520,7 @@ isa/vga_isa.c optional vga kern/kern_clocksource.c standard kern/imgact_aout.c optional compat_aout kern/imgact_gzip.c optional gzip +kern/subr_sfbuf.c standard libkern/divdi3.c standard libkern/flsll.c standard libkern/memmove.c standard diff --git a/sys/conf/files.mips b/sys/conf/files.mips index 2df6844..3677de4 100644 --- a/sys/conf/files.mips +++ b/sys/conf/files.mips @@ -51,6 +51,7 @@ mips/mips/vm_machdep.c standard kern/kern_clocksource.c standard kern/link_elf_obj.c standard kern/subr_dummy_vdso_tc.c standard +kern/subr_sfbuf.c optional mips | mipsel | mipsn32 # gcc/clang runtime libkern/ffsl.c standard diff --git a/sys/conf/files.pc98 b/sys/conf/files.pc98 index fff7b3c..3142223 100644 --- a/sys/conf/files.pc98 +++ b/sys/conf/files.pc98 @@ -205,6 +205,7 @@ i386/svr4/svr4_machdep.c optional compat_svr4 kern/kern_clocksource.c standard kern/imgact_aout.c optional compat_aout kern/imgact_gzip.c optional gzip +kern/subr_sfbuf.c standard libkern/divdi3.c standard libkern/flsll.c standard libkern/memmove.c standard diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc index 8aed60a..10515a5 100644 --- a/sys/conf/files.powerpc +++ b/sys/conf/files.powerpc @@ -71,6 +71,7 @@ dev/vt/hw/ofwfb/ofwfb.c optional vt aim kern/kern_clocksource.c standard kern/subr_dummy_vdso_tc.c standard kern/syscalls.c optional ktr +kern/subr_sfbuf.c standard libkern/ashldi3.c optional powerpc libkern/ashrdi3.c optional powerpc libkern/bcmp.c standard diff --git a/sys/conf/files.sparc64 b/sys/conf/files.sparc64 index ccee247..53b0920 100644 --- 
a/sys/conf/files.sparc64 +++ b/sys/conf/files.sparc64 @@ -64,6 +64,7 @@ dev/vt/hw/ofwfb/ofwfb.c optional vt kern/kern_clocksource.c standard kern/subr_dummy_vdso_tc.c standard kern/syscalls.c optional ktr +kern/subr_sfbuf.c standard libkern/ffs.c standard libkern/ffsl.c standard libkern/fls.c standard diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 78cc9c9..9c87548 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -118,38 +118,6 @@ static u_int cpu_reset_proxyid; static volatile u_int cpu_reset_proxy_active; #endif -static int nsfbufs; -static int nsfbufspeak; -static int nsfbufsused; - -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, - "Maximum number of sendfile(2) sf_bufs available"); -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, - "Number of sendfile(2) sf_bufs at peak usage"); -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, - "Number of sendfile(2) sf_bufs in use"); - -static void sf_buf_init(void *arg); -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL); - -LIST_HEAD(sf_head, sf_buf); - -/* - * A hash table of active sendfile(2) buffers - */ -static struct sf_head *sf_buf_active; -static u_long sf_buf_hashmask; - -#define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask) - -static TAILQ_HEAD(, sf_buf) sf_buf_freelist; -static u_int sf_buf_alloc_want; - -/* - * A lock used to synchronize access to the hash table and free list - */ -static struct mtx sf_buf_lock; - extern int _ucodesel, _udatasel; /* @@ -750,121 +718,12 @@ cpu_reset_real() } /* - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. 
:-)) - */ -static void -sf_buf_init(void *arg) -{ - struct sf_buf *sf_bufs; - vm_offset_t sf_base; - int i; - - nsfbufs = NSFBUFS; - TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs); - - sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask); - TAILQ_INIT(&sf_buf_freelist); - sf_base = kva_alloc(nsfbufs * PAGE_SIZE); - sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, - M_NOWAIT | M_ZERO); - for (i = 0; i < nsfbufs; i++) { - sf_bufs[i].kva = sf_base + i * PAGE_SIZE; - TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry); - } - sf_buf_alloc_want = 0; - mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF); -} - -/* - * Invalidate the cache lines that may belong to the page, if - * (possibly old) mapping of the page by sf buffer exists. Returns - * TRUE when mapping was found and cache invalidated. - */ -boolean_t -sf_buf_invalidate_cache(vm_page_t m) -{ - struct sf_head *hash_list; - struct sf_buf *sf; - boolean_t ret; - - hash_list = &sf_buf_active[SF_BUF_HASH(m)]; - ret = FALSE; - mtx_lock(&sf_buf_lock); - LIST_FOREACH(sf, hash_list, list_entry) { - if (sf->m == m) { - /* - * Use pmap_qenter to update the pte for - * existing mapping, in particular, the PAT - * settings are recalculated. - */ - pmap_qenter(sf->kva, &m, 1); - pmap_invalidate_cache_range(sf->kva, sf->kva + - PAGE_SIZE); - ret = TRUE; - break; - } - } - mtx_unlock(&sf_buf_lock); - return (ret); -} - -/* * Get an sf_buf from the freelist. May block if none are available. 
*/ -struct sf_buf * -sf_buf_alloc(struct vm_page *m, int flags) +void +sf_buf_map(struct sf_buf *sf, int flags) { pt_entry_t opte, *ptep; - struct sf_head *hash_list; - struct sf_buf *sf; -#ifdef SMP - cpuset_t other_cpus; - u_int cpuid; -#endif - int error; - - KASSERT(curthread->td_pinned > 0 || (flags & SFB_CPUPRIVATE) == 0, - ("sf_buf_alloc(SFB_CPUPRIVATE): curthread not pinned")); - hash_list = &sf_buf_active[SF_BUF_HASH(m)]; - mtx_lock(&sf_buf_lock); - LIST_FOREACH(sf, hash_list, list_entry) { - if (sf->m == m) { - sf->ref_count++; - if (sf->ref_count == 1) { - TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry); - nsfbufsused++; - nsfbufspeak = imax(nsfbufspeak, nsfbufsused); - } -#ifdef SMP - goto shootdown; -#else - goto done; -#endif - } - } - while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) { - if (flags & SFB_NOWAIT) - goto done; - sf_buf_alloc_want++; - SFSTAT_INC(sf_allocwait); - error = msleep(&sf_buf_freelist, &sf_buf_lock, - (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0); - sf_buf_alloc_want--; - - /* - * If we got a signal, don't risk going back to sleep. 
- */ - if (error) - goto done; - } - TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry); - if (sf->m != NULL) - LIST_REMOVE(sf, list_entry); - LIST_INSERT_HEAD(hash_list, sf, list_entry); - sf->ref_count = 1; - sf->m = m; - nsfbufsused++; - nsfbufspeak = imax(nsfbufspeak, nsfbufsused); /* * Update the sf_buf's virtual-to-physical mapping, flushing the @@ -876,11 +735,11 @@ sf_buf_alloc(struct vm_page *m, int flags) ptep = vtopte(sf->kva); opte = *ptep; #ifdef XEN - PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(m)) | pgeflag - | PG_RW | PG_V | pmap_cache_bits(m->md.pat_mode, 0)); + PT_SET_MA(sf->kva, xpmap_ptom(VM_PAGE_TO_PHYS(sf->m)) | pgeflag + | PG_RW | PG_V | pmap_cache_bits(sf->m->md.pat_mode, 0)); #else - *ptep = VM_PAGE_TO_PHYS(m) | pgeflag | PG_RW | PG_V | - pmap_cache_bits(m->md.pat_mode, 0); + *ptep = VM_PAGE_TO_PHYS(sf->m) | pgeflag | PG_RW | PG_V | + pmap_cache_bits(sf->m->md.pat_mode, 0); #endif /* @@ -892,7 +751,21 @@ sf_buf_alloc(struct vm_page *m, int flags) #ifdef SMP if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) CPU_ZERO(&sf->cpumask); -shootdown: + + sf_buf_shootdown(sf, flags); +#else + if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) + pmap_invalidate_page(kernel_pmap, sf->kva); +#endif +} + +#ifdef SMP +void +sf_buf_shootdown(struct sf_buf *sf, int flags) +{ + cpuset_t other_cpus; + u_int cpuid; + sched_pin(); cpuid = PCPU_GET(cpuid); if (!CPU_ISSET(cpuid, &sf->cpumask)) { @@ -909,42 +782,50 @@ shootdown: } } sched_unpin(); -#else - if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) - pmap_invalidate_page(kernel_pmap, sf->kva); -#endif -done: - mtx_unlock(&sf_buf_lock); - return (sf); } +#endif /* - * Remove a reference from the given sf_buf, adding it to the free - * list when its reference count reaches zero. A freed sf_buf still, - * however, retains its virtual-to-physical mapping until it is - * recycled or reactivated by sf_buf_alloc(9). + * MD part of sf_buf_free(). 
*/ -void -sf_buf_free(struct sf_buf *sf) +int +sf_buf_unmap(struct sf_buf *sf) { - - mtx_lock(&sf_buf_lock); - sf->ref_count--; - if (sf->ref_count == 0) { - TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry); - nsfbufsused--; #ifdef XEN + /* + * Xen doesn't like having dangling R/W mappings + */ + pmap_qremove(sf->kva, 1); + return (1); +#else + return (0); +#endif +} + +static void +sf_buf_invalidate(struct sf_buf *sf) +{ + vm_page_t m = sf->m; + + /* + * Use pmap_qenter to update the pte for + * existing mapping, in particular, the PAT + * settings are recalculated. + */ + pmap_qenter(sf->kva, &m, 1); + pmap_invalidate_cache_range(sf->kva, sf->kva + PAGE_SIZE); +} + /* - * Xen doesn't like having dangling R/W mappings + * Invalidate the cache lines that may belong to the page, if + * (possibly old) mapping of the page by sf buffer exists. Returns + * TRUE when mapping was found and cache invalidated. */ - pmap_qremove(sf->kva, 1); - sf->m = NULL; - LIST_REMOVE(sf, list_entry); -#endif - if (sf_buf_alloc_want > 0) - wakeup(&sf_buf_freelist); - } - mtx_unlock(&sf_buf_lock); +boolean_t +sf_buf_invalidate_cache(vm_page_t m) +{ + + return (sf_buf_process_page(m, sf_buf_invalidate)); } /* diff --git a/sys/i386/include/sf_buf.h b/sys/i386/include/sf_buf.h index 20296b3..f1f1cd3 100644 --- a/sys/i386/include/sf_buf.h +++ b/sys/i386/include/sf_buf.h @@ -1,5 +1,5 @@ /*- - * Copyright (c) 2003, 2005 Alan L. Cox <alc@cs.rice.edu> + * Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org> * All rights reserved. 
* * Redistribution and use in source and binary forms, with or without @@ -29,39 +29,8 @@ #ifndef _MACHINE_SF_BUF_H_ #define _MACHINE_SF_BUF_H_ -#include <sys/_cpuset.h> -#include <sys/queue.h> - -struct vm_page; - -struct sf_buf { - LIST_ENTRY(sf_buf) list_entry; /* list of buffers */ - TAILQ_ENTRY(sf_buf) free_entry; /* list of buffers */ - struct vm_page *m; /* currently mapped page */ - vm_offset_t kva; /* va of mapping */ - int ref_count; /* usage of this mapping */ -#ifdef SMP - cpuset_t cpumask; /* cpus on which mapping is valid */ -#endif -}; - -struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags); -void sf_buf_free(struct sf_buf *sf); - -static __inline vm_offset_t -sf_buf_kva(struct sf_buf *sf) -{ - - return (sf->kva); -} - -static __inline struct vm_page * -sf_buf_page(struct sf_buf *sf) -{ - - return (sf->m); -} - -boolean_t sf_buf_invalidate_cache(vm_page_t m); +void sf_buf_map(struct sf_buf *, int); +int sf_buf_unmap(struct sf_buf *); +boolean_t sf_buf_invalidate_cache(vm_page_t); #endif /* !_MACHINE_SF_BUF_H_ */ diff --git a/sys/i386/include/vmparam.h b/sys/i386/include/vmparam.h index c086d76..975b302 100644 --- a/sys/i386/include/vmparam.h +++ b/sys/i386/include/vmparam.h @@ -198,4 +198,9 @@ #define VM_MAX_AUTOTUNE_MAXUSERS 384 #endif +#define SFBUF +#define SFBUF_MAP +#define SFBUF_CPUSET +#define SFBUF_PROCESS_PAGE + #endif /* _MACHINE_VMPARAM_H_ */ diff --git a/sys/kern/subr_sfbuf.c b/sys/kern/subr_sfbuf.c new file mode 100644 index 0000000..e4309d1 --- /dev/null +++ b/sys/kern/subr_sfbuf.c @@ -0,0 +1,226 @@ +/*- + * Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org> + * Copyright (c) 2003, 2005 Alan L. Cox <alc@cs.rice.edu> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/malloc.h> +#include <sys/mutex.h> +#include <sys/sf_buf.h> +#include <sys/smp.h> +#include <sys/sysctl.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/vm_page.h> + +#ifndef NSFBUFS +#define NSFBUFS (512 + maxusers * 16) +#endif + +static int nsfbufs; +static int nsfbufspeak; +static int nsfbufsused; + +SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, + "Maximum number of sendfile(2) sf_bufs available"); +SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, + "Number of sendfile(2) sf_bufs at peak usage"); +SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, + "Number of sendfile(2) sf_bufs in use"); + +static void sf_buf_init(void *arg); +SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL); + +LIST_HEAD(sf_head, sf_buf); + +/* + * A hash table of active sendfile(2) buffers + */ +static struct sf_head *sf_buf_active; +static u_long sf_buf_hashmask; + +#define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask) + +static TAILQ_HEAD(, sf_buf) sf_buf_freelist; +static u_int sf_buf_alloc_want; + +/* + * A lock used to synchronize access to the hash table and free list + */ +static struct mtx sf_buf_lock; + +/* + * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. 
:-)) + */ +static void +sf_buf_init(void *arg) +{ + struct sf_buf *sf_bufs; + vm_offset_t sf_base; + int i; + +#ifdef SFBUF_OPTIONAL_DIRECT_MAP + if (SFBUF_OPTIONAL_DIRECT_MAP) + return; +#endif + + nsfbufs = NSFBUFS; + TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs); + + sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask); + TAILQ_INIT(&sf_buf_freelist); + sf_base = kva_alloc(nsfbufs * PAGE_SIZE); + sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, + M_NOWAIT | M_ZERO); + KASSERT(sf_bufs, ("%s: malloc failure", __func__)); + for (i = 0; i < nsfbufs; i++) { + sf_bufs[i].kva = sf_base + i * PAGE_SIZE; + TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry); + } + sf_buf_alloc_want = 0; + mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF); +} + +/* + * Get an sf_buf from the freelist. May block if none are available. + */ +struct sf_buf * +sf_buf_alloc(struct vm_page *m, int flags) +{ + struct sf_head *hash_list; + struct sf_buf *sf; + int error; + +#ifdef SFBUF_OPTIONAL_DIRECT_MAP + if (SFBUF_OPTIONAL_DIRECT_MAP) + return ((struct sf_buf *)m); +#endif + + KASSERT(curthread->td_pinned > 0 || (flags & SFB_CPUPRIVATE) == 0, + ("sf_buf_alloc(SFB_CPUPRIVATE): curthread not pinned")); + hash_list = &sf_buf_active[SF_BUF_HASH(m)]; + mtx_lock(&sf_buf_lock); + LIST_FOREACH(sf, hash_list, list_entry) { + if (sf->m == m) { + sf->ref_count++; + if (sf->ref_count == 1) { + TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry); + nsfbufsused++; + nsfbufspeak = imax(nsfbufspeak, nsfbufsused); + } +#if defined(SMP) && defined(SFBUF_CPUSET) + sf_buf_shootdown(sf, flags); +#endif + goto done; + } + } + while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) { + if (flags & SFB_NOWAIT) + goto done; + sf_buf_alloc_want++; + SFSTAT_INC(sf_allocwait); + error = msleep(&sf_buf_freelist, &sf_buf_lock, + (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0); + sf_buf_alloc_want--; + + /* + * If we got a signal, don't risk going back to sleep. 
+ */ + if (error) + goto done; + } + TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry); + if (sf->m != NULL) + LIST_REMOVE(sf, list_entry); + LIST_INSERT_HEAD(hash_list, sf, list_entry); + sf->ref_count = 1; + sf->m = m; + nsfbufsused++; + nsfbufspeak = imax(nsfbufspeak, nsfbufsused); + sf_buf_map(sf, flags); +done: + mtx_unlock(&sf_buf_lock); + return (sf); +} + +/* + * Remove a reference from the given sf_buf, adding it to the free + * list when its reference count reaches zero. A freed sf_buf still, + * however, retains its virtual-to-physical mapping until it is + * recycled or reactivated by sf_buf_alloc(9). + */ +void +sf_buf_free(struct sf_buf *sf) +{ + +#ifdef SFBUF_OPTIONAL_DIRECT_MAP + if (SFBUF_OPTIONAL_DIRECT_MAP) + return; +#endif + + mtx_lock(&sf_buf_lock); + sf->ref_count--; + if (sf->ref_count == 0) { + TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry); + nsfbufsused--; + if (sf_buf_unmap(sf)) { + sf->m = NULL; + LIST_REMOVE(sf, list_entry); + } + if (sf_buf_alloc_want > 0) + wakeup(&sf_buf_freelist); + } + mtx_unlock(&sf_buf_lock); +} + +#ifdef SFBUF_PROCESS_PAGE +/* + * Run callback function on sf_buf that holds a certain page. 
+ */ +boolean_t +sf_buf_process_page(vm_page_t m, void (*cb)(struct sf_buf *)) +{ + struct sf_head *hash_list; + struct sf_buf *sf; + + hash_list = &sf_buf_active[SF_BUF_HASH(m)]; + mtx_lock(&sf_buf_lock); + LIST_FOREACH(sf, hash_list, list_entry) { + if (sf->m == m) { + cb(sf); + mtx_unlock(&sf_buf_lock); + return (TRUE); + } + } + mtx_unlock(&sf_buf_lock); + return (FALSE); +} +#endif /* SFBUF_PROCESS_PAGE */ diff --git a/sys/mips/include/sf_buf.h b/sys/mips/include/sf_buf.h index e5d981f..76fb993 100644 --- a/sys/mips/include/sf_buf.h +++ b/sys/mips/include/sf_buf.h @@ -29,31 +29,9 @@ #ifndef _MACHINE_SF_BUF_H_ #define _MACHINE_SF_BUF_H_ -#ifdef __mips_n64 -#include <vm/vm.h> -#include <vm/vm_param.h> -#include <vm/vm_page.h> -#else -#include <sys/queue.h> -#endif +#ifdef __mips_n64 /* In 64 bit the whole memory is directly mapped */ -#ifdef __mips_n64 -/* In 64 bit the whole memory is directly mapped */ -struct sf_buf; - -static inline struct sf_buf * -sf_buf_alloc(struct vm_page *m, int pri) -{ - - return ((struct sf_buf *)m); -} - -static inline void -sf_buf_free(struct sf_buf *sf) -{ -} - -static __inline vm_offset_t +static inline vm_offset_t sf_buf_kva(struct sf_buf *sf) { vm_page_t m; @@ -62,38 +40,12 @@ sf_buf_kva(struct sf_buf *sf) return (MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(m))); } -static __inline struct vm_page * +static inline struct vm_page * sf_buf_page(struct sf_buf *sf) { return ((vm_page_t)sf); } -#else /* ! 
__mips_n64 */ -struct vm_page; - -struct sf_buf { - SLIST_ENTRY(sf_buf) free_list; /* list of free buffer slots */ - struct vm_page *m; /* currently mapped page */ - vm_offset_t kva; /* va of mapping */ -}; - -struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags); -void sf_buf_free(struct sf_buf *sf); - -static __inline vm_offset_t -sf_buf_kva(struct sf_buf *sf) -{ - - return (sf->kva); -} - -static __inline struct vm_page * -sf_buf_page(struct sf_buf *sf) -{ - - return (sf->m); -} #endif /* __mips_n64 */ - #endif /* !_MACHINE_SF_BUF_H_ */ diff --git a/sys/mips/include/vmparam.h b/sys/mips/include/vmparam.h index 8922924..d3b833e 100644 --- a/sys/mips/include/vmparam.h +++ b/sys/mips/include/vmparam.h @@ -187,4 +187,8 @@ #define ZERO_REGION_SIZE (64 * 1024) /* 64KB */ +#ifndef __mips_n64 +#define SFBUF +#endif + #endif /* !_MACHINE_VMPARAM_H_ */ diff --git a/sys/mips/mips/vm_machdep.c b/sys/mips/mips/vm_machdep.c index dd294e9..26cdbff 100644 --- a/sys/mips/mips/vm_machdep.c +++ b/sys/mips/mips/vm_machdep.c @@ -76,9 +76,6 @@ __FBSDID("$FreeBSD$"); #include <sys/user.h> #include <sys/mbuf.h> -#ifndef __mips_n64 -#include <sys/sf_buf.h> -#endif /* Duplicated from asm.h */ #if defined(__mips_o32) @@ -92,38 +89,6 @@ __FBSDID("$FreeBSD$"); #define CALLFRAME_SIZ (SZREG * 4) #endif -#ifndef __mips_n64 - -#ifndef NSFBUFS -#define NSFBUFS (512 + maxusers * 16) -#endif - -static int nsfbufs; -static int nsfbufspeak; -static int nsfbufsused; - -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, - "Maximum number of sendfile(2) sf_bufs available"); -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, - "Number of sendfile(2) sf_bufs at peak usage"); -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, - "Number of sendfile(2) sf_bufs in use"); - -static void sf_buf_init(void *arg); -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL); - -/* - * Expanded sf_freelist head. 
Really an SLIST_HEAD() in disguise, with the - * sf_freelist head with the sf_lock mutex. - */ -static struct { - SLIST_HEAD(, sf_buf) sf_head; - struct mtx sf_lock; -} sf_freelist; - -static u_int sf_buf_alloc_want; -#endif /* !__mips_n64 */ - /* * Finish a fork operation, with process p2 nearly set up. * Copy and update the pcb, set up the stack so that the child @@ -513,84 +478,6 @@ cpu_set_upcall_kse(struct thread *td, void (*entry)(void *), void *arg, #define ZIDLE_HI(v) ((v) * 4 / 5) /* - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-)) - */ -#ifndef __mips_n64 -static void -sf_buf_init(void *arg) -{ - struct sf_buf *sf_bufs; - vm_offset_t sf_base; - int i; - - nsfbufs = NSFBUFS; - TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs); - - mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF); - SLIST_INIT(&sf_freelist.sf_head); - sf_base = kva_alloc(nsfbufs * PAGE_SIZE); - sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, - M_NOWAIT | M_ZERO); - for (i = 0; i < nsfbufs; i++) { - sf_bufs[i].kva = sf_base + i * PAGE_SIZE; - SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list); - } - sf_buf_alloc_want = 0; -} - -/* - * Get an sf_buf from the freelist. Will block if none are available. - */ -struct sf_buf * -sf_buf_alloc(struct vm_page *m, int flags) -{ - struct sf_buf *sf; - int error; - - mtx_lock(&sf_freelist.sf_lock); - while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) { - if (flags & SFB_NOWAIT) - break; - sf_buf_alloc_want++; - SFSTAT_INC(sf_allocwait); - error = msleep(&sf_freelist, &sf_freelist.sf_lock, - (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0); - sf_buf_alloc_want--; - - /* - * If we got a signal, don't risk going back to sleep. 
- */ - if (error) - break; - } - if (sf != NULL) { - SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list); - sf->m = m; - nsfbufsused++; - nsfbufspeak = imax(nsfbufspeak, nsfbufsused); - pmap_qenter(sf->kva, &sf->m, 1); - } - mtx_unlock(&sf_freelist.sf_lock); - return (sf); -} - -/* - * Release resources back to the system. - */ -void -sf_buf_free(struct sf_buf *sf) -{ - pmap_qremove(sf->kva, 1); - mtx_lock(&sf_freelist.sf_lock); - SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list); - nsfbufsused--; - if (sf_buf_alloc_want > 0) - wakeup(&sf_freelist); - mtx_unlock(&sf_freelist.sf_lock); -} -#endif /* !__mips_n64 */ - -/* * Software interrupt handler for queued VM system processing. */ void diff --git a/sys/powerpc/include/sf_buf.h b/sys/powerpc/include/sf_buf.h deleted file mode 100644 index f8a5936..0000000 --- a/sys/powerpc/include/sf_buf.h +++ /dev/null @@ -1,80 +0,0 @@ -/*- - * Copyright (c) 2003 Alan L. Cox <alc@cs.rice.edu> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - * - * $FreeBSD$ - */ - -#ifndef _MACHINE_SF_BUF_H_ -#define _MACHINE_SF_BUF_H_ - -#include <vm/vm.h> -#include <vm/vm_param.h> -#include <vm/vm_page.h> -#include <machine/md_var.h> -#include <sys/queue.h> - -struct vm_page; - -struct sf_buf { - LIST_ENTRY(sf_buf) list_entry; /* list of buffers */ - TAILQ_ENTRY(sf_buf) free_entry; /* list of buffers */ - struct vm_page *m; /* currently mapped page */ - vm_offset_t kva; /* va of mapping */ - int ref_count; /* usage of this mapping */ -}; - -struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags); -void sf_buf_free(struct sf_buf *sf); - -/* - * On 32-bit OEA, the only purpose for which sf_buf is used is to implement - * an opaque pointer required by the machine-independent parts of the kernel. - * That pointer references the vm_page that is "mapped" by the sf_buf. The - * actual mapping is provided by the direct virtual-to-physical mapping. - * - * On OEA64 and Book-E, we need to do something a little more complicated. Use - * the runtime-detected hw_direct_map to pick between the two cases. Our - * friends in vm_machdep.c will do the same to ensure nothing gets confused. 
- */ - -static __inline vm_offset_t -sf_buf_kva(struct sf_buf *sf) -{ - if (hw_direct_map) - return (VM_PAGE_TO_PHYS((vm_page_t)sf)); - - return (sf->kva); -} - -static __inline struct vm_page * -sf_buf_page(struct sf_buf *sf) -{ - if (hw_direct_map) - return ((vm_page_t)sf); - - return (sf->m); -} - -#endif /* !_MACHINE_SF_BUF_H_ */ diff --git a/sys/powerpc/include/vmparam.h b/sys/powerpc/include/vmparam.h index 2d50f94..c46f395 100644 --- a/sys/powerpc/include/vmparam.h +++ b/sys/powerpc/include/vmparam.h @@ -197,4 +197,18 @@ struct pmap_physseg { #define ZERO_REGION_SIZE (64 * 1024) /* 64KB */ +/* + * On 32-bit OEA, the only purpose for which sf_buf is used is to implement + * an opaque pointer required by the machine-independent parts of the kernel. + * That pointer references the vm_page that is "mapped" by the sf_buf. The + * actual mapping is provided by the direct virtual-to-physical mapping. + * + * On OEA64 and Book-E, we need to do something a little more complicated. Use + * the runtime-detected hw_direct_map to pick between the two cases. Our + * friends in vm_machdep.c will do the same to ensure nothing gets confused. + */ +#define SFBUF +#define SFBUF_NOMD +#define SFBUF_OPTIONAL_DIRECT_MAP hw_direct_map + #endif /* _MACHINE_VMPARAM_H_ */ diff --git a/sys/powerpc/powerpc/vm_machdep.c b/sys/powerpc/powerpc/vm_machdep.c index d9d3987..d684094 100644 --- a/sys/powerpc/powerpc/vm_machdep.c +++ b/sys/powerpc/powerpc/vm_machdep.c @@ -80,7 +80,6 @@ #include <sys/vmmeter.h> #include <sys/kernel.h> #include <sys/mbuf.h> -#include <sys/sf_buf.h> #include <sys/sysctl.h> #include <sys/sysent.h> #include <sys/unistd.h> @@ -100,47 +99,6 @@ #include <vm/vm_map.h> #include <vm/vm_extern.h> -/* - * On systems without a direct mapped region (e.g. PPC64), - * we use the same code as the Book E implementation. Since - * we need to have runtime detection of this, define some machinery - * for sf_bufs in this case, and ignore it on systems with direct maps. 
- */ - -#ifndef NSFBUFS -#define NSFBUFS (512 + maxusers * 16) -#endif - -static int nsfbufs; -static int nsfbufspeak; -static int nsfbufsused; - -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, - "Maximum number of sendfile(2) sf_bufs available"); -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, - "Number of sendfile(2) sf_bufs at peak usage"); -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, - "Number of sendfile(2) sf_bufs in use"); - -static void sf_buf_init(void *arg); -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL); - -LIST_HEAD(sf_head, sf_buf); - -/* A hash table of active sendfile(2) buffers */ -static struct sf_head *sf_buf_active; -static u_long sf_buf_hashmask; - -#define SF_BUF_HASH(m) (((m) - vm_page_array) & sf_buf_hashmask) - -static TAILQ_HEAD(, sf_buf) sf_buf_freelist; -static u_int sf_buf_alloc_want; - -/* - * A lock used to synchronize access to the hash table and free list - */ -static struct mtx sf_buf_lock; - #ifdef __powerpc64__ extern uintptr_t tocbase; #endif @@ -245,124 +203,6 @@ cpu_exit(struct thread *td) } /* - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-)) - */ -static void -sf_buf_init(void *arg) -{ - struct sf_buf *sf_bufs; - vm_offset_t sf_base; - int i; - - /* Don't bother on systems with a direct map */ - if (hw_direct_map) - return; - - nsfbufs = NSFBUFS; - TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs); - - sf_buf_active = hashinit(nsfbufs, M_TEMP, &sf_buf_hashmask); - TAILQ_INIT(&sf_buf_freelist); - sf_base = kva_alloc(nsfbufs * PAGE_SIZE); - sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, - M_NOWAIT | M_ZERO); - - for (i = 0; i < nsfbufs; i++) { - sf_bufs[i].kva = sf_base + i * PAGE_SIZE; - TAILQ_INSERT_TAIL(&sf_buf_freelist, &sf_bufs[i], free_entry); - } - sf_buf_alloc_want = 0; - mtx_init(&sf_buf_lock, "sf_buf", NULL, MTX_DEF); -} - -/* - * Get an sf_buf from the freelist. 
Will block if none are available. - */ -struct sf_buf * -sf_buf_alloc(struct vm_page *m, int flags) -{ - struct sf_head *hash_list; - struct sf_buf *sf; - int error; - - if (hw_direct_map) { - /* Shortcut the direct mapped case */ - return ((struct sf_buf *)m); - } - - hash_list = &sf_buf_active[SF_BUF_HASH(m)]; - mtx_lock(&sf_buf_lock); - LIST_FOREACH(sf, hash_list, list_entry) { - if (sf->m == m) { - sf->ref_count++; - if (sf->ref_count == 1) { - TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry); - nsfbufsused++; - nsfbufspeak = imax(nsfbufspeak, nsfbufsused); - } - goto done; - } - } - - while ((sf = TAILQ_FIRST(&sf_buf_freelist)) == NULL) { - if (flags & SFB_NOWAIT) - goto done; - - sf_buf_alloc_want++; - SFSTAT_INC(sf_allocwait); - error = msleep(&sf_buf_freelist, &sf_buf_lock, - (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0); - sf_buf_alloc_want--; - - /* - * If we got a signal, don't risk going back to sleep. - */ - if (error) - goto done; - } - - TAILQ_REMOVE(&sf_buf_freelist, sf, free_entry); - if (sf->m != NULL) - LIST_REMOVE(sf, list_entry); - - LIST_INSERT_HEAD(hash_list, sf, list_entry); - sf->ref_count = 1; - sf->m = m; - nsfbufsused++; - nsfbufspeak = imax(nsfbufspeak, nsfbufsused); - pmap_qenter(sf->kva, &sf->m, 1); -done: - mtx_unlock(&sf_buf_lock); - return (sf); -} - -/* - * Detach mapped page and release resources back to the system. - * - * Remove a reference from the given sf_buf, adding it to the free - * list when its reference count reaches zero. A freed sf_buf still, - * however, retains its virtual-to-physical mapping until it is - * recycled or reactivated by sf_buf_alloc(9). 
- */ -void -sf_buf_free(struct sf_buf *sf) -{ - if (hw_direct_map) - return; - - mtx_lock(&sf_buf_lock); - sf->ref_count--; - if (sf->ref_count == 0) { - TAILQ_INSERT_TAIL(&sf_buf_freelist, sf, free_entry); - nsfbufsused--; - - if (sf_buf_alloc_want > 0) - wakeup(&sf_buf_freelist); - } - mtx_unlock(&sf_buf_lock); -} - -/* * Software interrupt handler for queued VM system processing. */ void diff --git a/sys/sparc64/include/sf_buf.h b/sys/sparc64/include/sf_buf.h deleted file mode 100644 index ebbbea8..0000000 --- a/sys/sparc64/include/sf_buf.h +++ /dev/null @@ -1,59 +0,0 @@ -/*- - * Copyright (c) 2003 Alan L. Cox <alc@cs.rice.edu> - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. 
- * - * $FreeBSD$ - */ - -#ifndef _MACHINE_SF_BUF_H_ -#define _MACHINE_SF_BUF_H_ - -#include <sys/queue.h> - -struct vm_page; - -struct sf_buf { - SLIST_ENTRY(sf_buf) free_list; /* list of free buffer slots */ - struct vm_page *m; /* currently mapped page */ - vm_offset_t kva; /* va of mapping */ -}; - -struct sf_buf * sf_buf_alloc(struct vm_page *m, int flags); -void sf_buf_free(struct sf_buf *sf); - -static __inline vm_offset_t -sf_buf_kva(struct sf_buf *sf) -{ - - return (sf->kva); -} - -static __inline struct vm_page * -sf_buf_page(struct sf_buf *sf) -{ - - return (sf->m); -} - -#endif /* !_MACHINE_SF_BUF_H_ */ diff --git a/sys/sparc64/include/vmparam.h b/sys/sparc64/include/vmparam.h index b1c90d2..10976f1 100644 --- a/sys/sparc64/include/vmparam.h +++ b/sys/sparc64/include/vmparam.h @@ -239,4 +239,7 @@ extern vm_offset_t vm_max_kernel_address; */ #define ZERO_REGION_SIZE PAGE_SIZE +#define SFBUF +#define SFBUF_NOMD + #endif /* !_MACHINE_VMPARAM_H_ */ diff --git a/sys/sparc64/sparc64/vm_machdep.c b/sys/sparc64/sparc64/vm_machdep.c index 8615aa2..96353eb 100644 --- a/sys/sparc64/sparc64/vm_machdep.c +++ b/sys/sparc64/sparc64/vm_machdep.c @@ -53,7 +53,6 @@ __FBSDID("$FreeBSD$"); #include <sys/mutex.h> #include <sys/proc.h> #include <sys/sysent.h> -#include <sys/sf_buf.h> #include <sys/sched.h> #include <sys/sysctl.h> #include <sys/unistd.h> @@ -84,35 +83,6 @@ __FBSDID("$FreeBSD$"); #include <machine/tlb.h> #include <machine/tstate.h> -#ifndef NSFBUFS -#define NSFBUFS (512 + maxusers * 16) -#endif - -static int nsfbufs; -static int nsfbufspeak; -static int nsfbufsused; - -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0, - "Maximum number of sendfile(2) sf_bufs available"); -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0, - "Number of sendfile(2) sf_bufs at peak usage"); -SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, - "Number of sendfile(2) sf_bufs in use"); - -static void sf_buf_init(void 
*arg); -SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL); - -/* - * Expanded sf_freelist head. Really an SLIST_HEAD() in disguise, with the - * sf_freelist head with the sf_lock mutex. - */ -static struct { - SLIST_HEAD(, sf_buf) sf_head; - struct mtx sf_lock; -} sf_freelist; - -static u_int sf_buf_alloc_want; - PMAP_STATS_VAR(uma_nsmall_alloc); PMAP_STATS_VAR(uma_nsmall_alloc_oc); PMAP_STATS_VAR(uma_nsmall_free); @@ -417,83 +387,6 @@ is_physical_memory(vm_paddr_t addr) return (0); } -/* - * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-)) - */ -static void -sf_buf_init(void *arg) -{ - struct sf_buf *sf_bufs; - vm_offset_t sf_base; - int i; - - nsfbufs = NSFBUFS; - TUNABLE_INT_FETCH("kern.ipc.nsfbufs", &nsfbufs); - - mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF); - SLIST_INIT(&sf_freelist.sf_head); - sf_base = kva_alloc(nsfbufs * PAGE_SIZE); - sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, - M_NOWAIT | M_ZERO); - for (i = 0; i < nsfbufs; i++) { - sf_bufs[i].kva = sf_base + i * PAGE_SIZE; - SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list); - } - sf_buf_alloc_want = 0; -} - -/* - * Get an sf_buf from the freelist. Will block if none are available. - */ -struct sf_buf * -sf_buf_alloc(struct vm_page *m, int flags) -{ - struct sf_buf *sf; - int error; - - mtx_lock(&sf_freelist.sf_lock); - while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) { - if (flags & SFB_NOWAIT) - break; - sf_buf_alloc_want++; - SFSTAT_INC(sf_allocwait); - error = msleep(&sf_freelist, &sf_freelist.sf_lock, - (flags & SFB_CATCH) ? PCATCH | PVM : PVM, "sfbufa", 0); - sf_buf_alloc_want--; - - /* - * If we got a signal, don't risk going back to sleep. 
- */ - if (error) - break; - } - if (sf != NULL) { - SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list); - sf->m = m; - nsfbufsused++; - nsfbufspeak = imax(nsfbufspeak, nsfbufsused); - pmap_qenter(sf->kva, &sf->m, 1); - } - mtx_unlock(&sf_freelist.sf_lock); - return (sf); -} - -/* - * Release resources back to the system. - */ -void -sf_buf_free(struct sf_buf *sf) -{ - - pmap_qremove(sf->kva, 1); - mtx_lock(&sf_freelist.sf_lock); - SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list); - nsfbufsused--; - if (sf_buf_alloc_want > 0) - wakeup(&sf_freelist); - mtx_unlock(&sf_freelist.sf_lock); -} - void swi_vm(void *v) { diff --git a/sys/sys/sf_buf.h b/sys/sys/sf_buf.h index 07871dd..8a6c56f 100644 --- a/sys/sys/sf_buf.h +++ b/sys/sys/sf_buf.h @@ -1,4 +1,5 @@ /*- + * Copyright (c) 2014 Gleb Smirnoff <glebius@FreeBSD.org> * Copyright (c) 2003-2004 Alan L. Cox <alc@cs.rice.edu> * All rights reserved. * @@ -29,6 +30,146 @@ #ifndef _SYS_SF_BUF_H_ #define _SYS_SF_BUF_H_ +struct sfstat { /* sendfile statistics */ + uint64_t sf_iocnt; /* times sendfile had to do disk I/O */ + uint64_t sf_allocfail; /* times sfbuf allocation failed */ + uint64_t sf_allocwait; /* times sfbuf allocation had to wait */ +}; + +#ifdef _KERNEL +#include <sys/types.h> +#include <sys/systm.h> +#include <sys/counter.h> +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_page.h> + +/* + * Sf_bufs, or sendfile(2) buffers provide a vm_page that is mapped + * into kernel address space. Note, that they aren't used only + * by sendfile(2)! + * + * Sf_bufs could be implemented as a feature of vm_page_t, but that + * would require growth of the structure. That's why they are implemented + * as a separate hash indexed by vm_page address. Implementation lives in + * kern/subr_sfbuf.c. Meanwhile, most 64-bit machines have a physical map, + * so they don't require this hash at all, thus ignore subr_sfbuf.c. 
+ * + * Different 32-bit architectures impose different requirements on the sf_buf + * hash and functions. They request features in machine/vmparam.h, which + * enable parts of this file. They can also optionally provide helpers in + * machine/sf_buf.h + * + * Defines are: + * SFBUF This machine requires sf_buf hash. + * subr_sfbuf.c should be compiled. + * SFBUF_CPUSET This machine can perform SFB_CPUPRIVATE mappings, + * that do not invalidate cache on the rest of CPUs. + * SFBUF_NOMD This machine doesn't have machine/sf_buf.h + * + * SFBUF_OPTIONAL_DIRECT_MAP Value of this define is used as boolean + * variable that tells whether machine is + * capable of direct map or not at runtime. + * SFBUF_MAP This machine provides its own sf_buf_map() and + * sf_buf_unmap(). + * SFBUF_PROCESS_PAGE This machine provides sf_buf_process_page() + * function. + */ + +#ifdef SFBUF +#if defined(SMP) && defined(SFBUF_CPUSET) +#include <sys/_cpuset.h> +#endif +#include <sys/queue.h> + +struct sf_buf { + LIST_ENTRY(sf_buf) list_entry; /* list of buffers */ + TAILQ_ENTRY(sf_buf) free_entry; /* list of buffers */ + vm_page_t m; /* currently mapped page */ + vm_offset_t kva; /* va of mapping */ + int ref_count; /* usage of this mapping */ +#if defined(SMP) && defined(SFBUF_CPUSET) + cpuset_t cpumask; /* where mapping is valid */ +#endif +}; +#else /* !
SFBUF */ +struct sf_buf; +#endif /* SFBUF */ + +#ifndef SFBUF_NOMD +#include <machine/sf_buf.h> +#endif +#ifdef SFBUF_OPTIONAL_DIRECT_MAP +#include <machine/md_var.h> +#endif + +#ifdef SFBUF +struct sf_buf *sf_buf_alloc(struct vm_page *, int); +void sf_buf_free(struct sf_buf *); + +static inline vm_offset_t +sf_buf_kva(struct sf_buf *sf) +{ +#ifdef SFBUF_OPTIONAL_DIRECT_MAP + if (SFBUF_OPTIONAL_DIRECT_MAP) + return (VM_PAGE_TO_PHYS((vm_page_t)sf)); +#endif + + return (sf->kva); +} + +static inline vm_page_t +sf_buf_page(struct sf_buf *sf) +{ +#ifdef SFBUF_OPTIONAL_DIRECT_MAP + if (SFBUF_OPTIONAL_DIRECT_MAP) + return ((vm_page_t)sf); +#endif + + return (sf->m); +} + +#ifndef SFBUF_MAP +#include <vm/pmap.h> + +static inline void +sf_buf_map(struct sf_buf *sf, int flags) +{ + + pmap_qenter(sf->kva, &sf->m, 1); +} + +static inline int +sf_buf_unmap(struct sf_buf *sf) +{ + + return (0); +} +#endif /* SFBUF_MAP */ + +#if defined(SMP) && defined(SFBUF_CPUSET) +void sf_buf_shootdown(struct sf_buf *, int); +#endif + +#ifdef SFBUF_PROCESS_PAGE +boolean_t sf_buf_process_page(vm_page_t, void (*)(struct sf_buf *)); +#endif + +#else /* ! SFBUF */ + +static inline struct sf_buf * +sf_buf_alloc(struct vm_page *m, int pri) +{ + + return ((struct sf_buf *)m); +} + +static inline void +sf_buf_free(struct sf_buf *sf) +{ +} +#endif /* SFBUF */ + /* * Options to sf_buf_alloc() are specified through its flags argument. This * argument's value should be the result of a bitwise or'ing of one or more @@ -40,19 +181,6 @@ #define SFB_DEFAULT 0 #define SFB_NOWAIT 4 /* Return NULL if all bufs are used. 
*/ -struct vm_page; - -struct sfstat { /* sendfile statistics */ - uint64_t sf_iocnt; /* times sendfile had to do disk I/O */ - uint64_t sf_allocfail; /* times sfbuf allocation failed */ - uint64_t sf_allocwait; /* times sfbuf allocation had to wait */ -}; - -#ifdef _KERNEL -#include <machine/sf_buf.h> -#include <sys/systm.h> -#include <sys/counter.h> - extern counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)]; #define SFSTAT_ADD(name, val) \ counter_u64_add(sfstat[offsetof(struct sfstat, name) / sizeof(uint64_t)],\ |