diff options
Diffstat (limited to 'sys/kern/kern_subr.c')
-rw-r--r-- | sys/kern/kern_subr.c | 461 |
1 files changed, 461 insertions, 0 deletions
diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c new file mode 100644 index 0000000..769dd1d --- /dev/null +++ b/sys/kern/kern_subr.c @@ -0,0 +1,461 @@ +/* + * Copyright (c) 1982, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_zero.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/ktr.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/malloc.h> +#include <sys/resourcevar.h> +#include <sys/sched.h> +#include <sys/sysctl.h> +#include <sys/vnode.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_map.h> +#ifdef ZERO_COPY_SOCKETS +#include <vm/vm_param.h> +#include <vm/vm_object.h> +#endif + +SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, + "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); + +#ifdef ZERO_COPY_SOCKETS +/* Declared in uipc_socket.c */ +extern int so_zero_copy_receive; + +static int +vm_pgmoveco(vm_map_t mapa, vm_object_t srcobj, vm_offset_t kaddr, + vm_offset_t uaddr) +{ + vm_map_t map = mapa; + vm_page_t kern_pg, user_pg; + vm_object_t uobject; + vm_map_entry_t entry; + vm_pindex_t upindex, kpindex; + vm_prot_t prot; + boolean_t wired; + + /* + * First lookup the kernel page. + */ + kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); + /* + * XXX The vm object containing kern_pg needs locking. + */ + if ((vm_map_lookup(&map, uaddr, + VM_PROT_WRITE, &entry, &uobject, + &upindex, &prot, &wired)) != KERN_SUCCESS) { + return(EFAULT); + } + VM_OBJECT_LOCK(uobject); + if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { + do + vm_page_lock_queues(); + while (vm_page_sleep_if_busy(user_pg, 1, "vm_pgmoveco")); + vm_page_busy(user_pg); + pmap_remove_all(user_pg); + vm_page_free(user_pg); + } else + vm_page_lock_queues(); + if (kern_pg->busy || ((kern_pg->queue - kern_pg->pc) == PQ_FREE) || + (kern_pg->hold_count != 0)|| (kern_pg->flags & PG_BUSY)) { + printf("vm_pgmoveco: pindex(%lu), busy(%d), PG_BUSY(%d), " + "hold(%d) paddr(0x%lx)\n", (u_long)kern_pg->pindex, + kern_pg->busy, (kern_pg->flags & PG_BUSY) ? 1 : 0, + kern_pg->hold_count, (u_long)kern_pg->phys_addr); + if ((kern_pg->queue - kern_pg->pc) == PQ_FREE) + panic("vm_pgmoveco: renaming free page"); + else + panic("vm_pgmoveco: renaming busy page"); + } + kpindex = kern_pg->pindex; + vm_page_busy(kern_pg); + vm_page_rename(kern_pg, uobject, upindex); + vm_page_flag_clear(kern_pg, PG_BUSY); + kern_pg->valid = VM_PAGE_BITS_ALL; + vm_page_unlock_queues(); + VM_OBJECT_UNLOCK(uobject); + vm_map_lookup_done(map, entry); + return(KERN_SUCCESS); +} +#endif /* ZERO_COPY_SOCKETS */ + +int +uiomove(void *cp, int n, struct uio *uio) +{ + struct thread *td = curthread; + struct iovec *iov; + u_int cnt; + int error = 0; + int save = 0; + + KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, + ("uiomove: mode")); + KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, + ("uiomove proc")); + + if (td) { + mtx_lock_spin(&sched_lock); + save = td->td_flags & TDF_DEADLKTREAT; + td->td_flags |= TDF_DEADLKTREAT; + mtx_unlock_spin(&sched_lock); + } + + while (n > 0 && uio->uio_resid) { + iov = uio->uio_iov; + cnt = iov->iov_len; + if (cnt == 0) { + uio->uio_iov++; + uio->uio_iovcnt--; + continue; + } + if (cnt > n) + cnt = n; + + switch (uio->uio_segflg) { + + case UIO_USERSPACE: + if (ticks - PCPU_GET(switchticks) >= hogticks) + uio_yield(); + if (uio->uio_rw == UIO_READ) + error = copyout(cp, iov->iov_base, cnt); + else + error = copyin(iov->iov_base, cp, cnt); + if (error) + goto out; + break; + + case UIO_SYSSPACE: + if (uio->uio_rw == UIO_READ) + bcopy(cp, iov->iov_base, cnt); + else + bcopy(iov->iov_base, cp, cnt); + break; + case UIO_NOCOPY: + break; + } + iov->iov_base = (char *)iov->iov_base + cnt; + iov->iov_len -= cnt; + uio->uio_resid -= cnt; + uio->uio_offset += cnt; + cp = (char *)cp + cnt; + n -= cnt; + } +out: + if (td && save == 0) { + mtx_lock_spin(&sched_lock); + td->td_flags &= ~TDF_DEADLKTREAT; + mtx_unlock_spin(&sched_lock); + } + return (error); +} + +#ifdef ZERO_COPY_SOCKETS +/* + * Experimental support for zero-copy I/O + */ +static int +userspaceco(void *cp, u_int cnt, struct uio *uio, struct vm_object *obj, + int disposable) +{ + struct iovec *iov; + int error; + + iov = uio->uio_iov; + if (uio->uio_rw == UIO_READ) { + if ((so_zero_copy_receive != 0) + && (obj != NULL) + && ((cnt & PAGE_MASK) == 0) + && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) + && ((uio->uio_offset & PAGE_MASK) == 0) + && ((((intptr_t) cp) & PAGE_MASK) == 0) + && (obj->type == OBJT_DEFAULT) + && (disposable != 0)) { + /* SOCKET: use page-trading */ + /* + * We only want to call vm_pgmoveco() on + * disposeable pages, since it gives the + * kernel page to the userland process. + */ + error = vm_pgmoveco(&curproc->p_vmspace->vm_map, + obj, (vm_offset_t)cp, + (vm_offset_t)iov->iov_base); + + /* + * If we get an error back, attempt + * to use copyout() instead. The + * disposable page should be freed + * automatically if we weren't able to move + * it into userland. + */ + if (error != 0) + error = copyout(cp, iov->iov_base, cnt); + } else { + error = copyout(cp, iov->iov_base, cnt); + } + } else { + error = copyin(iov->iov_base, cp, cnt); + } + return (error); +} + +int +uiomoveco(void *cp, int n, struct uio *uio, struct vm_object *obj, + int disposable) +{ + struct iovec *iov; + u_int cnt; + int error; + + KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, + ("uiomoveco: mode")); + KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, + ("uiomoveco proc")); + + while (n > 0 && uio->uio_resid) { + iov = uio->uio_iov; + cnt = iov->iov_len; + if (cnt == 0) { + uio->uio_iov++; + uio->uio_iovcnt--; + continue; + } + if (cnt > n) + cnt = n; + + switch (uio->uio_segflg) { + + case UIO_USERSPACE: + if (ticks - PCPU_GET(switchticks) >= hogticks) + uio_yield(); + + error = userspaceco(cp, cnt, uio, obj, disposable); + + if (error) + return (error); + break; + + case UIO_SYSSPACE: + if (uio->uio_rw == UIO_READ) + bcopy(cp, iov->iov_base, cnt); + else + bcopy(iov->iov_base, cp, cnt); + break; + case UIO_NOCOPY: + break; + } + iov->iov_base = (char *)iov->iov_base + cnt; + iov->iov_len -= cnt; + uio->uio_resid -= cnt; + uio->uio_offset += cnt; + cp = (char *)cp + cnt; + n -= cnt; + } + return (0); +} +#endif /* ZERO_COPY_SOCKETS */ + +/* + * Give next character to user as result of read. + */ +int +ureadc(int c, struct uio *uio) +{ + struct iovec *iov; + char *iov_base; + +again: + if (uio->uio_iovcnt == 0 || uio->uio_resid == 0) + panic("ureadc"); + iov = uio->uio_iov; + if (iov->iov_len == 0) { + uio->uio_iovcnt--; + uio->uio_iov++; + goto again; + } + switch (uio->uio_segflg) { + + case UIO_USERSPACE: + if (subyte(iov->iov_base, c) < 0) + return (EFAULT); + break; + + case UIO_SYSSPACE: + iov_base = iov->iov_base; + *iov_base = c; + iov->iov_base = iov_base; + break; + + case UIO_NOCOPY: + break; + } + iov->iov_base = (char *)iov->iov_base + 1; + iov->iov_len--; + uio->uio_resid--; + uio->uio_offset++; + return (0); +} + +/* + * General routine to allocate a hash table. + */ +void * +hashinit(int elements, struct malloc_type *type, u_long *hashmask) +{ + long hashsize; + LIST_HEAD(generic, generic) *hashtbl; + int i; + + if (elements <= 0) + panic("hashinit: bad elements"); + for (hashsize = 1; hashsize <= elements; hashsize <<= 1) + continue; + hashsize >>= 1; + hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); + for (i = 0; i < hashsize; i++) + LIST_INIT(&hashtbl[i]); + *hashmask = hashsize - 1; + return (hashtbl); +} + +void +hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask) +{ + LIST_HEAD(generic, generic) *hashtbl, *hp; + + hashtbl = vhashtbl; + for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++) + if (!LIST_EMPTY(hp)) + panic("hashdestroy: hash not empty"); + free(hashtbl, type); +} + +static int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531, 2039, + 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, 6653, + 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; +#define NPRIMES (sizeof(primes) / sizeof(primes[0])) + +/* + * General routine to allocate a prime number sized hash table. + */ +void * +phashinit(int elements, struct malloc_type *type, u_long *nentries) +{ + long hashsize; + LIST_HEAD(generic, generic) *hashtbl; + int i; + + if (elements <= 0) + panic("phashinit: bad elements"); + for (i = 1, hashsize = primes[1]; hashsize <= elements;) { + i++; + if (i == NPRIMES) + break; + hashsize = primes[i]; + } + hashsize = primes[i - 1]; + hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); + for (i = 0; i < hashsize; i++) + LIST_INIT(&hashtbl[i]); + *nentries = hashsize; + return (hashtbl); +} + +void +uio_yield(void) +{ + struct thread *td; + + td = curthread; + mtx_lock_spin(&sched_lock); + DROP_GIANT(); + sched_prio(td, td->td_ksegrp->kg_user_pri); /* XXXKSE */ + td->td_proc->p_stats->p_ru.ru_nivcsw++; + mi_switch(); + mtx_unlock_spin(&sched_lock); + PICKUP_GIANT(); +} + +int +copyinfrom(const void *src, void *dst, size_t len, int seg) +{ + int error = 0; + + switch (seg) { + case UIO_USERSPACE: + error = copyin(src, dst, len); + break; + case UIO_SYSSPACE: + bcopy(src, dst, len); + break; + default: + panic("copyinfrom: bad seg %d\n", seg); + } + return (error); +} + +int +copyinstrfrom(const void *src, void *dst, size_t len, size_t *copied, int seg) +{ + int error = 0; + + switch (seg) { + case UIO_USERSPACE: + error = copyinstr(src, dst, len, copied); + break; + case UIO_SYSSPACE: + error = copystr(src, dst, len, copied); + break; + default: + panic("copyinstrfrom: bad seg %d\n", seg); + } + return (error); +} |