Diffstat (limited to 'sys'): 107 files changed, 2291 insertions, 2962 deletions
diff --git a/sys/amd64/amd64/busdma_machdep.c b/sys/amd64/amd64/busdma_machdep.c deleted file mode 100644 index 5a60bc1..0000000 --- a/sys/amd64/amd64/busdma_machdep.c +++ /dev/null @@ -1,1205 +0,0 @@ -/*- - * Copyright (c) 1997, 1998 Justin T. Gibbs. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions, and the following disclaimer, - * without modification, immediately at the beginning of the file. - * 2. The name of the author may not be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR - * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/malloc.h> -#include <sys/bus.h> -#include <sys/interrupt.h> -#include <sys/kernel.h> -#include <sys/ktr.h> -#include <sys/lock.h> -#include <sys/proc.h> -#include <sys/mutex.h> -#include <sys/mbuf.h> -#include <sys/uio.h> -#include <sys/sysctl.h> - -#include <vm/vm.h> -#include <vm/vm_page.h> -#include <vm/vm_map.h> - -#include <machine/atomic.h> -#include <machine/bus.h> -#include <machine/md_var.h> -#include <machine/specialreg.h> - -#define MAX_BPAGES 8192 - -struct bounce_zone; - -struct bus_dma_tag { - bus_dma_tag_t parent; - bus_size_t alignment; - bus_size_t boundary; - bus_addr_t lowaddr; - bus_addr_t highaddr; - bus_dma_filter_t *filter; - void *filterarg; - bus_size_t maxsize; - u_int nsegments; - bus_size_t maxsegsz; - int flags; - int ref_count; - int map_count; - bus_dma_lock_t *lockfunc; - void *lockfuncarg; - bus_dma_segment_t *segments; - struct bounce_zone *bounce_zone; -}; - -struct bounce_page { - vm_offset_t vaddr; /* kva of bounce buffer */ - bus_addr_t busaddr; /* Physical address */ - vm_offset_t datavaddr; /* kva of client data */ - bus_size_t datacount; /* client data count */ - STAILQ_ENTRY(bounce_page) links; -}; - -int busdma_swi_pending; - -struct bounce_zone { - STAILQ_ENTRY(bounce_zone) links; - STAILQ_HEAD(bp_list, bounce_page) bounce_page_list; - int total_bpages; - int free_bpages; - int reserved_bpages; - int active_bpages; - int total_bounced; - int total_deferred; - int map_count; - bus_size_t alignment; - bus_addr_t lowaddr; - char zoneid[8]; - char lowaddrid[20]; - struct sysctl_ctx_list sysctl_tree; - struct sysctl_oid *sysctl_tree_top; -}; - -static struct mtx bounce_lock; -static int total_bpages; -static int busdma_zonecount; -static STAILQ_HEAD(, bounce_zone) bounce_zone_list; - -SYSCTL_NODE(_hw, OID_AUTO, busdma, CTLFLAG_RD, 0, "Busdma parameters"); -SYSCTL_INT(_hw_busdma, 
OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0, - "Total bounce pages"); - -struct bus_dmamap { - struct bp_list bpages; - int pagesneeded; - int pagesreserved; - bus_dma_tag_t dmat; - void *buf; /* unmapped buffer pointer */ - bus_size_t buflen; /* unmapped buffer length */ - bus_dmamap_callback_t *callback; - void *callback_arg; - STAILQ_ENTRY(bus_dmamap) links; -}; - -static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; -static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; -static struct bus_dmamap nobounce_dmamap; - -static void init_bounce_pages(void *dummy); -static int alloc_bounce_zone(bus_dma_tag_t dmat); -static int alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages); -static int reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, - int commit); -static bus_addr_t add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, - vm_offset_t vaddr, bus_size_t size); -static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage); -static __inline int run_filter(bus_dma_tag_t dmat, bus_addr_t paddr); - -/* - * Return true if a match is made. - * - * To find a match walk the chain of bus_dma_tag_t's looking for 'paddr'. - * - * If paddr is within the bounds of the dma tag then call the filter callback - * to check for a match, if there is no filter callback then assume a match. - */ -static __inline int -run_filter(bus_dma_tag_t dmat, bus_addr_t paddr) -{ - int retval; - - retval = 0; - - do { - if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr) - || ((paddr & (dmat->alignment - 1)) != 0)) - && (dmat->filter == NULL - || (*dmat->filter)(dmat->filterarg, paddr) != 0)) - retval = 1; - - dmat = dmat->parent; - } while (retval == 0 && dmat != NULL); - return (retval); -} - -/* - * Convenience function for manipulating driver locks from busdma (during - * busdma_swi, for example). Drivers that don't provide their own locks - * should specify &Giant to dmat->lockfuncarg. Drivers that use their own - * non-mutex locking scheme don't have to use this at all. - */ -void -busdma_lock_mutex(void *arg, bus_dma_lock_op_t op) -{ - struct mtx *dmtx; - - dmtx = (struct mtx *)arg; - switch (op) { - case BUS_DMA_LOCK: - mtx_lock(dmtx); - break; - case BUS_DMA_UNLOCK: - mtx_unlock(dmtx); - break; - default: - panic("Unknown operation 0x%x for busdma_lock_mutex!", op); - } -} - -/* - * dflt_lock should never get called. It gets put into the dma tag when - * lockfunc == NULL, which is only valid if the maps that are associated - * with the tag are meant to never be defered. - * XXX Should have a way to identify which driver is responsible here. - */ -static void -dflt_lock(void *arg, bus_dma_lock_op_t op) -{ - panic("driver error: busdma dflt_lock called"); -} - -#define BUS_DMA_COULD_BOUNCE BUS_DMA_BUS3 -#define BUS_DMA_MIN_ALLOC_COMP BUS_DMA_BUS4 -/* - * Allocate a device specific dma_tag. 
- */ -int -bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, - bus_size_t boundary, bus_addr_t lowaddr, - bus_addr_t highaddr, bus_dma_filter_t *filter, - void *filterarg, bus_size_t maxsize, int nsegments, - bus_size_t maxsegsz, int flags, bus_dma_lock_t *lockfunc, - void *lockfuncarg, bus_dma_tag_t *dmat) -{ - bus_dma_tag_t newtag; - int error = 0; - - /* Basic sanity checking */ - if (boundary != 0 && boundary < maxsegsz) - maxsegsz = boundary; - - if (maxsegsz == 0) { - return (EINVAL); - } - - /* Return a NULL tag on failure */ - *dmat = NULL; - - newtag = (bus_dma_tag_t)malloc(sizeof(*newtag), M_DEVBUF, - M_ZERO | M_NOWAIT); - if (newtag == NULL) { - CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", - __func__, newtag, 0, error); - return (ENOMEM); - } - - newtag->parent = parent; - newtag->alignment = alignment; - newtag->boundary = boundary; - newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1); - newtag->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1); - newtag->filter = filter; - newtag->filterarg = filterarg; - newtag->maxsize = maxsize; - newtag->nsegments = nsegments; - newtag->maxsegsz = maxsegsz; - newtag->flags = flags; - newtag->ref_count = 1; /* Count ourself */ - newtag->map_count = 0; - if (lockfunc != NULL) { - newtag->lockfunc = lockfunc; - newtag->lockfuncarg = lockfuncarg; - } else { - newtag->lockfunc = dflt_lock; - newtag->lockfuncarg = NULL; - } - newtag->segments = NULL; - - /* Take into account any restrictions imposed by our parent tag */ - if (parent != NULL) { - newtag->lowaddr = MIN(parent->lowaddr, newtag->lowaddr); - newtag->highaddr = MAX(parent->highaddr, newtag->highaddr); - if (newtag->boundary == 0) - newtag->boundary = parent->boundary; - else if (parent->boundary != 0) - newtag->boundary = MIN(parent->boundary, - newtag->boundary); - if (newtag->filter == NULL) { - /* - * Short circuit looking at our parent directly - * since we have encapsulated all of its information - */ - newtag->filter = parent->filter; - newtag->filterarg = parent->filterarg; - newtag->parent = parent->parent; - } - if (newtag->parent != NULL) - atomic_add_int(&parent->ref_count, 1); - } - - if (newtag->lowaddr < ptoa((vm_paddr_t)Maxmem) - || newtag->alignment > 1) - newtag->flags |= BUS_DMA_COULD_BOUNCE; - - if (((newtag->flags & BUS_DMA_COULD_BOUNCE) != 0) && - (flags & BUS_DMA_ALLOCNOW) != 0) { - struct bounce_zone *bz; - - /* Must bounce */ - - if ((error = alloc_bounce_zone(newtag)) != 0) { - free(newtag, M_DEVBUF); - return (error); - } - bz = newtag->bounce_zone; - - if (ptoa(bz->total_bpages) < maxsize) { - int pages; - - pages = atop(maxsize) - bz->total_bpages; - - /* Add pages to our bounce pool */ - if (alloc_bounce_pages(newtag, pages) < pages) - error = ENOMEM; - } - /* Performed initial allocation */ - newtag->flags |= BUS_DMA_MIN_ALLOC_COMP; - } - - if (error != 0) { - free(newtag, M_DEVBUF); - } else { - *dmat = newtag; - } - CTR4(KTR_BUSDMA, "%s returned tag %p tag flags 0x%x error %d", - __func__, newtag, (newtag != NULL ? 
newtag->flags : 0), error); - return (error); -} - -int -bus_dma_tag_destroy(bus_dma_tag_t dmat) -{ - bus_dma_tag_t dmat_copy; - int error; - - error = 0; - dmat_copy = dmat; - - if (dmat != NULL) { - - if (dmat->map_count != 0) { - error = EBUSY; - goto out; - } - - while (dmat != NULL) { - bus_dma_tag_t parent; - - parent = dmat->parent; - atomic_subtract_int(&dmat->ref_count, 1); - if (dmat->ref_count == 0) { - if (dmat->segments != NULL) - free(dmat->segments, M_DEVBUF); - free(dmat, M_DEVBUF); - /* - * Last reference count, so - * release our reference - * count on our parent. - */ - dmat = parent; - } else - dmat = NULL; - } - } -out: - CTR3(KTR_BUSDMA, "%s tag %p error %d", __func__, dmat_copy, error); - return (error); -} - -/* - * Allocate a handle for mapping from kva/uva/physical - * address space into bus device space. - */ -int -bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) -{ - int error; - - error = 0; - - if (dmat->segments == NULL) { - dmat->segments = (bus_dma_segment_t *)malloc( - sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF, - M_NOWAIT); - if (dmat->segments == NULL) { - CTR3(KTR_BUSDMA, "%s: tag %p error %d", - __func__, dmat, ENOMEM); - return (ENOMEM); - } - } - - /* - * Bouncing might be required if the driver asks for an active - * exclusion region, a data alignment that is stricter than 1, and/or - * an active address boundary. - */ - if (dmat->flags & BUS_DMA_COULD_BOUNCE) { - - /* Must bounce */ - struct bounce_zone *bz; - int maxpages; - - if (dmat->bounce_zone == NULL) { - if ((error = alloc_bounce_zone(dmat)) != 0) - return (error); - } - bz = dmat->bounce_zone; - - *mapp = (bus_dmamap_t)malloc(sizeof(**mapp), M_DEVBUF, - M_NOWAIT | M_ZERO); - if (*mapp == NULL) { - CTR3(KTR_BUSDMA, "%s: tag %p error %d", - __func__, dmat, ENOMEM); - return (ENOMEM); - } - - /* Initialize the new map */ - STAILQ_INIT(&((*mapp)->bpages)); - - /* - * Attempt to add pages to our pool on a per-instance - * basis up to a sane limit. - */ - if (dmat->alignment > 1) - maxpages = MAX_BPAGES; - else - maxpages = MIN(MAX_BPAGES, Maxmem -atop(dmat->lowaddr)); - if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0 - || (bz->map_count > 0 && bz->total_bpages < maxpages)) { - int pages; - - pages = MAX(atop(dmat->maxsize), 1); - pages = MIN(maxpages - bz->total_bpages, pages); - pages = MAX(pages, 1); - if (alloc_bounce_pages(dmat, pages) < pages) - error = ENOMEM; - - if ((dmat->flags & BUS_DMA_MIN_ALLOC_COMP) == 0) { - if (error == 0) - dmat->flags |= BUS_DMA_MIN_ALLOC_COMP; - } else { - error = 0; - } - } - bz->map_count++; - } else { - *mapp = NULL; - } - if (error == 0) - dmat->map_count++; - CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", - __func__, dmat, dmat->flags, error); - return (error); -} - -/* - * Destroy a handle for mapping from kva/uva/physical - * address space into bus device space. - */ -int -bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) -{ - if (map != NULL && map != &nobounce_dmamap) { - if (STAILQ_FIRST(&map->bpages) != NULL) { - CTR3(KTR_BUSDMA, "%s: tag %p error %d", - __func__, dmat, EBUSY); - return (EBUSY); - } - if (dmat->bounce_zone) - dmat->bounce_zone->map_count--; - free(map, M_DEVBUF); - } - dmat->map_count--; - CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); - return (0); -} - - -/* - * Allocate a piece of memory that can be efficiently mapped into - * bus device space based on the constraints lited in the dma tag. - * A dmamap to for use with dmamap_load is also allocated. 
- */ -int -bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, - bus_dmamap_t *mapp) -{ - int mflags; - - if (flags & BUS_DMA_NOWAIT) - mflags = M_NOWAIT; - else - mflags = M_WAITOK; - - /* If we succeed, no mapping/bouncing will be required */ - *mapp = NULL; - - if (dmat->segments == NULL) { - dmat->segments = (bus_dma_segment_t *)malloc( - sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF, - mflags); - if (dmat->segments == NULL) { - CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", - __func__, dmat, dmat->flags, ENOMEM); - return (ENOMEM); - } - } - if (flags & BUS_DMA_ZERO) - mflags |= M_ZERO; - - /* - * XXX: - * (dmat->alignment < dmat->maxsize) is just a quick hack; the exact - * alignment guarantees of malloc need to be nailed down, and the - * code below should be rewritten to take that into account. - * - * In the meantime, we'll warn the user if malloc gets it wrong. - */ - if ((dmat->maxsize <= PAGE_SIZE) && - (dmat->alignment < dmat->maxsize) && - dmat->lowaddr >= ptoa((vm_paddr_t)Maxmem)) { - *vaddr = malloc(dmat->maxsize, M_DEVBUF, mflags); - } else { - /* - * XXX Use Contigmalloc until it is merged into this facility - * and handles multi-seg allocations. Nobody is doing - * multi-seg allocations yet though. - * XXX Certain AGP hardware does. - */ - *vaddr = contigmalloc(dmat->maxsize, M_DEVBUF, mflags, - 0ul, dmat->lowaddr, dmat->alignment? dmat->alignment : 1ul, - dmat->boundary); - } - if (*vaddr == NULL) { - CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", - __func__, dmat, dmat->flags, ENOMEM); - return (ENOMEM); - } else if (vtophys(*vaddr) & (dmat->alignment - 1)) { - printf("bus_dmamem_alloc failed to align memory properly.\n"); - } - if (flags & BUS_DMA_NOCACHE) - pmap_change_attr((vm_offset_t)*vaddr, dmat->maxsize, - PAT_UNCACHEABLE); - CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", - __func__, dmat, dmat->flags, 0); - return (0); -} - -/* - * Free a piece of memory and it's allociated dmamap, that was allocated - * via bus_dmamem_alloc. Make the same choice for free/contigfree. - */ -void -bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) -{ - /* - * dmamem does not need to be bounced, so the map should be - * NULL - */ - if (map != NULL) - panic("bus_dmamem_free: Invalid map freed\n"); - pmap_change_attr((vm_offset_t)vaddr, dmat->maxsize, PAT_WRITE_BACK); - if ((dmat->maxsize <= PAGE_SIZE) && - (dmat->alignment < dmat->maxsize) && - dmat->lowaddr >= ptoa((vm_paddr_t)Maxmem)) - free(vaddr, M_DEVBUF); - else { - contigfree(vaddr, dmat->maxsize, M_DEVBUF); - } - CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags); -} - -/* - * Utility function to load a linear buffer. lastaddrp holds state - * between invocations (for multiple-buffer loads). segp contains - * the starting segment on entrace, and the ending segment on exit. - * first indicates if this is the first invocation of this function. 
- */ -static __inline int -_bus_dmamap_load_buffer(bus_dma_tag_t dmat, - bus_dmamap_t map, - void *buf, bus_size_t buflen, - pmap_t pmap, - int flags, - bus_addr_t *lastaddrp, - bus_dma_segment_t *segs, - int *segp, - int first) -{ - bus_size_t sgsize; - bus_addr_t curaddr, lastaddr, baddr, bmask; - vm_offset_t vaddr; - bus_addr_t paddr; - int seg; - - if (map == NULL) - map = &nobounce_dmamap; - - if ((map != &nobounce_dmamap && map->pagesneeded == 0) - && ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0)) { - vm_offset_t vendaddr; - - CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " - "alignment= %d", dmat->lowaddr, ptoa((vm_paddr_t)Maxmem), - dmat->boundary, dmat->alignment); - CTR3(KTR_BUSDMA, "map= %p, nobouncemap= %p, pagesneeded= %d", - map, &nobounce_dmamap, map->pagesneeded); - /* - * Count the number of bounce pages - * needed in order to complete this transfer - */ - vaddr = (vm_offset_t)buf; - vendaddr = (vm_offset_t)buf + buflen; - - while (vaddr < vendaddr) { - bus_size_t sg_len; - - sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK); - if (pmap) - paddr = pmap_extract(pmap, vaddr); - else - paddr = pmap_kextract(vaddr); - if (run_filter(dmat, paddr) != 0) { - sg_len = roundup2(sg_len, dmat->alignment); - map->pagesneeded++; - } - vaddr += sg_len; - } - CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded); - } - - /* Reserve Necessary Bounce Pages */ - if (map->pagesneeded != 0) { - mtx_lock(&bounce_lock); - if (flags & BUS_DMA_NOWAIT) { - if (reserve_bounce_pages(dmat, map, 0) != 0) { - mtx_unlock(&bounce_lock); - return (ENOMEM); - } - } else { - if (reserve_bounce_pages(dmat, map, 1) != 0) { - /* Queue us for resources */ - map->dmat = dmat; - map->buf = buf; - map->buflen = buflen; - STAILQ_INSERT_TAIL(&bounce_map_waitinglist, - map, links); - mtx_unlock(&bounce_lock); - return (EINPROGRESS); - } - } - mtx_unlock(&bounce_lock); - } - - vaddr = (vm_offset_t)buf; - lastaddr = *lastaddrp; - bmask = ~(dmat->boundary - 1); - - for (seg = *segp; buflen > 0 ; ) { - bus_size_t max_sgsize; - - /* - * Get the physical address for this segment. - */ - if (pmap) - curaddr = pmap_extract(pmap, vaddr); - else - curaddr = pmap_kextract(vaddr); - - /* - * Compute the segment size, and adjust counts. - */ - max_sgsize = MIN(buflen, dmat->maxsegsz); - sgsize = PAGE_SIZE - ((vm_offset_t)curaddr & PAGE_MASK); - if (map->pagesneeded != 0 && run_filter(dmat, curaddr)) { - sgsize = roundup2(sgsize, dmat->alignment); - sgsize = MIN(sgsize, max_sgsize); - curaddr = add_bounce_page(dmat, map, vaddr, sgsize); - } else { - sgsize = MIN(sgsize, max_sgsize); - } - - /* - * Make sure we don't cross any boundaries. - */ - if (dmat->boundary > 0) { - baddr = (curaddr + dmat->boundary) & bmask; - if (sgsize > (baddr - curaddr)) - sgsize = (baddr - curaddr); - } - - /* - * Insert chunk into a segment, coalescing with - * previous segment if possible. - */ - if (first) { - segs[seg].ds_addr = curaddr; - segs[seg].ds_len = sgsize; - first = 0; - } else { - if (curaddr == lastaddr && - (segs[seg].ds_len + sgsize) <= dmat->maxsegsz && - (dmat->boundary == 0 || - (segs[seg].ds_addr & bmask) == (curaddr & bmask))) - segs[seg].ds_len += sgsize; - else { - if (++seg >= dmat->nsegments) - break; - segs[seg].ds_addr = curaddr; - segs[seg].ds_len = sgsize; - } - } - - lastaddr = curaddr + sgsize; - vaddr += sgsize; - buflen -= sgsize; - } - - *segp = seg; - *lastaddrp = lastaddr; - - /* - * Did we fit? - */ - return (buflen != 0 ? EFBIG : 0); /* XXX better return value here? 
*/ -} - -/* - * Map the buffer buf into bus space using the dmamap map. - */ -int -bus_dmamap_load(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, - bus_size_t buflen, bus_dmamap_callback_t *callback, - void *callback_arg, int flags) -{ - bus_addr_t lastaddr = 0; - int error, nsegs = 0; - - if (map != NULL) { - flags |= BUS_DMA_WAITOK; - map->callback = callback; - map->callback_arg = callback_arg; - } - - error = _bus_dmamap_load_buffer(dmat, map, buf, buflen, NULL, flags, - &lastaddr, dmat->segments, &nsegs, 1); - - CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", - __func__, dmat, dmat->flags, error, nsegs + 1); - - if (error == EINPROGRESS) { - return (error); - } - - if (error) - (*callback)(callback_arg, dmat->segments, 0, error); - else - (*callback)(callback_arg, dmat->segments, nsegs + 1, 0); - - /* - * Return ENOMEM to the caller so that it can pass it up the stack. - * This error only happens when NOWAIT is set, so deferal is disabled. - */ - if (error == ENOMEM) - return (error); - - return (0); -} - - -/* - * Like _bus_dmamap_load(), but for mbufs. - */ -int -bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, - struct mbuf *m0, - bus_dmamap_callback2_t *callback, void *callback_arg, - int flags) -{ - int nsegs, error; - - M_ASSERTPKTHDR(m0); - - flags |= BUS_DMA_NOWAIT; - nsegs = 0; - error = 0; - if (m0->m_pkthdr.len <= dmat->maxsize) { - int first = 1; - bus_addr_t lastaddr = 0; - struct mbuf *m; - - for (m = m0; m != NULL && error == 0; m = m->m_next) { - if (m->m_len > 0) { - error = _bus_dmamap_load_buffer(dmat, map, - m->m_data, m->m_len, - NULL, flags, &lastaddr, - dmat->segments, &nsegs, first); - first = 0; - } - } - } else { - error = EINVAL; - } - - if (error) { - /* force "no valid mappings" in callback */ - (*callback)(callback_arg, dmat->segments, 0, 0, error); - } else { - (*callback)(callback_arg, dmat->segments, - nsegs+1, m0->m_pkthdr.len, error); - } - CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", - __func__, dmat, dmat->flags, error, nsegs + 1); - return (error); -} - -int -bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map, - struct mbuf *m0, bus_dma_segment_t *segs, int *nsegs, - int flags) -{ - int error; - - M_ASSERTPKTHDR(m0); - - flags |= BUS_DMA_NOWAIT; - *nsegs = 0; - error = 0; - if (m0->m_pkthdr.len <= dmat->maxsize) { - int first = 1; - bus_addr_t lastaddr = 0; - struct mbuf *m; - - for (m = m0; m != NULL && error == 0; m = m->m_next) { - if (m->m_len > 0) { - error = _bus_dmamap_load_buffer(dmat, map, - m->m_data, m->m_len, - NULL, flags, &lastaddr, - segs, nsegs, first); - first = 0; - } - } - } else { - error = EINVAL; - } - - /* XXX FIXME: Having to increment nsegs is really annoying */ - ++*nsegs; - CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", - __func__, dmat, dmat->flags, error, *nsegs); - return (error); -} - -/* - * Like _bus_dmamap_load(), but for uios. 
- */ -int -bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, - struct uio *uio, - bus_dmamap_callback2_t *callback, void *callback_arg, - int flags) -{ - bus_addr_t lastaddr = 0; - int nsegs, error, first, i; - bus_size_t resid; - struct iovec *iov; - pmap_t pmap; - - flags |= BUS_DMA_NOWAIT; - resid = uio->uio_resid; - iov = uio->uio_iov; - - if (uio->uio_segflg == UIO_USERSPACE) { - KASSERT(uio->uio_td != NULL, - ("bus_dmamap_load_uio: USERSPACE but no proc")); - pmap = vmspace_pmap(uio->uio_td->td_proc->p_vmspace); - } else - pmap = NULL; - - nsegs = 0; - error = 0; - first = 1; - for (i = 0; i < uio->uio_iovcnt && resid != 0 && !error; i++) { - /* - * Now at the first iovec to load. Load each iovec - * until we have exhausted the residual count. - */ - bus_size_t minlen = - resid < iov[i].iov_len ? resid : iov[i].iov_len; - caddr_t addr = (caddr_t) iov[i].iov_base; - - if (minlen > 0) { - error = _bus_dmamap_load_buffer(dmat, map, - addr, minlen, pmap, flags, &lastaddr, - dmat->segments, &nsegs, first); - first = 0; - - resid -= minlen; - } - } - - if (error) { - /* force "no valid mappings" in callback */ - (*callback)(callback_arg, dmat->segments, 0, 0, error); - } else { - (*callback)(callback_arg, dmat->segments, - nsegs+1, uio->uio_resid, error); - } - CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", - __func__, dmat, dmat->flags, error, nsegs + 1); - return (error); -} - -/* - * Release the mapping held by map. - */ -void -_bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) -{ - struct bounce_page *bpage; - - while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { - STAILQ_REMOVE_HEAD(&map->bpages, links); - free_bounce_page(dmat, bpage); - } -} - -void -_bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op) -{ - struct bounce_page *bpage; - - if ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { - /* - * Handle data bouncing. 
We might also - * want to add support for invalidating - * the caches on broken hardware - */ - CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x op 0x%x " - "performing bounce", __func__, op, dmat, dmat->flags); - - if (op & BUS_DMASYNC_PREWRITE) { - while (bpage != NULL) { - bcopy((void *)bpage->datavaddr, - (void *)bpage->vaddr, - bpage->datacount); - bpage = STAILQ_NEXT(bpage, links); - } - dmat->bounce_zone->total_bounced++; - } - - if (op & BUS_DMASYNC_POSTREAD) { - while (bpage != NULL) { - bcopy((void *)bpage->vaddr, - (void *)bpage->datavaddr, - bpage->datacount); - bpage = STAILQ_NEXT(bpage, links); - } - dmat->bounce_zone->total_bounced++; - } - } -} - -static void -init_bounce_pages(void *dummy __unused) -{ - - total_bpages = 0; - STAILQ_INIT(&bounce_zone_list); - STAILQ_INIT(&bounce_map_waitinglist); - STAILQ_INIT(&bounce_map_callbacklist); - mtx_init(&bounce_lock, "bounce pages lock", NULL, MTX_DEF); -} -SYSINIT(bpages, SI_SUB_LOCK, SI_ORDER_ANY, init_bounce_pages, NULL); - -static struct sysctl_ctx_list * -busdma_sysctl_tree(struct bounce_zone *bz) -{ - return (&bz->sysctl_tree); -} - -static struct sysctl_oid * -busdma_sysctl_tree_top(struct bounce_zone *bz) -{ - return (bz->sysctl_tree_top); -} - -static int -alloc_bounce_zone(bus_dma_tag_t dmat) -{ - struct bounce_zone *bz; - - /* Check to see if we already have a suitable zone */ - STAILQ_FOREACH(bz, &bounce_zone_list, links) { - if ((dmat->alignment <= bz->alignment) - && (dmat->lowaddr >= bz->lowaddr)) { - dmat->bounce_zone = bz; - return (0); - } - } - - if ((bz = (struct bounce_zone *)malloc(sizeof(*bz), M_DEVBUF, - M_NOWAIT | M_ZERO)) == NULL) - return (ENOMEM); - - STAILQ_INIT(&bz->bounce_page_list); - bz->free_bpages = 0; - bz->reserved_bpages = 0; - bz->active_bpages = 0; - bz->lowaddr = dmat->lowaddr; - bz->alignment = MAX(dmat->alignment, PAGE_SIZE); - bz->map_count = 0; - snprintf(bz->zoneid, 8, "zone%d", busdma_zonecount); - busdma_zonecount++; - snprintf(bz->lowaddrid, 18, "%#jx", (uintmax_t)bz->lowaddr); - STAILQ_INSERT_TAIL(&bounce_zone_list, bz, links); - dmat->bounce_zone = bz; - - sysctl_ctx_init(&bz->sysctl_tree); - bz->sysctl_tree_top = SYSCTL_ADD_NODE(&bz->sysctl_tree, - SYSCTL_STATIC_CHILDREN(_hw_busdma), OID_AUTO, bz->zoneid, - CTLFLAG_RD, 0, ""); - if (bz->sysctl_tree_top == NULL) { - sysctl_ctx_free(&bz->sysctl_tree); - return (0); /* XXX error code? 
*/ - } - - SYSCTL_ADD_INT(busdma_sysctl_tree(bz), - SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, - "total_bpages", CTLFLAG_RD, &bz->total_bpages, 0, - "Total bounce pages"); - SYSCTL_ADD_INT(busdma_sysctl_tree(bz), - SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, - "free_bpages", CTLFLAG_RD, &bz->free_bpages, 0, - "Free bounce pages"); - SYSCTL_ADD_INT(busdma_sysctl_tree(bz), - SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, - "reserved_bpages", CTLFLAG_RD, &bz->reserved_bpages, 0, - "Reserved bounce pages"); - SYSCTL_ADD_INT(busdma_sysctl_tree(bz), - SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, - "active_bpages", CTLFLAG_RD, &bz->active_bpages, 0, - "Active bounce pages"); - SYSCTL_ADD_INT(busdma_sysctl_tree(bz), - SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, - "total_bounced", CTLFLAG_RD, &bz->total_bounced, 0, - "Total bounce requests"); - SYSCTL_ADD_INT(busdma_sysctl_tree(bz), - SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, - "total_deferred", CTLFLAG_RD, &bz->total_deferred, 0, - "Total bounce requests that were deferred"); - SYSCTL_ADD_STRING(busdma_sysctl_tree(bz), - SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, - "lowaddr", CTLFLAG_RD, bz->lowaddrid, 0, ""); - SYSCTL_ADD_INT(busdma_sysctl_tree(bz), - SYSCTL_CHILDREN(busdma_sysctl_tree_top(bz)), OID_AUTO, - "alignment", CTLFLAG_RD, &bz->alignment, 0, ""); - - return (0); -} - -static int -alloc_bounce_pages(bus_dma_tag_t dmat, u_int numpages) -{ - struct bounce_zone *bz; - int count; - - bz = dmat->bounce_zone; - count = 0; - while (numpages > 0) { - struct bounce_page *bpage; - - bpage = (struct bounce_page *)malloc(sizeof(*bpage), M_DEVBUF, - M_NOWAIT | M_ZERO); - - if (bpage == NULL) - break; - bpage->vaddr = (vm_offset_t)contigmalloc(PAGE_SIZE, M_DEVBUF, - M_NOWAIT, 0ul, - bz->lowaddr, - PAGE_SIZE, - 0); - if (bpage->vaddr == 0) { - free(bpage, M_DEVBUF); - break; - } - bpage->busaddr = pmap_kextract(bpage->vaddr); - mtx_lock(&bounce_lock); - STAILQ_INSERT_TAIL(&bz->bounce_page_list, bpage, links); - total_bpages++; - bz->total_bpages++; - bz->free_bpages++; - mtx_unlock(&bounce_lock); - count++; - numpages--; - } - return (count); -} - -static int -reserve_bounce_pages(bus_dma_tag_t dmat, bus_dmamap_t map, int commit) -{ - struct bounce_zone *bz; - int pages; - - mtx_assert(&bounce_lock, MA_OWNED); - bz = dmat->bounce_zone; - pages = MIN(bz->free_bpages, map->pagesneeded - map->pagesreserved); - if (commit == 0 && map->pagesneeded > (map->pagesreserved + pages)) - return (map->pagesneeded - (map->pagesreserved + pages)); - bz->free_bpages -= pages; - bz->reserved_bpages += pages; - map->pagesreserved += pages; - pages = map->pagesneeded - map->pagesreserved; - - return (pages); -} - -static bus_addr_t -add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, - bus_size_t size) -{ - struct bounce_zone *bz; - struct bounce_page *bpage; - - KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); - KASSERT(map != NULL && map != &nobounce_dmamap, - ("add_bounce_page: bad map %p", map)); - - bz = dmat->bounce_zone; - if (map->pagesneeded == 0) - panic("add_bounce_page: map doesn't need any pages"); - map->pagesneeded--; - - if (map->pagesreserved == 0) - panic("add_bounce_page: map doesn't need any pages"); - map->pagesreserved--; - - mtx_lock(&bounce_lock); - bpage = STAILQ_FIRST(&bz->bounce_page_list); - if (bpage == NULL) - panic("add_bounce_page: free page list is empty"); - - STAILQ_REMOVE_HEAD(&bz->bounce_page_list, links); - 
bz->reserved_bpages--; - bz->active_bpages++; - mtx_unlock(&bounce_lock); - - if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { - /* Page offset needs to be preserved. */ - bpage->vaddr |= vaddr & PAGE_MASK; - bpage->busaddr |= vaddr & PAGE_MASK; - } - bpage->datavaddr = vaddr; - bpage->datacount = size; - STAILQ_INSERT_TAIL(&(map->bpages), bpage, links); - return (bpage->busaddr); -} - -static void -free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage) -{ - struct bus_dmamap *map; - struct bounce_zone *bz; - - bz = dmat->bounce_zone; - bpage->datavaddr = 0; - bpage->datacount = 0; - if (dmat->flags & BUS_DMA_KEEP_PG_OFFSET) { - /* - * Reset the bounce page to start at offset 0. Other uses - * of this bounce page may need to store a full page of - * data and/or assume it starts on a page boundary. - */ - bpage->vaddr &= ~PAGE_MASK; - bpage->busaddr &= ~PAGE_MASK; - } - - mtx_lock(&bounce_lock); - STAILQ_INSERT_HEAD(&bz->bounce_page_list, bpage, links); - bz->free_bpages++; - bz->active_bpages--; - if ((map = STAILQ_FIRST(&bounce_map_waitinglist)) != NULL) { - if (reserve_bounce_pages(map->dmat, map, 1) == 0) { - STAILQ_REMOVE_HEAD(&bounce_map_waitinglist, links); - STAILQ_INSERT_TAIL(&bounce_map_callbacklist, - map, links); - busdma_swi_pending = 1; - bz->total_deferred++; - swi_sched(vm_ih, 0); - } - } - mtx_unlock(&bounce_lock); -} - -void -busdma_swi(void) -{ - bus_dma_tag_t dmat; - struct bus_dmamap *map; - - mtx_lock(&bounce_lock); - while ((map = STAILQ_FIRST(&bounce_map_callbacklist)) != NULL) { - STAILQ_REMOVE_HEAD(&bounce_map_callbacklist, links); - mtx_unlock(&bounce_lock); - dmat = map->dmat; - (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_LOCK); - bus_dmamap_load(map->dmat, map, map->buf, map->buflen, - map->callback, map->callback_arg, /*flags*/0); - (dmat->lockfunc)(dmat->lockfuncarg, BUS_DMA_UNLOCK); - mtx_lock(&bounce_lock); - } - mtx_unlock(&bounce_lock); -} diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S index 37e89a00..7ffa7a9 100644 --- a/sys/amd64/amd64/exception.S +++ b/sys/amd64/amd64/exception.S @@ -380,7 +380,6 @@ IDTVEC(fast_syscall) movq %rsp,%rdi call syscall movq PCPU(CURPCB),%rax - andq $~PCB_FULLCTX,PCB_FLAGS(%rax) MEXITCOUNT jmp doreti diff --git a/sys/amd64/amd64/genassym.c b/sys/amd64/amd64/genassym.c index 613bce5..bc30174 100644 --- a/sys/amd64/amd64/genassym.c +++ b/sys/amd64/amd64/genassym.c @@ -161,7 +161,6 @@ ASSYM(PCB_SIZE, sizeof(struct pcb)); ASSYM(PCB_DBREGS, PCB_DBREGS); ASSYM(PCB_32BIT, PCB_32BIT); ASSYM(PCB_GS32BIT, PCB_GS32BIT); -ASSYM(PCB_FULLCTX, PCB_FULLCTX); ASSYM(COMMON_TSS_RSP0, offsetof(struct amd64tss, tss_rsp0)); diff --git a/sys/amd64/amd64/identcpu.c b/sys/amd64/amd64/identcpu.c index 7e4319e..cd28d59 100644 --- a/sys/amd64/amd64/identcpu.c +++ b/sys/amd64/amd64/identcpu.c @@ -109,6 +109,8 @@ static int hw_clockrate; SYSCTL_INT(_hw, OID_AUTO, clockrate, CTLFLAG_RD, &hw_clockrate, 0, "CPU instruction clock rate"); +static eventhandler_tag tsc_post_tag; + static char cpu_brand[48]; static struct { @@ -392,28 +394,6 @@ printcpuinfo(void) * If this CPU supports P-state invariant TSC then * mention the capability. 
*/ - switch (cpu_vendor_id) { - case CPU_VENDOR_AMD: - if ((amd_pminfo & AMDPM_TSC_INVARIANT) || - CPUID_TO_FAMILY(cpu_id) >= 0x10 || - cpu_id == 0x60fb2) - tsc_is_invariant = 1; - break; - case CPU_VENDOR_INTEL: - if ((amd_pminfo & AMDPM_TSC_INVARIANT) || - (CPUID_TO_FAMILY(cpu_id) == 0x6 && - CPUID_TO_MODEL(cpu_id) >= 0xe) || - (CPUID_TO_FAMILY(cpu_id) == 0xf && - CPUID_TO_MODEL(cpu_id) >= 0x3)) - tsc_is_invariant = 1; - break; - case CPU_VENDOR_CENTAUR: - if (CPUID_TO_FAMILY(cpu_id) == 0x6 && - CPUID_TO_MODEL(cpu_id) >= 0xf && - (rdmsr(0x1203) & 0x100000000ULL) == 0) - tsc_is_invariant = 1; - break; - } if (tsc_is_invariant) printf("\n TSC: P-state invariant"); @@ -455,21 +435,29 @@ panicifcpuunsupported(void) /* Update TSC freq with the value indicated by the caller. */ static void -tsc_freq_changed(void *arg, const struct cf_level *level, int status) +tsc_freq_changed(void *arg __unused, const struct cf_level *level, int status) { - /* - * If there was an error during the transition or - * TSC is P-state invariant, don't do anything. - */ - if (status != 0 || tsc_is_invariant) + + /* If there was an error during the transition, don't do anything. */ + if (status != 0) return; /* Total setting for this level gives the new frequency in MHz. */ hw_clockrate = level->total_set.freq; } -EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, - EVENTHANDLER_PRI_ANY); +static void +hook_tsc_freq(void *arg __unused) +{ + + if (tsc_is_invariant) + return; + + tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change, + tsc_freq_changed, NULL, EVENTHANDLER_PRI_ANY); +} + +SYSINIT(hook_tsc_freq, SI_SUB_CONFIGURE, SI_ORDER_ANY, hook_tsc_freq, NULL); /* * Final stage of CPU identification. diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 584473d..55b1ea3 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -492,7 +492,6 @@ sigreturn(td, uap) #endif kern_sigprocmask(td, SIG_SETMASK, &ucp->uc_sigmask, NULL, 0); - td->td_pcb->pcb_flags |= PCB_FULLCTX; td->td_pcb->pcb_full_iret = 1; return (EJUSTRETURN); } @@ -564,13 +563,7 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate) thread_unlock(curthread); #endif - /* - * Calculate the difference in readings, convert to Mhz, and - * subtract 0.5% of the total. Empirical testing has shown that - * overhead in DELAY() works out to approximately this value. - */ - tsc2 -= tsc1; - *rate = tsc2 * 1000 - tsc2 * 5; + *rate = (tsc2 - tsc1) * 1000; return (0); } @@ -876,6 +869,7 @@ exec_setregs(struct thread *td, struct image_params *imgp, u_long stack) regs->tf_fs = _ufssel; regs->tf_gs = _ugssel; regs->tf_flags = TF_HASSEGS; + td->td_retval[1] = 0; /* * Reset the hardware debug registers if they were in use. @@ -1906,8 +1900,8 @@ set_regs(struct thread *td, struct reg *regs) tp->tf_fs = regs->r_fs; tp->tf_gs = regs->r_gs; tp->tf_flags = TF_HASSEGS; + td->td_pcb->pcb_full_iret = 1; } - td->td_pcb->pcb_flags |= PCB_FULLCTX; return (0); } @@ -2094,7 +2088,6 @@ set_mcontext(struct thread *td, const mcontext_t *mcp) td->td_pcb->pcb_fsbase = mcp->mc_fsbase; td->td_pcb->pcb_gsbase = mcp->mc_gsbase; } - td->td_pcb->pcb_flags |= PCB_FULLCTX; td->td_pcb->pcb_full_iret = 1; return (0); } diff --git a/sys/amd64/amd64/tsc.c b/sys/amd64/amd64/tsc.c deleted file mode 100644 index 847c1eb..0000000 --- a/sys/amd64/amd64/tsc.c +++ /dev/null @@ -1,231 +0,0 @@ -/*- - * Copyright (c) 1998-2003 Poul-Henning Kamp - * All rights reserved. 
- * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. - */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include "opt_clock.h" - -#include <sys/param.h> -#include <sys/bus.h> -#include <sys/cpu.h> -#include <sys/malloc.h> -#include <sys/systm.h> -#include <sys/sysctl.h> -#include <sys/time.h> -#include <sys/timetc.h> -#include <sys/kernel.h> -#include <sys/power.h> -#include <sys/smp.h> -#include <machine/clock.h> -#include <machine/md_var.h> -#include <machine/specialreg.h> - -#include "cpufreq_if.h" - -uint64_t tsc_freq; -int tsc_is_broken; -int tsc_is_invariant; -static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag; - -SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN, - &tsc_is_invariant, 0, "Indicates whether the TSC is P-state invariant"); -TUNABLE_INT("kern.timecounter.invariant_tsc", &tsc_is_invariant); - -#ifdef SMP -static int smp_tsc; -SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0, - "Indicates whether the TSC is safe to use in SMP mode"); -TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc); -#endif - -static void tsc_freq_changed(void *arg, const struct cf_level *level, - int status); -static void tsc_freq_changing(void *arg, const struct cf_level *level, - int *status); -static unsigned tsc_get_timecount(struct timecounter *tc); -static void tsc_levels_changed(void *arg, int unit); - -static struct timecounter tsc_timecounter = { - tsc_get_timecount, /* get_timecount */ - 0, /* no poll_pps */ - ~0u, /* counter_mask */ - 0, /* frequency */ - "TSC", /* name */ - 800, /* quality (adjusted in code) */ -}; - -void -init_TSC(void) -{ - u_int64_t tscval[2]; - - if (bootverbose) - printf("Calibrating TSC clock ... "); - - tscval[0] = rdtsc(); - DELAY(1000000); - tscval[1] = rdtsc(); - - tsc_freq = tscval[1] - tscval[0]; - if (bootverbose) - printf("TSC clock: %lu Hz\n", tsc_freq); - - /* - * Inform CPU accounting about our boot-time clock rate. Once the - * system is finished booting, we will get the real max clock rate - * via tsc_freq_max(). This also will be updated if someone loads - * a cpufreq driver after boot that discovers a new max frequency. - */ - set_cputicker(rdtsc, tsc_freq, 1); - - /* Register to find out about changes in CPU frequency. 
*/ - tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change, - tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST); - tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change, - tsc_freq_changed, NULL, EVENTHANDLER_PRI_FIRST); - tsc_levels_tag = EVENTHANDLER_REGISTER(cpufreq_levels_changed, - tsc_levels_changed, NULL, EVENTHANDLER_PRI_ANY); -} - -void -init_TSC_tc(void) -{ - -#ifdef SMP - /* - * We can not use the TSC in SMP mode unless the TSCs on all CPUs - * are somehow synchronized. Some hardware configurations do - * this, but we have no way of determining whether this is the - * case, so we do not use the TSC in multi-processor systems - * unless the user indicated (by setting kern.timecounter.smp_tsc - * to 1) that he believes that his TSCs are synchronized. - */ - if (mp_ncpus > 1 && !smp_tsc) - tsc_timecounter.tc_quality = -100; -#endif - - if (tsc_freq != 0 && !tsc_is_broken) { - tsc_timecounter.tc_frequency = tsc_freq; - tc_init(&tsc_timecounter); - } -} - -/* - * When cpufreq levels change, find out about the (new) max frequency. We - * use this to update CPU accounting in case it got a lower estimate at boot. - */ -static void -tsc_levels_changed(void *arg, int unit) -{ - device_t cf_dev; - struct cf_level *levels; - int count, error; - uint64_t max_freq; - - /* Only use values from the first CPU, assuming all are equal. */ - if (unit != 0) - return; - - /* Find the appropriate cpufreq device instance. */ - cf_dev = devclass_get_device(devclass_find("cpufreq"), unit); - if (cf_dev == NULL) { - printf("tsc_levels_changed() called but no cpufreq device?\n"); - return; - } - - /* Get settings from the device and find the max frequency. */ - count = 64; - levels = malloc(count * sizeof(*levels), M_TEMP, M_NOWAIT); - if (levels == NULL) - return; - error = CPUFREQ_LEVELS(cf_dev, levels, &count); - if (error == 0 && count != 0) { - max_freq = (uint64_t)levels[0].total_set.freq * 1000000; - set_cputicker(rdtsc, max_freq, 1); - } else - printf("tsc_levels_changed: no max freq found\n"); - free(levels, M_TEMP); -} - -/* - * If the TSC timecounter is in use, veto the pending change. It may be - * possible in the future to handle a dynamically-changing timecounter rate. - */ -static void -tsc_freq_changing(void *arg, const struct cf_level *level, int *status) -{ - - if (*status != 0 || timecounter != &tsc_timecounter || - tsc_is_invariant) - return; - - printf("timecounter TSC must not be in use when " - "changing frequencies; change denied\n"); - *status = EBUSY; -} - -/* Update TSC freq with the value indicated by the caller. */ -static void -tsc_freq_changed(void *arg, const struct cf_level *level, int status) -{ - /* - * If there was an error during the transition or - * TSC is P-state invariant, don't do anything. - */ - if (status != 0 || tsc_is_invariant) - return; - - /* Total setting for this level gives the new frequency in MHz. 
*/ - tsc_freq = (uint64_t)level->total_set.freq * 1000000; - tsc_timecounter.tc_frequency = tsc_freq; -} - -static int -sysctl_machdep_tsc_freq(SYSCTL_HANDLER_ARGS) -{ - int error; - uint64_t freq; - - if (tsc_timecounter.tc_frequency == 0) - return (EOPNOTSUPP); - freq = tsc_freq; - error = sysctl_handle_quad(oidp, &freq, 0, req); - if (error == 0 && req->newptr != NULL) { - tsc_freq = freq; - tsc_timecounter.tc_frequency = tsc_freq; - } - return (error); -} - -SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_QUAD | CTLFLAG_RW, - 0, 0, sysctl_machdep_tsc_freq, "QU", ""); - -static unsigned -tsc_get_timecount(struct timecounter *tc) -{ - return (rdtsc()); -} diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index dafd376..b966393 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -339,15 +339,13 @@ cpu_set_syscall_retval(struct thread *td, int error) * Reconstruct pc, we know that 'syscall' is 2 bytes, * lcall $X,y is 7 bytes, int 0x80 is 2 bytes. * We saved this in tf_err. - * We have to do a full context restore so that %r10 - * (which was holding the value of %rcx) is restored + * %r10 (which was holding the value of %rcx) is restored * for the next iteration. - * r10 restore is only required for freebsd/amd64 processes, + * %r10 restore is only required for freebsd/amd64 processes, * but shall be innocent for any ia32 ABI. */ td->td_frame->tf_rip -= td->td_frame->tf_err; td->td_frame->tf_r10 = td->td_frame->tf_rcx; - td->td_pcb->pcb_flags |= PCB_FULLCTX; break; case EJUSTRETURN: diff --git a/sys/amd64/ia32/ia32_signal.c b/sys/amd64/ia32/ia32_signal.c index aefe9f0..e0f30e2 100644 --- a/sys/amd64/ia32/ia32_signal.c +++ b/sys/amd64/ia32/ia32_signal.c @@ -207,7 +207,6 @@ ia32_set_mcontext(struct thread *td, const struct ia32_mcontext *mcp) tp->tf_rflags = rflags; tp->tf_rsp = mcp->mc_esp; tp->tf_ss = mcp->mc_ss; - td->td_pcb->pcb_flags |= PCB_FULLCTX; td->td_pcb->pcb_full_iret = 1; return (0); } @@ -743,7 +742,7 @@ ia32_setregs(struct thread *td, struct image_params *imgp, u_long stack) fpstate_drop(td); /* Return via doreti so that we can change to a different %cs */ - pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT; + pcb->pcb_flags |= PCB_32BIT; pcb->pcb_flags &= ~PCB_GS32BIT; td->td_pcb->pcb_full_iret = 1; td->td_retval[1] = 0; diff --git a/sys/amd64/include/cpu.h b/sys/amd64/include/cpu.h index 1c2871f..3cc4af7 100644 --- a/sys/amd64/include/cpu.h +++ b/sys/amd64/include/cpu.h @@ -56,6 +56,7 @@ #ifdef _KERNEL extern char btext[]; extern char etext[]; +extern int tsc_present; void cpu_halt(void); void cpu_reset(void); diff --git a/sys/amd64/include/pcb.h b/sys/amd64/include/pcb.h index 1f4ff22..e226379 100644 --- a/sys/amd64/include/pcb.h +++ b/sys/amd64/include/pcb.h @@ -73,7 +73,6 @@ struct pcb { #define PCB_USERFPUINITDONE 0x10 /* fpu user state is initialized */ #define PCB_GS32BIT 0x20 /* linux gs switch */ #define PCB_32BIT 0x40 /* process has 32 bit context (segs etc) */ -#define PCB_FULLCTX 0x80 /* full context restore on sysret */ uint16_t pcb_initial_fpucw; diff --git a/sys/amd64/linux32/linux32_sysvec.c b/sys/amd64/linux32/linux32_sysvec.c index f8719c4..3ebb980 100644 --- a/sys/amd64/linux32/linux32_sysvec.c +++ b/sys/amd64/linux32/linux32_sysvec.c @@ -865,13 +865,13 @@ exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack) regs->tf_flags = TF_HASSEGS; regs->tf_cs = _ucode32sel; regs->tf_rbx = imgp->ps_strings; - td->td_pcb->pcb_full_iret = 1; load_cr0(rcr0() | CR0_MP | CR0_TS); fpstate_drop(td); - /* 
Return via doreti so that we can change to a different %cs */ - pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT; + /* Do full restore on return so that we can change to a different %cs */ + pcb->pcb_flags |= PCB_32BIT; pcb->pcb_flags &= ~PCB_GS32BIT; + pcb->pcb_full_iret = 1; td->td_retval[1] = 0; } diff --git a/sys/arm/at91/at91_pmc.c b/sys/arm/at91/at91_pmc.c index 03a7a6e..bbd0ac9 100644 --- a/sys/arm/at91/at91_pmc.c +++ b/sys/arm/at91/at91_pmc.c @@ -325,7 +325,7 @@ at91_pmc_clock_disable(struct at91_pmc_clock *clk) static int at91_pmc_pll_rate(struct at91_pmc_clock *clk, uint32_t reg) { - uint32_t mul, div, freq;; + uint32_t mul, div, freq; freq = clk->parent->hz; div = (reg >> clk->pll_div_shift) & clk->pll_div_mask; diff --git a/sys/cddl/compat/opensolaris/sys/cyclic_impl.h b/sys/cddl/compat/opensolaris/sys/cyclic_impl.h index a195251..57bb167 100644 --- a/sys/cddl/compat/opensolaris/sys/cyclic_impl.h +++ b/sys/cddl/compat/opensolaris/sys/cyclic_impl.h @@ -288,7 +288,14 @@ typedef struct cyc_id { typedef struct cyc_xcallarg { cyc_cpu_t *cyx_cpu; - hrtime_t cyx_exp; + cyc_handler_t *cyx_hdlr; + cyc_time_t *cyx_when; + cyc_index_t cyx_ndx; + cyc_index_t *cyx_heap; + cyclic_t *cyx_cyclics; + cyc_index_t cyx_size; + uint16_t cyx_flags; + int cyx_wait; } cyc_xcallarg_t; #define CY_DEFAULT_PERCPU 1 diff --git a/sys/cddl/dev/cyclic/cyclic.c b/sys/cddl/dev/cyclic/cyclic.c index df0de6b..b9a6979 100644 --- a/sys/cddl/dev/cyclic/cyclic.c +++ b/sys/cddl/dev/cyclic/cyclic.c @@ -473,73 +473,6 @@ cyclic_expire(cyc_cpu_t *cpu, cyc_index_t ndx, cyclic_t *cyclic) (*handler)(arg); } -static void -cyclic_enable_xcall(void *v) -{ - cyc_xcallarg_t *argp = v; - cyc_cpu_t *cpu = argp->cyx_cpu; - cyc_backend_t *be = cpu->cyp_backend; - - be->cyb_enable(be->cyb_arg); -} - -static void -cyclic_enable(cyc_cpu_t *cpu) -{ - cyc_backend_t *be = cpu->cyp_backend; - cyc_xcallarg_t arg; - - arg.cyx_cpu = cpu; - - /* Cross call to the target CPU */ - be->cyb_xcall(be->cyb_arg, cpu->cyp_cpu, cyclic_enable_xcall, &arg); -} - -static void -cyclic_disable_xcall(void *v) -{ - cyc_xcallarg_t *argp = v; - cyc_cpu_t *cpu = argp->cyx_cpu; - cyc_backend_t *be = cpu->cyp_backend; - - be->cyb_disable(be->cyb_arg); -} - -static void -cyclic_disable(cyc_cpu_t *cpu) -{ - cyc_backend_t *be = cpu->cyp_backend; - cyc_xcallarg_t arg; - - arg.cyx_cpu = cpu; - - /* Cross call to the target CPU */ - be->cyb_xcall(be->cyb_arg, cpu->cyp_cpu, cyclic_disable_xcall, &arg); -} - -static void -cyclic_reprogram_xcall(void *v) -{ - cyc_xcallarg_t *argp = v; - cyc_cpu_t *cpu = argp->cyx_cpu; - cyc_backend_t *be = cpu->cyp_backend; - - be->cyb_reprogram(be->cyb_arg, argp->cyx_exp); -} - -static void -cyclic_reprogram(cyc_cpu_t *cpu, hrtime_t exp) -{ - cyc_backend_t *be = cpu->cyp_backend; - cyc_xcallarg_t arg; - - arg.cyx_cpu = cpu; - arg.cyx_exp = exp; - - /* Cross call to the target CPU */ - be->cyb_xcall(be->cyb_arg, cpu->cyp_cpu, cyclic_reprogram_xcall, &arg); -} - /* * cyclic_fire(cpu_t *) * @@ -570,17 +503,15 @@ static void cyclic_fire(cpu_t *c) { cyc_cpu_t *cpu = c->cpu_cyclic; - - mtx_lock_spin(&cpu->cyp_mtx); - + cyc_backend_t *be = cpu->cyp_backend; cyc_index_t *heap = cpu->cyp_heap; cyclic_t *cyclic, *cyclics = cpu->cyp_cyclics; + void *arg = be->cyb_arg; hrtime_t now = gethrtime(); hrtime_t exp; if (cpu->cyp_nelems == 0) { /* This is a spurious fire. */ - mtx_unlock_spin(&cpu->cyp_mtx); return; } @@ -631,8 +562,45 @@ cyclic_fire(cpu_t *c) * Now we have a cyclic in the root slot which isn't in the past; * reprogram the interrupt source. 
*/ - cyclic_reprogram(cpu, exp); + be->cyb_reprogram(arg, exp); +} + +static void +cyclic_expand_xcall(cyc_xcallarg_t *arg) +{ + cyc_cpu_t *cpu = arg->cyx_cpu; + cyc_index_t new_size = arg->cyx_size, size = cpu->cyp_size, i; + cyc_index_t *new_heap = arg->cyx_heap; + cyclic_t *cyclics = cpu->cyp_cyclics, *new_cyclics = arg->cyx_cyclics; + + /* Disable preemption and interrupts. */ + mtx_lock_spin(&cpu->cyp_mtx); + + /* + * Assert that the new size is a power of 2. + */ + ASSERT((new_size & (new_size - 1)) == 0); + ASSERT(new_size == (size << 1)); + ASSERT(cpu->cyp_heap != NULL && cpu->cyp_cyclics != NULL); + + bcopy(cpu->cyp_heap, new_heap, sizeof (cyc_index_t) * size); + bcopy(cyclics, new_cyclics, sizeof (cyclic_t) * size); + + /* + * Set up the free list, and set all of the new cyclics to be CYF_FREE. + */ + for (i = size; i < new_size; i++) { + new_heap[i] = i; + new_cyclics[i].cy_flags = CYF_FREE; + } + /* + * We can go ahead and plow the value of cyp_heap and cyp_cyclics; + * cyclic_expand() has kept a copy. + */ + cpu->cyp_heap = new_heap; + cpu->cyp_cyclics = new_cyclics; + cpu->cyp_size = new_size; mtx_unlock_spin(&cpu->cyp_mtx); } @@ -643,102 +611,70 @@ cyclic_fire(cpu_t *c) static void cyclic_expand(cyc_cpu_t *cpu) { - cyc_index_t new_size, old_size, i; + cyc_index_t new_size, old_size; cyc_index_t *new_heap, *old_heap; cyclic_t *new_cyclics, *old_cyclics; + cyc_xcallarg_t arg; + cyc_backend_t *be = cpu->cyp_backend; ASSERT(MUTEX_HELD(&cpu_lock)); - if ((new_size = ((old_size = cpu->cyp_size) << 1)) == 0) + old_heap = cpu->cyp_heap; + old_cyclics = cpu->cyp_cyclics; + + if ((new_size = ((old_size = cpu->cyp_size) << 1)) == 0) { new_size = CY_DEFAULT_PERCPU; + ASSERT(old_heap == NULL && old_cyclics == NULL); + } /* * Check that the new_size is a power of 2. */ ASSERT(((new_size - 1) & new_size) == 0); - /* Unlock the mutex while allocating memory so we can wait... */ - mtx_unlock_spin(&cpu->cyp_mtx); - new_heap = malloc(sizeof(cyc_index_t) * new_size, M_CYCLIC, M_WAITOK); new_cyclics = malloc(sizeof(cyclic_t) * new_size, M_CYCLIC, M_ZERO | M_WAITOK); - /* Grab the lock again now we've got the memory... */ - mtx_lock_spin(&cpu->cyp_mtx); - - /* Check if another thread beat us while the mutex was unlocked. */ - if (old_size != cpu->cyp_size) { - /* Oh well, he won. */ - mtx_unlock_spin(&cpu->cyp_mtx); - - free(new_heap, M_CYCLIC); - free(new_cyclics, M_CYCLIC); - - mtx_lock_spin(&cpu->cyp_mtx); - return; - } - - old_heap = cpu->cyp_heap; - old_cyclics = cpu->cyp_cyclics; - - bcopy(cpu->cyp_heap, new_heap, sizeof (cyc_index_t) * old_size); - bcopy(old_cyclics, new_cyclics, sizeof (cyclic_t) * old_size); - - /* - * Set up the free list, and set all of the new cyclics to be CYF_FREE. - */ - for (i = old_size; i < new_size; i++) { - new_heap[i] = i; - new_cyclics[i].cy_flags = CYF_FREE; - } + arg.cyx_cpu = cpu; + arg.cyx_heap = new_heap; + arg.cyx_cyclics = new_cyclics; + arg.cyx_size = new_size; - /* - * We can go ahead and plow the value of cyp_heap and cyp_cyclics; - * cyclic_expand() has kept a copy. 
- */ - cpu->cyp_heap = new_heap; - cpu->cyp_cyclics = new_cyclics; - cpu->cyp_size = new_size; + be->cyb_xcall(be->cyb_arg, cpu->cyp_cpu, + (cyc_func_t)cyclic_expand_xcall, &arg); if (old_cyclics != NULL) { ASSERT(old_heap != NULL); ASSERT(old_size != 0); - mtx_unlock_spin(&cpu->cyp_mtx); - free(old_cyclics, M_CYCLIC); free(old_heap, M_CYCLIC); - - mtx_lock_spin(&cpu->cyp_mtx); } } -static cyc_index_t -cyclic_add_here(cyc_cpu_t *cpu, cyc_handler_t *hdlr, - cyc_time_t *when, uint16_t flags) +static void +cyclic_add_xcall(cyc_xcallarg_t *arg) { + cyc_cpu_t *cpu = arg->cyx_cpu; + cyc_handler_t *hdlr = arg->cyx_hdlr; + cyc_time_t *when = arg->cyx_when; + cyc_backend_t *be = cpu->cyp_backend; cyc_index_t ndx, nelems; + cyb_arg_t bar = be->cyb_arg; cyclic_t *cyclic; - ASSERT(MUTEX_HELD(&cpu_lock)); - - mtx_lock_spin(&cpu->cyp_mtx); - - ASSERT(!(cpu->cyp_cpu->cpu_flags & CPU_OFFLINE)); - ASSERT(when->cyt_when >= 0 && when->cyt_interval > 0); - - while (cpu->cyp_nelems == cpu->cyp_size) - cyclic_expand(cpu); - ASSERT(cpu->cyp_nelems < cpu->cyp_size); + /* Disable preemption and interrupts. */ + mtx_lock_spin(&cpu->cyp_mtx); nelems = cpu->cyp_nelems++; - if (nelems == 0) + if (nelems == 0) { /* * If this is the first element, we need to enable the * backend on this CPU. */ - cyclic_enable(cpu); + be->cyb_enable(bar); + } ndx = cpu->cyp_heap[nelems]; cyclic = &cpu->cyp_cyclics[ndx]; @@ -746,14 +682,20 @@ cyclic_add_here(cyc_cpu_t *cpu, cyc_handler_t *hdlr, ASSERT(cyclic->cy_flags == CYF_FREE); cyclic->cy_interval = when->cyt_interval; - if (when->cyt_when == 0) - cyclic->cy_expire = gethrtime() + cyclic->cy_interval; - else + if (when->cyt_when == 0) { + /* + * If a start time hasn't been explicitly specified, we'll + * start on the next interval boundary. + */ + cyclic->cy_expire = (gethrtime() / cyclic->cy_interval + 1) * + cyclic->cy_interval; + } else { cyclic->cy_expire = when->cyt_when; + } cyclic->cy_handler = hdlr->cyh_func; cyclic->cy_arg = hdlr->cyh_arg; - cyclic->cy_flags = flags; + cyclic->cy_flags = arg->cyx_flags; if (cyclic_upheap(cpu, nelems)) { hrtime_t exp = cyclic->cy_expire; @@ -762,31 +704,63 @@ cyclic_add_here(cyc_cpu_t *cpu, cyc_handler_t *hdlr, * If our upheap propagated to the root, we need to * reprogram the interrupt source. */ - cyclic_reprogram(cpu, exp); + be->cyb_reprogram(bar, exp); } - mtx_unlock_spin(&cpu->cyp_mtx); - return (ndx); + arg->cyx_ndx = ndx; } - -static int -cyclic_remove_here(cyc_cpu_t *cpu, cyc_index_t ndx, cyc_time_t *when, int wait) +static cyc_index_t +cyclic_add_here(cyc_cpu_t *cpu, cyc_handler_t *hdlr, + cyc_time_t *when, uint16_t flags) { - cyc_index_t nelems, i; - cyclic_t *cyclic; - cyc_index_t *heap, last; + cyc_backend_t *be = cpu->cyp_backend; + cyb_arg_t bar = be->cyb_arg; + cyc_xcallarg_t arg; ASSERT(MUTEX_HELD(&cpu_lock)); - ASSERT(wait == CY_WAIT || wait == CY_NOWAIT); + ASSERT(!(cpu->cyp_cpu->cpu_flags & CPU_OFFLINE)); + ASSERT(when->cyt_when >= 0 && when->cyt_interval > 0); - mtx_lock_spin(&cpu->cyp_mtx); + if (cpu->cyp_nelems == cpu->cyp_size) { + /* + * This is expensive; it will cross call onto the other + * CPU to perform the expansion. + */ + cyclic_expand(cpu); + ASSERT(cpu->cyp_nelems < cpu->cyp_size); + } - heap = cpu->cyp_heap; + /* + * By now, we know that we're going to be able to successfully + * perform the add. Now cross call over to the CPU of interest to + * actually add our cyclic. 
+ */ + arg.cyx_cpu = cpu; + arg.cyx_hdlr = hdlr; + arg.cyx_when = when; + arg.cyx_flags = flags; + + be->cyb_xcall(bar, cpu->cyp_cpu, (cyc_func_t)cyclic_add_xcall, &arg); + + return (arg.cyx_ndx); +} + +static void +cyclic_remove_xcall(cyc_xcallarg_t *arg) +{ + cyc_cpu_t *cpu = arg->cyx_cpu; + cyc_backend_t *be = cpu->cyp_backend; + cyb_arg_t bar = be->cyb_arg; + cyc_index_t ndx = arg->cyx_ndx, nelems = cpu->cyp_nelems, i; + cyc_index_t *heap = cpu->cyp_heap, last; + cyclic_t *cyclic; - nelems = cpu->cyp_nelems; + ASSERT(nelems > 0); + /* Disable preemption and interrupts. */ + mtx_lock_spin(&cpu->cyp_mtx); cyclic = &cpu->cyp_cyclics[ndx]; /* @@ -794,11 +768,17 @@ cyclic_remove_here(cyc_cpu_t *cpu, cyc_index_t ndx, cyc_time_t *when, int wait) * removed as part of a juggling operation, the expiration time * will be used when the cyclic is added to the new CPU. */ - if (when != NULL) { - when->cyt_when = cyclic->cy_expire; - when->cyt_interval = cyclic->cy_interval; + if (arg->cyx_when != NULL) { + arg->cyx_when->cyt_when = cyclic->cy_expire; + arg->cyx_when->cyt_interval = cyclic->cy_interval; } + /* + * Now set the flags to CYF_FREE. We don't need a membar_enter() + * between zeroing pend and setting the flags because we're at + * CY_HIGH_LEVEL (that is, the zeroing of pend and the setting + * of cy_flags appear atomic to softints). + */ cyclic->cy_flags = CYF_FREE; for (i = 0; i < nelems; i++) { @@ -811,19 +791,21 @@ cyclic_remove_here(cyc_cpu_t *cpu, cyc_index_t ndx, cyc_time_t *when, int wait) cpu->cyp_nelems = --nelems; - if (nelems == 0) + if (nelems == 0) { /* * If we just removed the last element, then we need to * disable the backend on this CPU. */ - cyclic_disable(cpu); + be->cyb_disable(bar); + } - if (i == nelems) + if (i == nelems) { /* * If we just removed the last element of the heap, then * we don't have to downheap. */ - goto done; + goto out; + } /* * Swap the last element of the heap with the one we want to @@ -833,17 +815,18 @@ cyclic_remove_here(cyc_cpu_t *cpu, cyc_index_t ndx, cyc_time_t *when, int wait) heap[i] = (last = heap[nelems]); heap[nelems] = ndx; - if (i == 0) + if (i == 0) { cyclic_downheap(cpu, 0); - else { + } else { if (cyclic_upheap(cpu, i) == 0) { /* * The upheap didn't propagate to the root; if it * didn't propagate at all, we need to downheap. 
*/ - if (heap[i] == last) + if (heap[i] == last) { cyclic_downheap(cpu, i); - goto done; + } + goto out; } } @@ -854,10 +837,27 @@ cyclic_remove_here(cyc_cpu_t *cpu, cyc_index_t ndx, cyc_time_t *when, int wait) cyclic = &cpu->cyp_cyclics[heap[0]]; ASSERT(nelems != 0); - cyclic_reprogram(cpu, cyclic->cy_expire); - -done: + be->cyb_reprogram(bar, cyclic->cy_expire); +out: mtx_unlock_spin(&cpu->cyp_mtx); +} + +static int +cyclic_remove_here(cyc_cpu_t *cpu, cyc_index_t ndx, cyc_time_t *when, int wait) +{ + cyc_backend_t *be = cpu->cyp_backend; + cyc_xcallarg_t arg; + + ASSERT(MUTEX_HELD(&cpu_lock)); + ASSERT(wait == CY_WAIT || wait == CY_NOWAIT); + + arg.cyx_ndx = ndx; + arg.cyx_cpu = cpu; + arg.cyx_when = when; + arg.cyx_wait = wait; + + be->cyb_xcall(be->cyb_arg, cpu->cyp_cpu, + (cyc_func_t)cyclic_remove_xcall, &arg); return (1); } @@ -1214,15 +1214,10 @@ cyclic_add_omni(cyc_omni_handler_t *omni) idp->cyi_omni_hdlr = *omni; - for (i = 0; i < MAXCPU; i++) { - if (pcpu_find(i) == NULL) - continue; - + CPU_FOREACH(i) { c = &solaris_cpu[i]; - if ((cpu = c->cpu_cyclic) == NULL) continue; - cyclic_omni_start(idp, cpu); } @@ -1325,12 +1320,8 @@ cyclic_mp_init(void) mutex_enter(&cpu_lock); - for (i = 0; i <= mp_maxid; i++) { - if (pcpu_find(i) == NULL) - continue; - + CPU_FOREACH(i) { c = &solaris_cpu[i]; - if (c->cpu_cyclic == NULL) cyclic_configure(c); } @@ -1346,10 +1337,8 @@ cyclic_uninit(void) CPU_FOREACH(id) { c = &solaris_cpu[id]; - if (c->cpu_cyclic == NULL) continue; - cyclic_unconfigure(c); } diff --git a/sys/cddl/dev/cyclic/i386/cyclic_machdep.c b/sys/cddl/dev/cyclic/i386/cyclic_machdep.c index 0b6ab59..1d0864a 100644 --- a/sys/cddl/dev/cyclic/i386/cyclic_machdep.c +++ b/sys/cddl/dev/cyclic/i386/cyclic_machdep.c @@ -121,13 +121,7 @@ static void reprogram(cyb_arg_t arg, hrtime_t exp) static void xcall(cyb_arg_t arg, cpu_t *c, cyc_func_t func, void *param) { - /* - * If the target CPU is the current one, just call the - * function. This covers the non-SMP case. - */ - if (c == &solaris_cpu[curcpu]) - (*func)(param); - else - smp_rendezvous_cpus((cpumask_t) (1 << c->cpuid), NULL, - func, smp_no_rendevous_barrier, param); + + smp_rendezvous_cpus((cpumask_t) (1 << c->cpuid), NULL, + func, smp_no_rendevous_barrier, param); } diff --git a/sys/cddl/dev/dtrace/amd64/dtrace_subr.c b/sys/cddl/dev/dtrace/amd64/dtrace_subr.c index b4c7eaf..f86f535 100644 --- a/sys/cddl/dev/dtrace/amd64/dtrace_subr.c +++ b/sys/cddl/dev/dtrace/amd64/dtrace_subr.c @@ -115,26 +115,13 @@ dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg) { cpumask_t cpus; - critical_enter(); - if (cpu == DTRACE_CPUALL) cpus = all_cpus; else - cpus = (cpumask_t) (1 << cpu); - - /* If the current CPU is in the set, call the function directly: */ - if ((cpus & (1 << curcpu)) != 0) { - (*func)(arg); - - /* Mask the current CPU from the set */ - cpus &= ~(1 << curcpu); - } - - /* If there are any CPUs in the set, cross-call to those CPUs */ - if (cpus != 0) - smp_rendezvous_cpus(cpus, NULL, func, smp_no_rendevous_barrier, arg); + cpus = (cpumask_t)1 << cpu; - critical_exit(); + smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func, + smp_no_rendevous_barrier, arg); } static void @@ -405,6 +392,7 @@ dtrace_gethrtime_init_cpu(void *arg) static void dtrace_gethrtime_init(void *arg) { + struct pcpu *pc; uint64_t tsc_f; cpumask_t map; int i; @@ -437,15 +425,14 @@ dtrace_gethrtime_init(void *arg) nsec_scale = ((uint64_t)NANOSEC << SCALE_SHIFT) / tsc_f; /* The current CPU is the reference one. 
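For illustration, the pattern the cyclic and DTrace hunks above converge on: drop the hand-rolled "run directly if the target is the current CPU" special case, bundle the handler's arguments into a small struct, and let smp_rendezvous_cpus() execute the handler on the target CPU. The sketch below is not part of the committed change; the struct and function names are hypothetical, and it assumes the cpumask_t-based smp_rendezvous_cpus() signature used throughout this diff.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pcpu.h>
#include <sys/smp.h>

struct xcall_arg {			/* hypothetical argument bundle */
	int	xa_in;
	int	xa_out;
};

static void
xcall_handler(void *param)
{
	struct xcall_arg *xa = param;

	/*
	 * Runs on the target CPU; the rendezvous code also covers the
	 * case where the target happens to be the calling CPU.
	 */
	xa->xa_out = xa->xa_in + curcpu;
}

static void
xcall_to_cpu(int cpuid, struct xcall_arg *xa)
{

	smp_rendezvous_cpus((cpumask_t)1 << cpuid,
	    smp_no_rendevous_barrier, xcall_handler,
	    smp_no_rendevous_barrier, xa);
}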
*/ + sched_pin(); tsc_skew[curcpu] = 0; - CPU_FOREACH(i) { if (i == curcpu) continue; - map = 0; - map |= (1 << curcpu); - map |= (1 << i); + pc = pcpu_find(i); + map = PCPU_GET(cpumask) | pc->pc_cpumask; smp_rendezvous_cpus(map, dtrace_gethrtime_init_sync, dtrace_gethrtime_init_cpu, @@ -453,6 +440,7 @@ dtrace_gethrtime_init(void *arg) tsc_skew[i] = tgt_cpu_tsc - hst_cpu_tsc; } + sched_unpin(); } SYSINIT(dtrace_gethrtime_init, SI_SUB_SMP, SI_ORDER_ANY, dtrace_gethrtime_init, NULL); diff --git a/sys/cddl/dev/dtrace/i386/dtrace_subr.c b/sys/cddl/dev/dtrace/i386/dtrace_subr.c index 9d85873..be23808 100644 --- a/sys/cddl/dev/dtrace/i386/dtrace_subr.c +++ b/sys/cddl/dev/dtrace/i386/dtrace_subr.c @@ -115,26 +115,13 @@ dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg) { cpumask_t cpus; - critical_enter(); - if (cpu == DTRACE_CPUALL) cpus = all_cpus; else - cpus = (cpumask_t) (1 << cpu); - - /* If the current CPU is in the set, call the function directly: */ - if ((cpus & (1 << curcpu)) != 0) { - (*func)(arg); - - /* Mask the current CPU from the set */ - cpus &= ~(1 << curcpu); - } - - /* If there are any CPUs in the set, cross-call to those CPUs */ - if (cpus != 0) - smp_rendezvous_cpus(cpus, NULL, func, smp_no_rendevous_barrier, arg); + cpus = (cpumask_t)1 << cpu; - critical_exit(); + smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func, + smp_no_rendevous_barrier, arg); } static void @@ -405,6 +392,7 @@ dtrace_gethrtime_init_cpu(void *arg) static void dtrace_gethrtime_init(void *arg) { + struct pcpu *pc; uint64_t tsc_f; cpumask_t map; int i; @@ -437,15 +425,14 @@ dtrace_gethrtime_init(void *arg) nsec_scale = ((uint64_t)NANOSEC << SCALE_SHIFT) / tsc_f; /* The current CPU is the reference one. */ + sched_pin(); tsc_skew[curcpu] = 0; - CPU_FOREACH(i) { if (i == curcpu) continue; - map = 0; - map |= (1 << curcpu); - map |= (1 << i); + pc = pcpu_find(i); + map = PCPU_GET(cpumask) | pc->pc_cpumask; smp_rendezvous_cpus(map, dtrace_gethrtime_init_sync, dtrace_gethrtime_init_cpu, @@ -453,6 +440,7 @@ dtrace_gethrtime_init(void *arg) tsc_skew[i] = tgt_cpu_tsc - hst_cpu_tsc; } + sched_unpin(); } SYSINIT(dtrace_gethrtime_init, SI_SUB_SMP, SI_ORDER_ANY, dtrace_gethrtime_init, NULL); diff --git a/sys/compat/ndis/kern_ndis.c b/sys/compat/ndis/kern_ndis.c index 321125b..302573a 100644 --- a/sys/compat/ndis/kern_ndis.c +++ b/sys/compat/ndis/kern_ndis.c @@ -433,6 +433,19 @@ ndis_flush_sysctls(arg) return (0); } +void * +ndis_get_routine_address(functbl, name) + struct image_patch_table *functbl; + char *name; +{ + int i; + + for (i = 0; functbl[i].ipt_name != NULL; i++) + if (strcmp(name, functbl[i].ipt_name) == 0) + return (functbl[i].ipt_wrap); + return (NULL); +} + static void ndis_return(dobj, arg) device_object *dobj; diff --git a/sys/compat/ndis/ndis_var.h b/sys/compat/ndis/ndis_var.h index a66a1d7..2692e54 100644 --- a/sys/compat/ndis/ndis_var.h +++ b/sys/compat/ndis/ndis_var.h @@ -1729,6 +1729,7 @@ extern int ndis_mtop(struct mbuf *, ndis_packet **); extern int ndis_ptom(struct mbuf **, ndis_packet *); extern int ndis_get_info(void *, ndis_oid, void *, int *); extern int ndis_set_info(void *, ndis_oid, void *, int *); +extern void *ndis_get_routine_address(struct image_patch_table *, char *); extern int ndis_get_supported_oids(void *, ndis_oid **, int *); extern int ndis_send_packets(void *, ndis_packet **, int); extern int ndis_send_packet(void *, ndis_packet *); diff --git a/sys/compat/ndis/ntoskrnl_var.h b/sys/compat/ndis/ntoskrnl_var.h index 84c2a7f..2642626 100644 --- 
a/sys/compat/ndis/ntoskrnl_var.h +++ b/sys/compat/ndis/ntoskrnl_var.h @@ -1466,6 +1466,7 @@ extern uint32_t IoConnectInterrupt(kinterrupt **, void *, void *, kspin_lock *, uint32_t, uint8_t, uint8_t, uint8_t, uint8_t, uint32_t, uint8_t); extern uint8_t MmIsAddressValid(void *); +extern void *MmGetSystemRoutineAddress(unicode_string *); extern void *MmMapIoSpace(uint64_t, uint32_t, uint32_t); extern void MmUnmapIoSpace(void *, size_t); extern void MmBuildMdlForNonPagedPool(mdl *); diff --git a/sys/compat/ndis/subr_ndis.c b/sys/compat/ndis/subr_ndis.c index a5caa88..4bdb6ef 100644 --- a/sys/compat/ndis/subr_ndis.c +++ b/sys/compat/ndis/subr_ndis.c @@ -197,6 +197,7 @@ static ndis_status NdisMMapIoSpace(void **, ndis_handle, ndis_physaddr, uint32_t); static void NdisMUnmapIoSpace(ndis_handle, void *, uint32_t); static uint32_t NdisGetCacheFillSize(void); +static void *NdisGetRoutineAddress(unicode_string *); static uint32_t NdisMGetDmaAlignment(ndis_handle); static ndis_status NdisMInitializeScatterGatherDma(ndis_handle, uint8_t, uint32_t); @@ -1642,6 +1643,17 @@ NdisGetCacheFillSize(void) return (128); } +static void * +NdisGetRoutineAddress(ustr) + unicode_string *ustr; +{ + ansi_string astr; + + if (RtlUnicodeStringToAnsiString(&astr, ustr, TRUE)) + return (NULL); + return (ndis_get_routine_address(ndis_functbl, astr.as_buf)); +} + static uint32_t NdisMGetDmaAlignment(handle) ndis_handle handle; @@ -3246,6 +3258,7 @@ image_patch_table ndis_functbl[] = { IMPORT_SFUNC(NdisInitializeString, 2), IMPORT_SFUNC(NdisFreeString, 1), IMPORT_SFUNC(NdisGetCurrentSystemTime, 1), + IMPORT_SFUNC(NdisGetRoutineAddress, 1), IMPORT_SFUNC(NdisGetSystemUpTime, 1), IMPORT_SFUNC(NdisGetVersion, 0), IMPORT_SFUNC(NdisMSynchronizeWithInterrupt, 3), diff --git a/sys/compat/ndis/subr_ntoskrnl.c b/sys/compat/ndis/subr_ntoskrnl.c index c523f8b..17016e0 100644 --- a/sys/compat/ndis/subr_ntoskrnl.c +++ b/sys/compat/ndis/subr_ntoskrnl.c @@ -2589,6 +2589,17 @@ MmGetPhysicalAddress(void *base) return (pmap_extract(kernel_map->pmap, (vm_offset_t)base)); } +void * +MmGetSystemRoutineAddress(ustr) + unicode_string *ustr; +{ + ansi_string astr; + + if (RtlUnicodeStringToAnsiString(&astr, ustr, TRUE)) + return (NULL); + return (ndis_get_routine_address(ntoskrnl_functbl, astr.as_buf)); +} + uint8_t MmIsAddressValid(vaddr) void *vaddr; @@ -4382,6 +4393,7 @@ image_patch_table ntoskrnl_functbl[] = { IMPORT_SFUNC(MmUnmapLockedPages, 2), IMPORT_SFUNC(MmBuildMdlForNonPagedPool, 1), IMPORT_SFUNC(MmGetPhysicalAddress, 1), + IMPORT_SFUNC(MmGetSystemRoutineAddress, 1), IMPORT_SFUNC(MmIsAddressValid, 1), IMPORT_SFUNC(MmMapIoSpace, 3 + 1), IMPORT_SFUNC(MmUnmapIoSpace, 2), diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64 index 470aca5..2b56a15 100644 --- a/sys/conf/files.amd64 +++ b/sys/conf/files.amd64 @@ -97,7 +97,6 @@ amd64/amd64/atomic.c standard amd64/amd64/autoconf.c standard amd64/amd64/bios.c standard amd64/amd64/bpf_jit_machdep.c optional bpf_jitter -amd64/amd64/busdma_machdep.c standard amd64/amd64/cpu_switch.S standard amd64/amd64/db_disasm.c optional ddb amd64/amd64/db_interface.c optional ddb @@ -126,7 +125,6 @@ amd64/amd64/stack_machdep.c optional ddb | stack amd64/amd64/support.S standard amd64/amd64/sys_machdep.c standard amd64/amd64/trap.c standard -amd64/amd64/tsc.c standard amd64/amd64/uio_machdep.c standard amd64/amd64/uma_machdep.c standard amd64/amd64/vm_machdep.c standard @@ -318,6 +316,7 @@ x86/isa/isa_dma.c standard x86/isa/nmi.c standard x86/isa/orm.c optional isa x86/pci/qpi.c standard 
+x86/x86/busdma_machdep.c standard x86/x86/dump_machdep.c standard x86/x86/io_apic.c standard x86/x86/local_apic.c standard @@ -326,3 +325,4 @@ x86/x86/mptable.c optional mptable x86/x86/mptable_pci.c optional mptable pci x86/x86/msi.c optional pci x86/x86/nexus.c standard +x86/x86/tsc.c standard diff --git a/sys/conf/files.i386 b/sys/conf/files.i386 index c6a3260..179306b 100644 --- a/sys/conf/files.i386 +++ b/sys/conf/files.i386 @@ -272,7 +272,6 @@ i386/i386/autoconf.c standard i386/i386/bios.c optional native i386/i386/bioscall.s optional native i386/i386/bpf_jit_machdep.c optional bpf_jitter -i386/i386/busdma_machdep.c standard i386/i386/db_disasm.c optional ddb i386/i386/db_interface.c optional ddb i386/i386/db_trace.c optional ddb @@ -312,7 +311,6 @@ i386/i386/support.s standard i386/i386/swtch.s standard i386/i386/sys_machdep.c standard i386/i386/trap.c standard -i386/i386/tsc.c standard i386/i386/uio_machdep.c standard i386/i386/vm86.c standard i386/i386/vm_machdep.c standard @@ -407,6 +405,7 @@ x86/isa/isa_dma.c optional isa x86/isa/nmi.c standard x86/isa/orm.c optional isa x86/pci/qpi.c standard +x86/x86/busdma_machdep.c standard x86/x86/dump_machdep.c standard x86/x86/io_apic.c optional apic x86/x86/local_apic.c optional apic @@ -415,3 +414,4 @@ x86/x86/mptable.c optional apic native x86/x86/mptable_pci.c optional apic pci x86/x86/msi.c optional apic pci x86/x86/nexus.c standard +x86/x86/tsc.c standard diff --git a/sys/conf/files.mips b/sys/conf/files.mips index 61a9295..e5949d2 100644 --- a/sys/conf/files.mips +++ b/sys/conf/files.mips @@ -63,6 +63,7 @@ mips/mips/support.S standard mips/mips/sys_machdep.c standard mips/mips/swtch.S standard mips/mips/uio_machdep.c standard +mips/mips/uma_machdep.c standard crypto/blowfish/bf_enc.c optional crypto | ipsec crypto/des/des_enc.c optional crypto | ipsec | netsmb geom/geom_bsd.c optional geom_bsd diff --git a/sys/conf/files.pc98 b/sys/conf/files.pc98 index ab88e22..158fbbd 100644 --- a/sys/conf/files.pc98 +++ b/sys/conf/files.pc98 @@ -134,7 +134,6 @@ i386/i386/autoconf.c standard i386/i386/bios.c standard i386/i386/bioscall.s standard i386/i386/bpf_jit_machdep.c optional bpf_jitter -i386/i386/busdma_machdep.c standard i386/i386/db_disasm.c optional ddb i386/i386/db_interface.c optional ddb i386/i386/db_trace.c optional ddb @@ -164,7 +163,6 @@ i386/i386/support.s standard i386/i386/swtch.s standard i386/i386/sys_machdep.c standard i386/i386/trap.c standard -i386/i386/tsc.c standard i386/i386/uio_machdep.c standard i386/i386/vm86.c standard i386/i386/vm_machdep.c standard @@ -251,6 +249,7 @@ pc98/pc98/pc98_machdep.c standard x86/isa/atpic.c optional atpic x86/isa/clock.c standard x86/isa/isa.c optional isa +x86/x86/busdma_machdep.c standard x86/x86/dump_machdep.c standard x86/x86/io_apic.c optional apic x86/x86/local_apic.c optional apic @@ -259,3 +258,4 @@ x86/x86/mptable.c optional apic x86/x86/mptable_pci.c optional apic pci x86/x86/msi.c optional apic pci x86/x86/nexus.c standard +x86/x86/tsc.c standard diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc index d46b39f..4856715 100644 --- a/sys/conf/files.powerpc +++ b/sys/conf/files.powerpc @@ -83,6 +83,8 @@ powerpc/aim/locore.S optional aim no-obj powerpc/aim/machdep.c optional aim powerpc/aim/mmu_oea.c optional aim powerpc powerpc/aim/mmu_oea64.c optional aim +powerpc/aim/moea64_if.m optional aim +powerpc/aim/moea64_native.c optional aim powerpc/aim/mp_cpudep.c optional aim smp powerpc/aim/nexus.c optional aim powerpc/aim/ofwmagic.S optional aim @@ -177,6 +179,7 @@ 
powerpc/powerpc/fuswintr.c standard powerpc/powerpc/gdb_machdep.c optional gdb powerpc/powerpc/in_cksum.c optional inet powerpc/powerpc/intr_machdep.c standard +powerpc/powerpc/iommu_if.m standard powerpc/powerpc/mem.c optional mem powerpc/powerpc/mmu_if.m standard powerpc/powerpc/mp_machdep.c optional smp diff --git a/sys/dev/aac/aac_cam.c b/sys/dev/aac/aac_cam.c index 44f4e19..130cd20 100644 --- a/sys/dev/aac/aac_cam.c +++ b/sys/dev/aac/aac_cam.c @@ -587,7 +587,8 @@ aac_cam_complete(struct aac_command *cm) (device == T_PROCESSOR) || (sc->flags & AAC_FLAGS_CAM_PASSONLY)) ccb->csio.data_ptr[0] = - ((device & 0xe0) | T_NODEVICE); + ((ccb->csio.data_ptr[0] & 0xe0) | + T_NODEVICE); } else if (ccb->ccb_h.status == CAM_SEL_TIMEOUT && ccb->ccb_h.target_lun != 0) { /* fix for INQUIRYs on Lun>0 */ diff --git a/sys/dev/acpica/acpi_hpet.c b/sys/dev/acpica/acpi_hpet.c index efe5747..97fe991 100644 --- a/sys/dev/acpica/acpi_hpet.c +++ b/sys/dev/acpica/acpi_hpet.c @@ -303,6 +303,23 @@ hpet_find(ACPI_HANDLE handle, UINT32 level, void *context, return (AE_OK); } +/* + * Find an existing IRQ resource that matches the requested IRQ range + * and return its RID. If one is not found, use a new RID. + */ +static int +hpet_find_irq_rid(device_t dev, u_long start, u_long end) +{ + u_long irq; + int error, rid; + + for (rid = 0;; rid++) { + error = bus_get_resource(dev, SYS_RES_IRQ, rid, &irq, NULL); + if (error != 0 || (start <= irq && irq <= end)) + return (rid); + } +} + /* Discover the HPET via the ACPI table of the same name. */ static void hpet_identify(driver_t *driver, device_t parent) @@ -540,6 +557,7 @@ hpet_attach(device_t dev) dvectors &= ~(1 << t->irq); } if (t->irq >= 0) { + t->intr_rid = hpet_find_irq_rid(dev, t->irq, t->irq); if (!(t->intr_res = bus_alloc_resource(dev, SYS_RES_IRQ, &t->intr_rid, t->irq, t->irq, 1, RF_ACTIVE))) { @@ -590,12 +608,12 @@ hpet_attach(device_t dev) } bus_write_4(sc->mem_res, HPET_ISR, 0xffffffff); sc->irq = -1; - sc->intr_rid = -1; /* If at least one timer needs legacy IRQ - set it up. 
*/ if (sc->useirq) { j = i = fls(cvectors) - 1; while (j > 0 && (cvectors & (1 << (j - 1))) != 0) j--; + sc->intr_rid = hpet_find_irq_rid(dev, j, i); if (!(sc->intr_res = bus_alloc_resource(dev, SYS_RES_IRQ, &sc->intr_rid, j, i, 1, RF_SHAREABLE | RF_ACTIVE))) device_printf(dev,"Can't map interrupt.\n"); diff --git a/sys/dev/ahci/ahci.c b/sys/dev/ahci/ahci.c index 4b93d9b..f767215 100644 --- a/sys/dev/ahci/ahci.c +++ b/sys/dev/ahci/ahci.c @@ -172,6 +172,10 @@ static struct { {0x614511ab, 0x00, "Marvell 88SX6145", AHCI_Q_NOFORCE|AHCI_Q_4CH|AHCI_Q_EDGEIS}, {0x91231b4b, 0x11, "Marvell 88SE912x", AHCI_Q_NOBSYRES}, {0x91231b4b, 0x00, "Marvell 88SE912x", AHCI_Q_EDGEIS|AHCI_Q_SATA2|AHCI_Q_NOBSYRES}, + {0x06201103, 0x00, "HighPoint RocketRAID 620", AHCI_Q_NOBSYRES}, + {0x06201b4b, 0x00, "HighPoint RocketRAID 620", AHCI_Q_NOBSYRES}, + {0x06221103, 0x00, "HighPoint RocketRAID 622", AHCI_Q_NOBSYRES}, + {0x06221b4b, 0x00, "HighPoint RocketRAID 622", AHCI_Q_NOBSYRES}, {0x044c10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x044d10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, {0x044e10de, 0x00, "NVIDIA MCP65", AHCI_Q_NOAA}, diff --git a/sys/dev/bwn/if_bwn.c b/sys/dev/bwn/if_bwn.c index 587b734..b2f72f5 100644 --- a/sys/dev/bwn/if_bwn.c +++ b/sys/dev/bwn/if_bwn.c @@ -2882,7 +2882,7 @@ bwn_set_channel(struct ieee80211com *ic) error = bwn_switch_band(sc, ic->ic_curchan); if (error) - goto fail;; + goto fail; bwn_mac_suspend(mac); bwn_set_txretry(mac, BWN_RETRY_SHORT, BWN_RETRY_LONG); chan = ieee80211_chan2ieee(ic, ic->ic_curchan); @@ -8260,7 +8260,7 @@ bwn_switch_band(struct bwn_softc *sc, struct ieee80211_channel *chan) device_printf(sc->sc_dev, "switching to %s-GHz band\n", IEEE80211_IS_CHAN_2GHZ(chan) ? "2" : "5"); - down_dev = sc->sc_curmac;; + down_dev = sc->sc_curmac; status = down_dev->mac_status; if (status >= BWN_MAC_STATUS_STARTED) bwn_core_stop(down_dev); diff --git a/sys/dev/e1000/if_em.c b/sys/dev/e1000/if_em.c index 6728a98..4cfda7f 100644 --- a/sys/dev/e1000/if_em.c +++ b/sys/dev/e1000/if_em.c @@ -525,7 +525,7 @@ em_attach(device_t dev) /* Sysctl for setting the interface flow control */ em_set_flow_cntrl(adapter, "flow_control", - "max number of rx packets to process", + "configure flow control", &adapter->fc_setting, em_fc_setting); /* @@ -3751,46 +3751,43 @@ em_refresh_mbufs(struct rx_ring *rxr, int limit) cleaned = -1; while (i != limit) { rxbuf = &rxr->rx_buffers[i]; - /* - ** Just skip entries with a buffer, - ** they can only be due to an error - ** and are to be reused. - */ - if (rxbuf->m_head != NULL) - goto reuse; - m = m_getjcl(M_DONTWAIT, MT_DATA, - M_PKTHDR, adapter->rx_mbuf_sz); - /* - ** If we have a temporary resource shortage - ** that causes a failure, just abort refresh - ** for now, we will return to this point when - ** reinvoked from em_rxeof. - */ - if (m == NULL) - goto update; + if (rxbuf->m_head == NULL) { + m = m_getjcl(M_DONTWAIT, MT_DATA, + M_PKTHDR, adapter->rx_mbuf_sz); + /* + ** If we have a temporary resource shortage + ** that causes a failure, just abort refresh + ** for now, we will return to this point when + ** reinvoked from em_rxeof. 
+ */ + if (m == NULL) + goto update; + } else + m = rxbuf->m_head; + m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz; + m->m_flags |= M_PKTHDR; + m->m_data = m->m_ext.ext_buf; /* Use bus_dma machinery to setup the memory mapping */ error = bus_dmamap_load_mbuf_sg(rxr->rxtag, rxbuf->map, m, segs, &nsegs, BUS_DMA_NOWAIT); if (error != 0) { + printf("Refresh mbufs: hdr dmamap load" + " failure - %d\n", error); m_free(m); + rxbuf->m_head = NULL; goto update; } - - /* If nsegs is wrong then the stack is corrupt. */ - KASSERT(nsegs == 1, ("Too many segments returned!")); - + rxbuf->m_head = m; bus_dmamap_sync(rxr->rxtag, rxbuf->map, BUS_DMASYNC_PREREAD); - rxbuf->m_head = m; rxr->rx_base[i].buffer_addr = htole64(segs[0].ds_addr); -reuse: + cleaned = i; /* Calculate next index */ if (++i == adapter->num_rx_desc) i = 0; - /* This is the work marker for refresh */ rxr->next_to_refresh = i; } update: @@ -4208,8 +4205,8 @@ em_rxeof(struct rx_ring *rxr, int count, int *done) len = le16toh(cur->length); eop = (status & E1000_RXD_STAT_EOP) != 0; - if ((rxr->discard == TRUE) || (cur->errors & - E1000_RXD_ERR_FRAME_ERR_MASK)) { + if ((cur->errors & E1000_RXD_ERR_FRAME_ERR_MASK) || + (rxr->discard == TRUE)) { ifp->if_ierrors++; ++rxr->rx_discarded; if (!eop) /* Catch subsequent segs */ @@ -4306,9 +4303,7 @@ next_desc: static __inline void em_rx_discard(struct rx_ring *rxr, int i) { - struct adapter *adapter = rxr->adapter; struct em_buffer *rbuf; - struct mbuf *m; rbuf = &rxr->rx_buffers[i]; /* Free any previous pieces */ @@ -4318,14 +4313,14 @@ em_rx_discard(struct rx_ring *rxr, int i) rxr->fmp = NULL; rxr->lmp = NULL; } - - /* Reset state, keep loaded DMA map and reuse */ - m = rbuf->m_head; - m->m_len = m->m_pkthdr.len = adapter->rx_mbuf_sz; - m->m_flags |= M_PKTHDR; - m->m_data = m->m_ext.ext_buf; - m->m_next = NULL; - + /* + ** Free buffer and allow em_refresh_mbufs() + ** to clean up and recharge buffer. + */ + if (rbuf->m_head) { + m_free(rbuf->m_head); + rbuf->m_head = NULL; + } return; } diff --git a/sys/dev/e1000/if_igb.c b/sys/dev/e1000/if_igb.c index 14ccede..ff3c2cf 100644 --- a/sys/dev/e1000/if_igb.c +++ b/sys/dev/e1000/if_igb.c @@ -3429,8 +3429,6 @@ igb_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp) case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); ip_hlen = sizeof(struct ip6_hdr); - if (mp->m_len < ehdrlen + ip_hlen) - return (FALSE); ipproto = ip6->ip6_nxt; type_tucmd_mlhl |= E1000_ADVTXD_TUCMD_IPV6; break; diff --git a/sys/dev/ichwd/ichwd.c b/sys/dev/ichwd/ichwd.c index 9ddec20..c64468a 100644 --- a/sys/dev/ichwd/ichwd.c +++ b/sys/dev/ichwd/ichwd.c @@ -288,30 +288,23 @@ static __inline void ichwd_tmr_set(struct ichwd_softc *sc, unsigned int timeout) { - /* - * If the datasheets are to be believed, the minimum value - * actually varies from chipset to chipset - 4 for ICH5 and 2 for - * all other chipsets. I suspect this is a bug in the ICH5 - * datasheet and that the minimum is uniformly 2, but I'd rather - * err on the side of caution. 
- */ - if (timeout < 4) - timeout = 4; + if (timeout < TCO_RLD_TMR_MIN) + timeout = TCO_RLD_TMR_MIN; if (sc->ich_version <= 5) { uint8_t tmr_val8 = ichwd_read_tco_1(sc, TCO_TMR1); - tmr_val8 &= 0xc0; - if (timeout > 0x3f) - timeout = 0x3f; + tmr_val8 &= (~TCO_RLD1_TMR_MAX & 0xff); + if (timeout > TCO_RLD1_TMR_MAX) + timeout = TCO_RLD1_TMR_MAX; tmr_val8 |= timeout; ichwd_write_tco_1(sc, TCO_TMR1, tmr_val8); } else { uint16_t tmr_val16 = ichwd_read_tco_2(sc, TCO_TMR2); - tmr_val16 &= 0xfc00; - if (timeout > 0x03ff) - timeout = 0x03ff; + tmr_val16 &= (~TCO_RLD2_TMR_MAX & 0xffff); + if (timeout > TCO_RLD2_TMR_MAX) + timeout = TCO_RLD2_TMR_MAX; tmr_val16 |= timeout; ichwd_write_tco_2(sc, TCO_TMR2, tmr_val16); } @@ -520,8 +513,9 @@ ichwd_attach(device_t dev) device_get_desc(dev), sc->ich_version); /* - * Determine if we are coming up after a watchdog-induced reset. - * This bit is cleared in ichwd_sts_reset(). + * Determine if we are coming up after a watchdog-induced reset. Some + * BIOSes may clear this bit at bootup, preventing us from reporting + * this case on such systems. We clear this bit in ichwd_sts_reset(). */ if ((ichwd_read_tco_2(sc, TCO2_STS) & TCO_SECOND_TO_STS) != 0) device_printf(dev, diff --git a/sys/dev/ichwd/ichwd.h b/sys/dev/ichwd/ichwd.h index c0a1141..442460b 100644 --- a/sys/dev/ichwd/ichwd.h +++ b/sys/dev/ichwd/ichwd.h @@ -199,6 +199,17 @@ struct ichwd_softc { #define TCO_TMR_HALT 0x0800 /* clear to enable WDT */ #define TCO_CNT_PRESERVE 0x0200 /* preserve these bits */ +/* + * Masks for the TCO timer value field in TCO_RLD. + * If the datasheets are to be believed, the minimum value actually varies + * from chipset to chipset - 4 for ICH5 and 2 for all other chipsets. + * I suspect this is a bug in the ICH5 datasheet and that the minimum is + * uniformly 2, but I'd rather err on the side of caution. 
+ */ +#define TCO_RLD_TMR_MIN 0x0004 +#define TCO_RLD1_TMR_MAX 0x003f +#define TCO_RLD2_TMR_MAX 0x03ff + /* approximate length in nanoseconds of one WDT tick (about 0.6 sec) */ #define ICHWD_TICK 600000000 diff --git a/sys/dev/iwn/if_iwn.c b/sys/dev/iwn/if_iwn.c index 1d58ef8..a59d8fd 100644 --- a/sys/dev/iwn/if_iwn.c +++ b/sys/dev/iwn/if_iwn.c @@ -223,7 +223,7 @@ static int iwn5000_save_calib_result(struct iwn_softc *, struct iwn_phy_calib *, int, int); static void iwn5000_free_calib_results(struct iwn_softc *); static int iwn5000_chrystal_calib(struct iwn_softc *); -static int iwn5000_send_calib_query(struct iwn_softc *); +static int iwn5000_send_calib_query(struct iwn_softc *, uint32_t); static int iwn5000_rx_calib_result(struct iwn_softc *, struct iwn_rx_desc *, struct iwn_rx_data *); static int iwn5000_send_wimax_coex(struct iwn_softc *); @@ -756,6 +756,7 @@ iwn_hal_attach(struct iwn_softc *sc) default: sc->txchainmask = IWN_ANT_ABC; sc->rxchainmask = IWN_ANT_ABC; + sc->calib_runtime = IWN_CALIB_DC; break; } sc->calib_init = IWN_CALIB_XTAL | IWN_CALIB_LO | @@ -767,8 +768,9 @@ iwn_hal_attach(struct iwn_softc *sc) sc->fwname = "iwn6050fw"; sc->txchainmask = IWN_ANT_AB; sc->rxchainmask = IWN_ANT_AB; - sc->calib_init = IWN_CALIB_XTAL | IWN_CALIB_DC | IWN_CALIB_LO | + sc->calib_init = IWN_CALIB_XTAL | IWN_CALIB_LO | IWN_CALIB_TX_IQ | IWN_CALIB_BASE_BAND; + sc->calib_runtime = IWN_CALIB_DC; break; case IWN_HW_REV_TYPE_6005: sc->sc_hal = &iwn5000_hal; @@ -778,6 +780,7 @@ iwn_hal_attach(struct iwn_softc *sc) sc->rxchainmask = IWN_ANT_AB; sc->calib_init = IWN_CALIB_XTAL | IWN_CALIB_LO | IWN_CALIB_TX_IQ | IWN_CALIB_BASE_BAND; + sc->calib_runtime = IWN_CALIB_DC; break; default: device_printf(sc->sc_dev, "adapter type %d not supported\n", @@ -1980,7 +1983,8 @@ iwn_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) /* * RUN -> RUN transition; Just restart the timers. */ - if (vap->iv_state == IEEE80211_S_RUN) { + if (vap->iv_state == IEEE80211_S_RUN && + vap->iv_opmode != IEEE80211_M_MONITOR) { iwn_calib_reset(sc); break; } @@ -4848,11 +4852,9 @@ iwn_run(struct iwn_softc *sc, struct ieee80211vap *vap) struct iwn_node_info node; int error; - sc->calib.state = IWN_CALIB_STATE_INIT; - if (ic->ic_opmode == IEEE80211_M_MONITOR) { /* Link LED blinks while monitoring. */ - iwn_set_led(sc, IWN_LED_LINK, 5, 5); + iwn_set_led(sc, IWN_LED_LINK, 20, 20); return 0; } error = iwn_set_timing(sc, ni); @@ -5316,7 +5318,7 @@ iwn5000_chrystal_calib(struct iwn_softc *sc) * only once at first boot. 
*/ static int -iwn5000_send_calib_query(struct iwn_softc *sc) +iwn5000_send_calib_query(struct iwn_softc *sc, uint32_t cfg) { #define CALIB_INIT_CFG 0xffffffff; struct iwn5000_calib_config cmd; @@ -5324,12 +5326,15 @@ iwn5000_send_calib_query(struct iwn_softc *sc) memset(&cmd, 0, sizeof cmd); cmd.ucode.once.enable = CALIB_INIT_CFG; - cmd.ucode.once.start = CALIB_INIT_CFG; - cmd.ucode.once.send = CALIB_INIT_CFG; - cmd.ucode.flags = CALIB_INIT_CFG; + if (cfg == 0) { + cmd.ucode.once.start = CALIB_INIT_CFG; + cmd.ucode.once.send = CALIB_INIT_CFG; + cmd.ucode.flags = CALIB_INIT_CFG; + } else + cmd.ucode.once.start = cfg; - DPRINTF(sc, IWN_DEBUG_CALIBRATE, "%s: query calibration results\n", - __func__); + DPRINTF(sc, IWN_DEBUG_CALIBRATE, + "%s: query calibration results, cfg %x\n", __func__, cfg); error = iwn_cmd(sc, IWN5000_CMD_CALIB_CONFIG, &cmd, sizeof cmd, 0); if (error != 0) @@ -5559,7 +5564,7 @@ iwn5000_post_alive(struct iwn_softc *sc) * Query other calibration results from the initialization * firmware. */ - error = iwn5000_send_calib_query(sc); + error = iwn5000_send_calib_query(sc, 0); if (error != 0) { device_printf(sc->sc_dev, "%s: could not query calibration, error=%d\n", @@ -5579,6 +5584,19 @@ iwn5000_post_alive(struct iwn_softc *sc) * firmware to the runtime firmware. */ error = iwn5000_send_calib_results(sc); + + /* + * Tell the runtime firmware to do certain calibration types. + */ + if (sc->calib_runtime != 0) { + error = iwn5000_send_calib_query(sc, sc->calib_runtime); + if (error != 0) { + device_printf(sc->sc_dev, + "%s: could not send query calibration, " + "error=%d, cfg=%x\n", __func__, error, + sc->calib_runtime); + } + } } return error; } diff --git a/sys/dev/iwn/if_iwnvar.h b/sys/dev/iwn/if_iwnvar.h index f8b45b6..deb0be7 100644 --- a/sys/dev/iwn/if_iwnvar.h +++ b/sys/dev/iwn/if_iwnvar.h @@ -264,6 +264,7 @@ struct iwn_softc { int calib_cnt; struct iwn_calib_state calib; u_int calib_init; + u_int calib_runtime; #define IWN_CALIB_XTAL (1 << IWN_CALIB_IDX_XTAL) #define IWN_CALIB_DC (1 << IWN_CALIB_IDX_DC) #define IWN_CALIB_LO (1 << IWN_CALIB_IDX_LO) diff --git a/sys/dev/ixgbe/ixgbe.c b/sys/dev/ixgbe/ixgbe.c index 338172a..e246bf3 100644 --- a/sys/dev/ixgbe/ixgbe.c +++ b/sys/dev/ixgbe/ixgbe.c @@ -46,7 +46,7 @@ int ixgbe_display_debug_stats = 0; /********************************************************************* * Driver version *********************************************************************/ -char ixgbe_driver_version[] = "2.3.6"; +char ixgbe_driver_version[] = "2.3.7"; /********************************************************************* * PCI Device ID Table @@ -3023,16 +3023,12 @@ ixgbe_tx_ctx_setup(struct tx_ring *txr, struct mbuf *mp) case ETHERTYPE_IP: ip = (struct ip *)(mp->m_data + ehdrlen); ip_hlen = ip->ip_hl << 2; - if (mp->m_len < ehdrlen + ip_hlen) - return (FALSE); ipproto = ip->ip_p; type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV4; break; case ETHERTYPE_IPV6: ip6 = (struct ip6_hdr *)(mp->m_data + ehdrlen); ip_hlen = sizeof(struct ip6_hdr); - if (mp->m_len < ehdrlen + ip_hlen) - return (FALSE); ipproto = ip6->ip6_nxt; type_tucmd_mlhl |= IXGBE_ADVTXD_TUCMD_IPV6; break; diff --git a/sys/dev/mfi/mfi_cam.c b/sys/dev/mfi/mfi_cam.c index fe1ffe5..5137c36 100644 --- a/sys/dev/mfi/mfi_cam.c +++ b/sys/dev/mfi/mfi_cam.c @@ -340,14 +340,14 @@ mfip_done(struct mfi_command *cm) ccbh->status = CAM_REQ_CMP; csio->scsi_status = pt->header.scsi_status; if (ccbh->flags & CAM_CDB_POINTER) - command = ccb->csio.cdb_io.cdb_ptr[0]; + command = csio->cdb_io.cdb_ptr[0]; else - 
command = ccb->csio.cdb_io.cdb_bytes[0]; + command = csio->cdb_io.cdb_bytes[0]; if (command == INQUIRY) { - device = ccb->csio.data_ptr[0] & 0x1f; + device = csio->data_ptr[0] & 0x1f; if ((device == T_DIRECT) || (device == T_PROCESSOR)) csio->data_ptr[0] = - (device & 0xe0) | T_NODEVICE; + (csio->data_ptr[0] & 0xe0) | T_NODEVICE; } break; } diff --git a/sys/dev/mps/mps.c b/sys/dev/mps/mps.c index 1fb37e2..61cbaa6 100644 --- a/sys/dev/mps/mps.c +++ b/sys/dev/mps/mps.c @@ -1282,7 +1282,7 @@ mps_dispatch_event(struct mps_softc *sc, uintptr_t data, MPI2_EVENT_NOTIFICATION_REPLY *reply) { struct mps_event_handle *eh; - int event, handled = 0;; + int event, handled = 0; event = reply->Event; TAILQ_FOREACH(eh, &sc->event_list, eh_list) { diff --git a/sys/dev/siba/siba_bwn.c b/sys/dev/siba/siba_bwn.c index b335484..1e0ba0d 100644 --- a/sys/dev/siba/siba_bwn.c +++ b/sys/dev/siba/siba_bwn.c @@ -326,7 +326,7 @@ static int siba_bwn_read_ivar(device_t dev, device_t child, int which, uintptr_t *result) { struct siba_dev_softc *sd; - struct siba_softc *siba;; + struct siba_softc *siba; sd = device_get_ivars(child); siba = sd->sd_bus; diff --git a/sys/dev/sis/if_sisreg.h b/sys/dev/sis/if_sisreg.h index 28d4390..058d9e7 100644 --- a/sys/dev/sis/if_sisreg.h +++ b/sys/dev/sis/if_sisreg.h @@ -497,7 +497,7 @@ struct sis_softc { int sis_tx_prod; int sis_tx_cons; int sis_tx_cnt; - int sis_rx_cons;; + int sis_rx_cons; bus_addr_t sis_rx_paddr; bus_addr_t sis_tx_paddr; struct callout sis_stat_ch; diff --git a/sys/dev/usb/net/if_axe.c b/sys/dev/usb/net/if_axe.c index 98dac91..8be6fc8 100644 --- a/sys/dev/usb/net/if_axe.c +++ b/sys/dev/usb/net/if_axe.c @@ -200,7 +200,8 @@ static const struct usb_config axe_config[AXE_N_TRANSFER] = { .type = UE_BULK, .endpoint = UE_ADDR_ANY, .direction = UE_DIR_OUT, - .bufsize = AXE_BULK_BUF_SIZE, + .frames = 16, + .bufsize = 16 * MCLBYTES, .flags = {.pipe_bof = 1,.force_short_xfer = 1,}, .callback = axe_bulk_write_callback, .timeout = 10000, /* 10 seconds */ @@ -939,7 +940,7 @@ axe_bulk_write_callback(struct usb_xfer *xfer, usb_error_t error) struct ifnet *ifp = uether_getifp(&sc->sc_ue); struct usb_page_cache *pc; struct mbuf *m; - int pos; + int nframes, pos; switch (USB_GET_STATE(xfer)) { case USB_ST_TRANSFERRED: @@ -956,40 +957,34 @@ tr_setup: */ return; } - pos = 0; - pc = usbd_xfer_get_frame(xfer, 0); - - while (1) { + for (nframes = 0; nframes < 16 && + !IFQ_DRV_IS_EMPTY(&ifp->if_snd); nframes++) { IFQ_DRV_DEQUEUE(&ifp->if_snd, m); - - if (m == NULL) { - if (pos > 0) - break; /* send out data */ - return; - } - if (m->m_pkthdr.len > MCLBYTES) { - m->m_pkthdr.len = MCLBYTES; - } + if (m == NULL) + break; + usbd_xfer_set_frame_offset(xfer, nframes * MCLBYTES, + nframes); + pos = 0; + pc = usbd_xfer_get_frame(xfer, nframes); if (AXE_IS_178_FAMILY(sc)) { - hdr.len = htole16(m->m_pkthdr.len); hdr.ilen = ~hdr.len; - usbd_copy_in(pc, pos, &hdr, sizeof(hdr)); - pos += sizeof(hdr); - - /* - * NOTE: Some drivers force a short packet - * by appending a dummy header with zero - * length at then end of the USB transfer. - * This driver uses the - * USB_FORCE_SHORT_XFER flag instead. 
- */ + usbd_m_copy_in(pc, pos, m, 0, m->m_pkthdr.len); + pos += m->m_pkthdr.len; + if ((pos % 512) == 0) { + hdr.len = 0; + hdr.ilen = 0xffff; + usbd_copy_in(pc, pos, &hdr, + sizeof(hdr)); + pos += sizeof(hdr); + } + } else { + usbd_m_copy_in(pc, pos, m, 0, m->m_pkthdr.len); + pos += m->m_pkthdr.len; } - usbd_m_copy_in(pc, pos, m, 0, m->m_pkthdr.len); - pos += m->m_pkthdr.len; /* * XXX @@ -1010,22 +1005,16 @@ tr_setup: m_freem(m); - if (AXE_IS_178_FAMILY(sc)) { - if (pos > (AXE_BULK_BUF_SIZE - MCLBYTES - sizeof(hdr))) { - /* send out frame(s) */ - break; - } - } else { - /* send out frame */ - break; - } + /* Set frame length. */ + usbd_xfer_set_frame_len(xfer, nframes, pos); + } + if (nframes != 0) { + usbd_xfer_set_frames(xfer, nframes); + usbd_transfer_submit(xfer); + ifp->if_drv_flags |= IFF_DRV_OACTIVE; } - - usbd_xfer_set_frame_len(xfer, 0, pos); - usbd_transfer_submit(xfer); - ifp->if_drv_flags |= IFF_DRV_OACTIVE; return; - + /* NOTREACHED */ default: /* Error */ DPRINTFN(11, "transfer error, %s\n", usbd_errstr(error)); diff --git a/sys/dev/usb/usb_pf.c b/sys/dev/usb/usb_pf.c index 4ac0eeb..2658907 100644 --- a/sys/dev/usb/usb_pf.c +++ b/sys/dev/usb/usb_pf.c @@ -64,6 +64,7 @@ usbpf_attach(struct usb_bus *ubus) ifp = ubus->ifp = if_alloc(IFT_USB); if_initname(ifp, "usbus", device_get_unit(ubus->bdev)); + ifp->if_flags = IFF_CANTCONFIG; if_attach(ifp); if_up(ifp); diff --git a/sys/dev/usb/usb_request.c b/sys/dev/usb/usb_request.c index 60767388..293c4bf 100644 --- a/sys/dev/usb/usb_request.c +++ b/sys/dev/usb/usb_request.c @@ -793,6 +793,10 @@ usbd_req_reset_port(struct usb_device *udev, struct mtx *mtx, uint8_t port) if (err) { goto done; } + /* if the device disappeared, just give up */ + if (!(UGETW(ps.wPortStatus) & UPS_CURRENT_CONNECT_STATUS)) { + goto done; + } /* check if reset is complete */ if (UGETW(ps.wPortChange) & UPS_C_PORT_RESET) { break; diff --git a/sys/dev/wpi/if_wpi.c b/sys/dev/wpi/if_wpi.c index f6edc91..72d5910 100644 --- a/sys/dev/wpi/if_wpi.c +++ b/sys/dev/wpi/if_wpi.c @@ -1248,8 +1248,25 @@ wpi_newstate(struct ieee80211vap *vap, enum ieee80211_state nstate, int arg) IEEE80211_UNLOCK(ic); WPI_LOCK(sc); - if (nstate == IEEE80211_S_AUTH) { - /* The node must be registered in the firmware before auth */ + if (nstate == IEEE80211_S_SCAN && vap->iv_state != IEEE80211_S_INIT) { + /* + * On !INIT -> SCAN transitions, we need to clear any possible + * knowledge about associations. + */ + error = wpi_config(sc); + if (error != 0) { + device_printf(sc->sc_dev, + "%s: device config failed, error %d\n", + __func__, error); + } + } + if (nstate == IEEE80211_S_AUTH || + (nstate == IEEE80211_S_ASSOC && vap->iv_state == IEEE80211_S_RUN)) { + /* + * The node must be registered in the firmware before auth. + * Also the associd must be cleared on RUN -> ASSOC + * transitions. 
+ */ error = wpi_auth(sc, vap); if (error != 0) { device_printf(sc->sc_dev, diff --git a/sys/dev/xen/blkfront/blkfront.c b/sys/dev/xen/blkfront/blkfront.c index 8ff8757..2e08bb1 100644 --- a/sys/dev/xen/blkfront/blkfront.c +++ b/sys/dev/xen/blkfront/blkfront.c @@ -508,7 +508,7 @@ blkfront_initialize(struct xb_softc *sc) sc->ring_pages = 1; sc->max_requests = BLKIF_MAX_RING_REQUESTS(PAGE_SIZE); sc->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK; - sc->max_request_size = sc->max_request_segments * PAGE_SIZE; + sc->max_request_size = (sc->max_request_segments - 1) * PAGE_SIZE; sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments); /* diff --git a/sys/fs/nfsserver/nfs_nfsdstate.c b/sys/fs/nfsserver/nfs_nfsdstate.c index 908c028..9f657d6 100644 --- a/sys/fs/nfsserver/nfs_nfsdstate.c +++ b/sys/fs/nfsserver/nfs_nfsdstate.c @@ -147,12 +147,20 @@ nfsrv_setclient(struct nfsrv_descript *nd, struct nfsclient **new_clpp, if (nfsrv_openpluslock > NFSRV_V4STATELIMIT) return (NFSERR_RESOURCE); - if ((nd->nd_flag & ND_GSS) && nfsrv_nogsscallback) + if (nfsrv_issuedelegs == 0 || + ((nd->nd_flag & ND_GSS) != 0 && nfsrv_nogsscallback != 0)) /* - * Don't do callbacks for AUTH_GSS. - * (Since these aren't yet debugged, they might cause the - * server to crap out, if they get past the Init call to - * the client.) + * Don't do callbacks when delegations are disabled or + * for AUTH_GSS unless enabled via nfsrv_nogsscallback. + * If establishing a callback connection is attempted + * when a firewall is blocking the callback path, the + * server may wait too long for the connect attempt to + * succeed during the Open. Some clients, such as Linux, + * may timeout and give up on the Open before the server + * replies. Also, since AUTH_GSS callbacks are not + * yet interoperability tested, they might cause the + * server to crap out, if they get past the Init call to + * the client. */ new_clp->lc_program = 0; diff --git a/sys/geom/part/g_part_mbr.c b/sys/geom/part/g_part_mbr.c index 454c759..f9d3d0d 100644 --- a/sys/geom/part/g_part_mbr.c +++ b/sys/geom/part/g_part_mbr.c @@ -449,12 +449,6 @@ g_part_mbr_read(struct g_part_table *basetable, struct g_consumer *cp) basetable->gpt_heads = heads; } } - if ((ent.dp_start % basetable->gpt_sectors) != 0) - printf("GEOM: %s: partition %d does not start on a " - "track boundary.\n", pp->name, index + 1); - if ((ent.dp_size % basetable->gpt_sectors) != 0) - printf("GEOM: %s: partition %d does not end on a " - "track boundary.\n", pp->name, index + 1); entry = (struct g_part_mbr_entry *)g_part_new_entry(basetable, index + 1, ent.dp_start, ent.dp_start + ent.dp_size - 1); diff --git a/sys/i386/i386/identcpu.c b/sys/i386/i386/identcpu.c index afdedc2..3bcc416 100644 --- a/sys/i386/i386/identcpu.c +++ b/sys/i386/i386/identcpu.c @@ -100,6 +100,8 @@ static int hw_clockrate; SYSCTL_INT(_hw, OID_AUTO, clockrate, CTLFLAG_RD, &hw_clockrate, 0, "CPU instruction clock rate"); +static eventhandler_tag tsc_post_tag; + static char cpu_brand[48]; #define MAX_BRAND_INDEX 8 @@ -856,28 +858,6 @@ printcpuinfo(void) * If this CPU supports P-state invariant TSC then * mention the capability. 
*/ - switch (cpu_vendor_id) { - case CPU_VENDOR_AMD: - if ((amd_pminfo & AMDPM_TSC_INVARIANT) || - CPUID_TO_FAMILY(cpu_id) >= 0x10 || - cpu_id == 0x60fb2) - tsc_is_invariant = 1; - break; - case CPU_VENDOR_INTEL: - if ((amd_pminfo & AMDPM_TSC_INVARIANT) || - (CPUID_TO_FAMILY(cpu_id) == 0x6 && - CPUID_TO_MODEL(cpu_id) >= 0xe) || - (CPUID_TO_FAMILY(cpu_id) == 0xf && - CPUID_TO_MODEL(cpu_id) >= 0x3)) - tsc_is_invariant = 1; - break; - case CPU_VENDOR_CENTAUR: - if (CPUID_TO_FAMILY(cpu_id) == 0x6 && - CPUID_TO_MODEL(cpu_id) >= 0xf && - (rdmsr(0x1203) & 0x100000000ULL) == 0) - tsc_is_invariant = 1; - break; - } if (tsc_is_invariant) printf("\n TSC: P-state invariant"); @@ -1071,21 +1051,29 @@ identifycyrix(void) /* Update TSC freq with the value indicated by the caller. */ static void -tsc_freq_changed(void *arg, const struct cf_level *level, int status) +tsc_freq_changed(void *arg __unused, const struct cf_level *level, int status) { - /* - * If there was an error during the transition or - * TSC is P-state invariant, don't do anything. - */ - if (status != 0 || tsc_is_invariant) + + /* If there was an error during the transition, don't do anything. */ + if (status != 0) return; /* Total setting for this level gives the new frequency in MHz. */ hw_clockrate = level->total_set.freq; } -EVENTHANDLER_DEFINE(cpufreq_post_change, tsc_freq_changed, NULL, - EVENTHANDLER_PRI_ANY); +static void +hook_tsc_freq(void *arg __unused) +{ + + if (tsc_is_invariant) + return; + + tsc_post_tag = EVENTHANDLER_REGISTER(cpufreq_post_change, + tsc_freq_changed, NULL, EVENTHANDLER_PRI_ANY); +} + +SYSINIT(hook_tsc_freq, SI_SUB_CONFIGURE, SI_ORDER_ANY, hook_tsc_freq, NULL); /* * Final stage of CPU identification. -- Should I check TI? diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index 49800d9..a04e578 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -1157,12 +1157,16 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate) thread_unlock(curthread); #endif + tsc2 -= tsc1; + if (tsc_freq != 0 && !tsc_is_broken) { + *rate = tsc2 * 1000; + return (0); + } + /* - * Calculate the difference in readings, convert to Mhz, and - * subtract 0.5% of the total. Empirical testing has shown that + * Subtract 0.5% of the total. Empirical testing has shown that * overhead in DELAY() works out to approximately this value. */ - tsc2 -= tsc1; *rate = tsc2 * 1000 - tsc2 * 5; return (0); } diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index 7efa29c..b7d3648 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -1213,7 +1213,7 @@ pmap_pte(pmap_t pmap, vm_offset_t va) } return (PADDR2 + (i386_btop(va) & (NPTEPG - 1))); } - return (0); + return (NULL); } /* @@ -1291,21 +1291,19 @@ pmap_pte_quick(pmap_t pmap, vm_offset_t va) vm_paddr_t pmap_extract(pmap_t pmap, vm_offset_t va) { + pt_entry_t pte, *ptep; vm_paddr_t rtval; - pt_entry_t *pte; - pd_entry_t pde; rtval = 0; PMAP_LOCK(pmap); - pde = pmap->pm_pdir[va >> PDRSHIFT]; - if (pde != 0) { - if ((pde & PG_PS) != 0) - rtval = (pde & PG_PS_FRAME) | (va & PDRMASK); - else { - pte = pmap_pte(pmap, va); - rtval = (*pte & PG_FRAME) | (va & PAGE_MASK); - pmap_pte_release(pte); - } + ptep = pmap_pte(pmap, va); + pte = (ptep != NULL) ? 
*ptep : 0; + pmap_pte_release(ptep); + if ((pte & PG_V) != 0) { + if ((pte & PG_PS) != 0) + rtval = (pte & PG_PS_FRAME) | (va & PDRMASK); + else + rtval = (pte & PG_FRAME) | (va & PAGE_MASK); } PMAP_UNLOCK(pmap); return (rtval); @@ -1321,40 +1319,30 @@ pmap_extract(pmap_t pmap, vm_offset_t va) vm_page_t pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) { - pd_entry_t pde; - pt_entry_t pte; + pt_entry_t pte, *ptep; + vm_paddr_t locked_pa, pa; vm_page_t m; - vm_paddr_t pa; - pa = 0; + locked_pa = 0; m = NULL; PMAP_LOCK(pmap); retry: - pde = *pmap_pde(pmap, va); - if (pde != 0) { - if (pde & PG_PS) { - if ((pde & PG_RW) || (prot & VM_PROT_WRITE) == 0) { - if (vm_page_pa_tryrelock(pmap, (pde & PG_PS_FRAME) | - (va & PDRMASK), &pa)) - goto retry; - m = PHYS_TO_VM_PAGE((pde & PG_PS_FRAME) | - (va & PDRMASK)); - vm_page_hold(m); - } - } else { - sched_pin(); - pte = *pmap_pte_quick(pmap, va); - if (pte != 0 && - ((pte & PG_RW) || (prot & VM_PROT_WRITE) == 0)) { - if (vm_page_pa_tryrelock(pmap, pte & PG_FRAME, &pa)) - goto retry; - m = PHYS_TO_VM_PAGE(pte & PG_FRAME); - vm_page_hold(m); - } - sched_unpin(); - } + ptep = pmap_pte(pmap, va); + pte = (ptep != NULL) ? *ptep : 0; + pmap_pte_release(ptep); + if ((pte & PG_V) != 0 && + ((pte & PG_RW) != 0 || (prot & VM_PROT_WRITE) == 0)) { + if ((pte & PG_PS) != 0) { + /* Compute the physical address of the 4KB page. */ + pa = (pte & PG_PS_FRAME) | (va & PG_FRAME & PDRMASK); + } else + pa = pte & PG_FRAME; + if (vm_page_pa_tryrelock(pmap, pa, &locked_pa)) + goto retry; + m = PHYS_TO_VM_PAGE(pa); + vm_page_hold(m); + PA_UNLOCK(locked_pa); } - PA_UNLOCK_COND(pa); PMAP_UNLOCK(pmap); return (m); } @@ -4991,39 +4979,30 @@ pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) int pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) { - pd_entry_t *pdep; pt_entry_t *ptep, pte; vm_paddr_t pa; int val; PMAP_LOCK(pmap); retry: - pdep = pmap_pde(pmap, addr); - if (*pdep != 0) { - if (*pdep & PG_PS) { - pte = *pdep; + ptep = pmap_pte(pmap, addr); + pte = (ptep != NULL) ? *ptep : 0; + pmap_pte_release(ptep); + if ((pte & PG_V) != 0) { + val = MINCORE_INCORE; + if ((pte & PG_PS) != 0) { + val |= MINCORE_SUPER; /* Compute the physical address of the 4KB page. 
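For reference, the physical-address arithmetic shared by the rewritten i386 pmap_extract(), pmap_extract_and_hold() and pmap_mincore() above: a 4 KB mapping takes its frame from the PTE and the low 12 bits from the virtual address, while a PG_PS (4 MB, non-PAE) mapping takes its frame from the PDE and the low 22 bits from the virtual address. A standalone sketch with stand-in constants (the real PG_FRAME, PG_PS_FRAME and PDRMASK come from machine/pmap.h):

#include <stdint.h>

#define EX_PAGE_MASK	0x00000fffu	/* stand-in for PAGE_MASK (4 KB)  */
#define EX_PDRMASK	0x003fffffu	/* stand-in for PDRMASK (4 MB)    */

/* 4 KB mapping: frame bits from the PTE, byte offset from the VA. */
static uint32_t
pa_from_pte(uint32_t pte, uint32_t va)
{
	return ((pte & ~EX_PAGE_MASK) | (va & EX_PAGE_MASK));
}

/* 4 MB superpage (PG_PS set): frame bits from the PDE, 22-bit offset. */
static uint32_t
pa_from_pde(uint32_t pde, uint32_t va)
{
	return ((pde & ~EX_PDRMASK) | (va & EX_PDRMASK));
}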
*/ - pa = ((*pdep & PG_PS_FRAME) | (addr & PDRMASK)) & - PG_FRAME; - val = MINCORE_SUPER; - } else { - ptep = pmap_pte(pmap, addr); - pte = *ptep; - pmap_pte_release(ptep); + pa = (pte & PG_PS_FRAME) | (addr & PG_FRAME & PDRMASK); + } else pa = pte & PG_FRAME; - val = 0; - } - } else { - pte = 0; - pa = 0; - val = 0; - } - if ((pte & PG_V) != 0) { - val |= MINCORE_INCORE; if ((pte & (PG_M | PG_RW)) == (PG_M | PG_RW)) val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; if ((pte & PG_A) != 0) val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; + } else { + val = 0; + pa = 0; } if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && diff --git a/sys/i386/include/cpu.h b/sys/i386/include/cpu.h index 83defe2..75fc0c6 100644 --- a/sys/i386/include/cpu.h +++ b/sys/i386/include/cpu.h @@ -56,7 +56,7 @@ #ifdef _KERNEL extern char btext[]; extern char etext[]; -extern u_int tsc_present; +extern int tsc_present; void cpu_halt(void); void cpu_reset(void); diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index b86ea91..80a0907 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -460,6 +460,7 @@ proc0_init(void *dummy __unused) td->td_pri_class = PRI_TIMESHARE; td->td_user_pri = PUSER; td->td_base_user_pri = PUSER; + td->td_lend_user_pri = PRI_MAX; td->td_priority = PVM; td->td_base_pri = PUSER; td->td_oncpu = 0; diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 126c668..61d9531 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -97,9 +97,7 @@ struct fork_args { /* ARGSUSED */ int -fork(td, uap) - struct thread *td; - struct fork_args *uap; +fork(struct thread *td, struct fork_args *uap) { int error; struct proc *p2; @@ -135,9 +133,7 @@ vfork(td, uap) } int -rfork(td, uap) - struct thread *td; - struct rfork_args *uap; +rfork(struct thread *td, struct rfork_args *uap) { struct proc *p2; int error; @@ -197,12 +193,59 @@ sysctl_kern_randompid(SYSCTL_HANDLER_ARGS) SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_kern_randompid, "I", "Random PID modulus"); +static int +fork_norfproc(struct thread *td, int flags, struct proc **procp) +{ + int error; + struct proc *p1; + + KASSERT((flags & RFPROC) == 0, + ("fork_norfproc called with RFPROC set")); + p1 = td->td_proc; + *procp = NULL; + + if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) && + (flags & (RFCFDG | RFFDG))) { + PROC_LOCK(p1); + if (thread_single(SINGLE_BOUNDARY)) { + PROC_UNLOCK(p1); + return (ERESTART); + } + PROC_UNLOCK(p1); + } + + error = vm_forkproc(td, NULL, NULL, NULL, flags); + if (error) + goto fail; + + /* + * Close all file descriptors. + */ + if (flags & RFCFDG) { + struct filedesc *fdtmp; + fdtmp = fdinit(td->td_proc->p_fd); + fdfree(td); + p1->p_fd = fdtmp; + } + + /* + * Unshare file descriptors (from parent). + */ + if (flags & RFFDG) + fdunshare(p1, td); + +fail: + if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) && + (flags & (RFCFDG | RFFDG))) { + PROC_LOCK(p1); + thread_single_end(); + PROC_UNLOCK(p1); + } + return (error); +} + int -fork1(td, flags, pages, procp) - struct thread *td; - int flags; - int pages; - struct proc **procp; +fork1(struct thread *td, int flags, int pages, struct proc **procp) { struct proc *p1, *p2, *pptr; struct proc *newproc; @@ -227,47 +270,8 @@ fork1(td, flags, pages, procp) * Here we don't create a new process, but we divorce * certain parts of a process from itself. 
*/ - if ((flags & RFPROC) == 0) { - if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) && - (flags & (RFCFDG | RFFDG))) { - PROC_LOCK(p1); - if (thread_single(SINGLE_BOUNDARY)) { - PROC_UNLOCK(p1); - return (ERESTART); - } - PROC_UNLOCK(p1); - } - - error = vm_forkproc(td, NULL, NULL, NULL, flags); - if (error) - goto norfproc_fail; - - /* - * Close all file descriptors. - */ - if (flags & RFCFDG) { - struct filedesc *fdtmp; - fdtmp = fdinit(td->td_proc->p_fd); - fdfree(td); - p1->p_fd = fdtmp; - } - - /* - * Unshare file descriptors (from parent). - */ - if (flags & RFFDG) - fdunshare(p1, td); - -norfproc_fail: - if (((p1->p_flag & (P_HADTHREADS|P_SYSTEM)) == P_HADTHREADS) && - (flags & (RFCFDG | RFFDG))) { - PROC_LOCK(p1); - thread_single_end(); - PROC_UNLOCK(p1); - } - *procp = NULL; - return (error); - } + if ((flags & RFPROC) == 0) + return (fork_norfproc(td, flags, procp)); /* * XXX @@ -539,6 +543,7 @@ again: td2->td_sigstk = td->td_sigstk; td2->td_sigmask = td->td_sigmask; td2->td_flags = TDF_INMEM; + td2->td_lend_user_pri = PRI_MAX; #ifdef VIMAGE td2->td_vnet = NULL; @@ -798,10 +803,8 @@ fail1: * is called from the MD fork_trampoline() entry point. */ void -fork_exit(callout, arg, frame) - void (*callout)(void *, struct trapframe *); - void *arg; - struct trapframe *frame; +fork_exit(void (*callout)(void *, struct trapframe *), void *arg, + struct trapframe *frame) { struct proc *p; struct thread *td; @@ -855,9 +858,7 @@ fork_exit(callout, arg, frame) * first parameter and is called when returning to a new userland process. */ void -fork_return(td, frame) - struct thread *td; - struct trapframe *frame; +fork_return(struct thread *td, struct trapframe *frame) { userret(td, frame); diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index 5f07590..7161a99 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -81,15 +81,54 @@ MTX_SYSINIT(zombie_lock, &zombie_lock, "zombie lock", MTX_SPIN); static void thread_zombie(struct thread *); +#define TID_BUFFER_SIZE 1024 + struct mtx tid_lock; static struct unrhdr *tid_unrhdr; - +static lwpid_t tid_buffer[TID_BUFFER_SIZE]; +static int tid_head, tid_tail; static MALLOC_DEFINE(M_TIDHASH, "tidhash", "thread hash"); struct tidhashhead *tidhashtbl; u_long tidhash; struct rwlock tidhash_lock; +static lwpid_t +tid_alloc(void) +{ + lwpid_t tid; + + tid = alloc_unr(tid_unrhdr); + if (tid != -1) + return (tid); + mtx_lock(&tid_lock); + if (tid_head == tid_tail) { + mtx_unlock(&tid_lock); + return (-1); + } + tid = tid_buffer[tid_head++]; + tid_head %= TID_BUFFER_SIZE; + mtx_unlock(&tid_lock); + return (tid); +} + +static void +tid_free(lwpid_t tid) +{ + lwpid_t tmp_tid = -1; + + mtx_lock(&tid_lock); + if ((tid_tail + 1) % TID_BUFFER_SIZE == tid_head) { + tmp_tid = tid_buffer[tid_head++]; + tid_head = (tid_head + 1) % TID_BUFFER_SIZE; + } + tid_buffer[tid_tail++] = tid; + tid_tail %= TID_BUFFER_SIZE; + mtx_unlock(&tid_lock); + if (tmp_tid != -1) + free_unr(tid_unrhdr, tmp_tid); +} + /* * Prepare a thread for use. */ @@ -102,7 +141,7 @@ thread_ctor(void *mem, int size, void *arg, int flags) td->td_state = TDS_INACTIVE; td->td_oncpu = NOCPU; - td->td_tid = alloc_unr(tid_unrhdr); + td->td_tid = tid_alloc(); /* * Note that td_critnest begins life as 1 because the thread is not @@ -110,6 +149,7 @@ thread_ctor(void *mem, int size, void *arg, int flags) * end of a context switch. 
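The kern_thread.c hunk above keeps thread IDs in the unr(9) allocator but parks freed TIDs in a small fixed-size ring, handing an ID back to free_unr() only when the ring overflows. A stripped-down, user-space sketch of that recycle-ring idea, with hypothetical names and without the mutex the real code holds around the ring:

#define RING_SIZE	1024

static int	ring[RING_SIZE];
static int	ring_head, ring_tail;

/*
 * Park a freed id.  If the ring is full, evict the oldest entry and
 * return it so the caller can release it to the real allocator;
 * otherwise return -1.
 */
static int
ring_park(int id)
{
	int evicted = -1;

	if ((ring_tail + 1) % RING_SIZE == ring_head) {
		evicted = ring[ring_head];
		ring_head = (ring_head + 1) % RING_SIZE;
	}
	ring[ring_tail] = id;
	ring_tail = (ring_tail + 1) % RING_SIZE;
	return (evicted);
}

/* Reuse a parked id before asking the real allocator; -1 if none. */
static int
ring_take(void)
{
	int id;

	if (ring_head == ring_tail)
		return (-1);
	id = ring[ring_head];
	ring_head = (ring_head + 1) % RING_SIZE;
	return (id);
}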
*/ td->td_critnest = 1; + td->td_lend_user_pri = PRI_MAX; EVENTHANDLER_INVOKE(thread_ctor, td); #ifdef AUDIT audit_thread_alloc(td); @@ -155,7 +195,7 @@ thread_dtor(void *mem, int size, void *arg) osd_thread_exit(td); EVENTHANDLER_INVOKE(thread_dtor, td); - free_unr(tid_unrhdr, td->td_tid); + tid_free(td->td_tid); } /* diff --git a/sys/kern/kern_umtx.c b/sys/kern/kern_umtx.c index 43570ce..e7b9b32 100644 --- a/sys/kern/kern_umtx.c +++ b/sys/kern/kern_umtx.c @@ -1407,17 +1407,19 @@ umtx_propagate_priority(struct thread *td) for (;;) { td = pi->pi_owner; - if (td == NULL) + if (td == NULL || td == curthread) return; MPASS(td->td_proc != NULL); MPASS(td->td_proc->p_magic == P_MAGIC); - if (UPRI(td) <= pri) - return; - thread_lock(td); - sched_lend_user_prio(td, pri); + if (td->td_lend_user_pri > pri) + sched_lend_user_prio(td, pri); + else { + thread_unlock(td); + break; + } thread_unlock(td); /* @@ -3587,8 +3589,8 @@ umtx_thread_cleanup(struct thread *td) pi->pi_owner = NULL; TAILQ_REMOVE(&uq->uq_pi_contested, pi, pi_link); } + mtx_unlock_spin(&umtx_lock); thread_lock(td); - td->td_flags &= ~TDF_UBORROWING; + sched_unlend_user_prio(td, PRI_MAX); thread_unlock(td); - mtx_unlock_spin(&umtx_lock); } diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c index 9face64..6278126 100644 --- a/sys/kern/sched_4bsd.c +++ b/sys/kern/sched_4bsd.c @@ -879,25 +879,23 @@ sched_prio(struct thread *td, u_char prio) void sched_user_prio(struct thread *td, u_char prio) { - u_char oldprio; THREAD_LOCK_ASSERT(td, MA_OWNED); td->td_base_user_pri = prio; - if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio) + if (td->td_lend_user_pri <= prio) return; - oldprio = td->td_user_pri; td->td_user_pri = prio; } void sched_lend_user_prio(struct thread *td, u_char prio) { - u_char oldprio; THREAD_LOCK_ASSERT(td, MA_OWNED); - td->td_flags |= TDF_UBORROWING; - oldprio = td->td_user_pri; - td->td_user_pri = prio; + if (prio < td->td_lend_user_pri) + td->td_lend_user_pri = prio; + if (prio < td->td_user_pri) + td->td_user_pri = prio; } void @@ -907,12 +905,11 @@ sched_unlend_user_prio(struct thread *td, u_char prio) THREAD_LOCK_ASSERT(td, MA_OWNED); base_pri = td->td_base_user_pri; - if (prio >= base_pri) { - td->td_flags &= ~TDF_UBORROWING; - sched_user_prio(td, base_pri); - } else { - sched_lend_user_prio(td, prio); - } + td->td_lend_user_pri = prio; + if (prio > base_pri) + td->td_user_pri = base_pri; + else + td->td_user_pri = prio; } void diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c index 030c98d..fb30fdb 100644 --- a/sys/kern/sched_ule.c +++ b/sys/kern/sched_ule.c @@ -1677,8 +1677,8 @@ sched_user_prio(struct thread *td, u_char prio) { td->td_base_user_pri = prio; - if (td->td_flags & TDF_UBORROWING && td->td_user_pri <= prio) - return; + if (td->td_lend_user_pri <= prio) + return; td->td_user_pri = prio; } @@ -1687,8 +1687,10 @@ sched_lend_user_prio(struct thread *td, u_char prio) { THREAD_LOCK_ASSERT(td, MA_OWNED); - td->td_flags |= TDF_UBORROWING; - td->td_user_pri = prio; + if (prio < td->td_lend_user_pri) + td->td_lend_user_pri = prio; + if (prio < td->td_user_pri) + td->td_user_pri = prio; } void @@ -1698,12 +1700,11 @@ sched_unlend_user_prio(struct thread *td, u_char prio) THREAD_LOCK_ASSERT(td, MA_OWNED); base_pri = td->td_base_user_pri; - if (prio >= base_pri) { - td->td_flags &= ~TDF_UBORROWING; - sched_user_prio(td, base_pri); - } else { - sched_lend_user_prio(td, prio); - } + td->td_lend_user_pri = prio; + if (prio > base_pri) + td->td_user_pri = base_pri; + else + td->td_user_pri = 
prio; } /* diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 63437bc..8920201 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -76,6 +76,7 @@ __FBSDID("$FreeBSD$"); #include <sys/namei.h> #include <sys/proc.h> #include <sys/protosw.h> +#include <sys/queue.h> #include <sys/resourcevar.h> #include <sys/rwlock.h> #include <sys/socket.h> @@ -115,6 +116,13 @@ static struct unp_head unp_shead; /* (l) List of stream sockets. */ static struct unp_head unp_dhead; /* (l) List of datagram sockets. */ static struct unp_head unp_sphead; /* (l) List of seqpacket sockets. */ +struct unp_defer { + SLIST_ENTRY(unp_defer) ud_link; + struct file *ud_fp; +}; +static SLIST_HEAD(, unp_defer) unp_defers; +static int unp_defers_count; + static const struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; /* @@ -126,6 +134,13 @@ static const struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL }; static struct task unp_gc_task; /* + * The close of unix domain sockets attached as SCM_RIGHTS is + * postponed to the taskqueue, to avoid arbitrary recursion depth. + * The attached sockets might have another sockets attached. + */ +static struct task unp_defer_task; + +/* * Both send and receive buffers are allocated PIPSIZ bytes of buffering for * stream sockets, although the total for sender and receiver is actually * only PIPSIZ. @@ -162,8 +177,11 @@ SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, maxseqpacket, CTLFLAG_RW, &unpsp_sendspace, 0, "Default seqpacket send space."); SYSCTL_ULONG(_net_local_seqpacket, OID_AUTO, recvspace, CTLFLAG_RW, &unpsp_recvspace, 0, "Default seqpacket receive space."); -SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, +SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "File descriptors in flight."); +SYSCTL_INT(_net_local, OID_AUTO, deferred, CTLFLAG_RD, + &unp_defers_count, 0, + "File descriptors deferred to taskqueue for close."); /* * Locking and synchronization: @@ -213,6 +231,7 @@ SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, */ static struct rwlock unp_link_rwlock; static struct mtx unp_list_lock; +static struct mtx unp_defers_lock; #define UNP_LINK_LOCK_INIT() rw_init(&unp_link_rwlock, \ "unp_link_rwlock") @@ -234,6 +253,11 @@ static struct mtx unp_list_lock; #define UNP_LIST_LOCK() mtx_lock(&unp_list_lock) #define UNP_LIST_UNLOCK() mtx_unlock(&unp_list_lock) +#define UNP_DEFERRED_LOCK_INIT() mtx_init(&unp_defers_lock, \ + "unp_defer", NULL, MTX_DEF) +#define UNP_DEFERRED_LOCK() mtx_lock(&unp_defers_lock) +#define UNP_DEFERRED_UNLOCK() mtx_unlock(&unp_defers_lock) + #define UNP_PCB_LOCK_INIT(unp) mtx_init(&(unp)->unp_mtx, \ "unp_mtx", "unp_mtx", \ MTX_DUPOK|MTX_DEF|MTX_RECURSE) @@ -259,8 +283,9 @@ static void unp_init(void); static int unp_internalize(struct mbuf **, struct thread *); static void unp_internalize_fp(struct file *); static int unp_externalize(struct mbuf *, struct mbuf **); -static void unp_externalize_fp(struct file *); +static int unp_externalize_fp(struct file *); static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *); +static void unp_process_defers(void * __unused, int); /* * Definitions of protocols supported in the LOCAL domain. 
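The uipc_usrreq.c hunk above adds the bookkeeping for deferring closes of rights-bearing files to a taskqueue: an SLIST of pending entries (unp_defers), a dedicated mutex, a counter exported via sysctl, and a task (unp_defer_task) that drains the list, so that closef() on a unix-domain socket received via SCM_RIGHTS never recurses through further in-flight sockets. The fragment below is a minimal userland sketch of the same queue-and-swap pattern that the later unp_discard()/unp_process_defers() hunks implement; it is illustrative only, not the kernel code. It assumes a BSD <sys/queue.h> that provides SLIST_SWAP (as FreeBSD's does), and the names defer_item, defer_enqueue and defer_worker are invented for the example.

/*
 * Userland sketch of the deferred-work pattern: producers queue items under
 * a short-lived mutex, a worker detaches the whole list and processes it
 * with the lock dropped.  Illustrative only; not kernel code.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct defer_item {
	SLIST_ENTRY(defer_item) link;
	int payload;			/* stands in for the struct file * */
};

static SLIST_HEAD(defer_head, defer_item) defer_list =
    SLIST_HEAD_INITIALIZER(defer_list);
static pthread_mutex_t defer_lock = PTHREAD_MUTEX_INITIALIZER;

/* Producer: queue an item under a short-lived lock, as unp_discard() does. */
static void
defer_enqueue(int payload)
{
	struct defer_item *it;

	it = malloc(sizeof(*it));
	if (it == NULL)
		abort();
	it->payload = payload;
	pthread_mutex_lock(&defer_lock);
	SLIST_INSERT_HEAD(&defer_list, it, link);
	pthread_mutex_unlock(&defer_lock);
}

/* Consumer: detach the whole list, then process it with the lock dropped. */
static void *
defer_worker(void *arg)
{
	struct defer_head local = SLIST_HEAD_INITIALIZER(local);
	struct defer_item *it;

	(void)arg;
	pthread_mutex_lock(&defer_lock);
	SLIST_SWAP(&defer_list, &local, defer_item);
	pthread_mutex_unlock(&defer_lock);

	while ((it = SLIST_FIRST(&local)) != NULL) {
		SLIST_REMOVE_HEAD(&local, link);
		printf("deferred close of item %d\n", it->payload);
		free(it);
	}
	return (NULL);
}

int
main(void)
{
	pthread_t tid;
	int i;

	for (i = 0; i < 4; i++)
		defer_enqueue(i);
	pthread_create(&tid, NULL, defer_worker, NULL);
	pthread_join(tid, NULL);
	return (0);
}

The point of swapping the whole list while holding the lock is that the expensive part (closef() in the real code, printf() here) always runs with the mutex dropped, so producers are never blocked behind the drain and the work happens iteratively rather than recursively.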
@@ -1764,9 +1789,12 @@ unp_init(void) LIST_INIT(&unp_dhead); LIST_INIT(&unp_shead); LIST_INIT(&unp_sphead); + SLIST_INIT(&unp_defers); TASK_INIT(&unp_gc_task, 0, unp_gc, NULL); + TASK_INIT(&unp_defer_task, 0, unp_process_defers, NULL); UNP_LINK_LOCK_INIT(); UNP_LIST_LOCK_INIT(); + UNP_DEFERRED_LOCK_INIT(); } static int @@ -1970,9 +1998,45 @@ fptounp(struct file *fp) static void unp_discard(struct file *fp) { + struct unp_defer *dr; + + if (unp_externalize_fp(fp)) { + dr = malloc(sizeof(*dr), M_TEMP, M_WAITOK); + dr->ud_fp = fp; + UNP_DEFERRED_LOCK(); + SLIST_INSERT_HEAD(&unp_defers, dr, ud_link); + UNP_DEFERRED_UNLOCK(); + atomic_add_int(&unp_defers_count, 1); + taskqueue_enqueue(taskqueue_thread, &unp_defer_task); + } else + (void) closef(fp, (struct thread *)NULL); +} - unp_externalize_fp(fp); - (void) closef(fp, (struct thread *)NULL); +static void +unp_process_defers(void *arg __unused, int pending) +{ + struct unp_defer *dr; + SLIST_HEAD(, unp_defer) drl; + int count; + + SLIST_INIT(&drl); + for (;;) { + UNP_DEFERRED_LOCK(); + if (SLIST_FIRST(&unp_defers) == NULL) { + UNP_DEFERRED_UNLOCK(); + break; + } + SLIST_SWAP(&unp_defers, &drl, unp_defer); + UNP_DEFERRED_UNLOCK(); + count = 0; + while ((dr = SLIST_FIRST(&drl)) != NULL) { + SLIST_REMOVE_HEAD(&drl, ud_link); + closef(dr->ud_fp, NULL); + free(dr, M_TEMP); + count++; + } + atomic_add_int(&unp_defers_count, -count); + } } static void @@ -1990,16 +2054,21 @@ unp_internalize_fp(struct file *fp) UNP_LINK_WUNLOCK(); } -static void +static int unp_externalize_fp(struct file *fp) { struct unpcb *unp; + int ret; UNP_LINK_WLOCK(); - if ((unp = fptounp(fp)) != NULL) + if ((unp = fptounp(fp)) != NULL) { unp->unp_msgcount--; + ret = 1; + } else + ret = 0; unp_rights--; UNP_LINK_WUNLOCK(); + return (ret); } /* diff --git a/sys/mips/adm5120/adm5120_machdep.c b/sys/mips/adm5120/adm5120_machdep.c index 90a6b90..bc7b7b0 100644 --- a/sys/mips/adm5120/adm5120_machdep.c +++ b/sys/mips/adm5120/adm5120_machdep.c @@ -97,6 +97,9 @@ mips_init(void) phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); phys_avail[1] = ctob(realmem); + dump_avail[0] = phys_avail[0]; + dump_avail[1] = phys_avail[1]; + physmem = realmem; init_param1(); diff --git a/sys/mips/alchemy/alchemy_machdep.c b/sys/mips/alchemy/alchemy_machdep.c index a94d995..047fa42 100644 --- a/sys/mips/alchemy/alchemy_machdep.c +++ b/sys/mips/alchemy/alchemy_machdep.c @@ -97,6 +97,9 @@ mips_init(void) phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); phys_avail[1] = ctob(realmem); + dump_avail[0] = phys_avail[0]; + dump_avail[1] = phys_avail[1]; + physmem = realmem; init_param1(); diff --git a/sys/mips/atheros/ar71xx_machdep.c b/sys/mips/atheros/ar71xx_machdep.c index ec355cc..c8abc74 100644 --- a/sys/mips/atheros/ar71xx_machdep.c +++ b/sys/mips/atheros/ar71xx_machdep.c @@ -184,6 +184,9 @@ platform_start(__register_t a0 __unused, __register_t a1 __unused, phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); phys_avail[1] = ctob(realmem); + dump_avail[0] = phys_avail[0]; + dump_avail[1] = phys_avail[1] - phys_avail[0]; + physmem = realmem; /* diff --git a/sys/mips/cavium/octeon_machdep.c b/sys/mips/cavium/octeon_machdep.c index 886595c..edb92af 100644 --- a/sys/mips/cavium/octeon_machdep.c +++ b/sys/mips/cavium/octeon_machdep.c @@ -265,7 +265,7 @@ octeon_memory_init(void) { vm_paddr_t phys_end; int64_t addr; - unsigned i; + unsigned i, j; phys_end = round_page(MIPS_KSEG0_TO_PHYS((vm_offset_t)&end)); @@ -274,6 +274,9 @@ octeon_memory_init(void) phys_avail[0] = phys_end; phys_avail[1] = 96 << 20; 
+ dump_avail[0] = phys_avail[0]; + dump_avail[1] = phys_avail[1]; + realmem = physmem = btoc(phys_avail[1] - phys_avail[0]); return; } @@ -314,6 +317,9 @@ octeon_memory_init(void) i += 2; } + for (j = 0; j < i; j++) + dump_avail[j] = phys_avail[j]; + realmem = physmem; } diff --git a/sys/mips/idt/idt_machdep.c b/sys/mips/idt/idt_machdep.c index 0dfee66..6c76c26 100644 --- a/sys/mips/idt/idt_machdep.c +++ b/sys/mips/idt/idt_machdep.c @@ -167,6 +167,9 @@ platform_start(__register_t a0, __register_t a1, phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); phys_avail[1] = ctob(realmem); + dump_avail[0] = phys_avail[0]; + dump_avail[1] = phys_avail[1]; + physmem = realmem; /* diff --git a/sys/mips/include/md_var.h b/sys/mips/include/md_var.h index 3cebbdc..3e46ad3 100644 --- a/sys/mips/include/md_var.h +++ b/sys/mips/include/md_var.h @@ -77,6 +77,8 @@ void platform_identify(void); extern int busdma_swi_pending; void busdma_swi(void); -struct dumperinfo; -void minidumpsys(struct dumperinfo *); +struct dumperinfo; +void dump_add_page(vm_paddr_t); +void dump_drop_page(vm_paddr_t); +void minidumpsys(struct dumperinfo *); #endif /* !_MACHINE_MD_VAR_H_ */ diff --git a/sys/mips/include/pmap.h b/sys/mips/include/pmap.h index c082abb..cdbf9bc 100644 --- a/sys/mips/include/pmap.h +++ b/sys/mips/include/pmap.h @@ -163,6 +163,9 @@ void pmap_kenter_temporary_free(vm_paddr_t pa); int pmap_compute_pages_to_dump(void); void pmap_flush_pvcache(vm_page_t m); int pmap_emulate_modified(pmap_t pmap, vm_offset_t va); +void pmap_grow_direct_page_cache(void); +vm_page_t pmap_alloc_direct_page(unsigned int index, int req); + #endif /* _KERNEL */ #endif /* !LOCORE */ diff --git a/sys/mips/include/vmparam.h b/sys/mips/include/vmparam.h index 3050a91..212a0c3 100644 --- a/sys/mips/include/vmparam.h +++ b/sys/mips/include/vmparam.h @@ -46,11 +46,6 @@ /* * Machine dependent constants mips processors. */ -/* - * USRTEXT is the start of the user text/data space, while USRSTACK - * is the top (end) of the user stack. 
- */ -#define USRTEXT (1*PAGE_SIZE) /* * Virtual memory related constants, all in bytes @@ -94,7 +89,6 @@ #define VM_MAX_ADDRESS ((vm_offset_t)(intptr_t)(int32_t)0xffffffff) #define VM_MINUSER_ADDRESS ((vm_offset_t)0x00000000) -#define VM_MAX_MMAP_ADDR VM_MAXUSER_ADDRESS #ifdef __mips_n64 #define VM_MAXUSER_ADDRESS (VM_MINUSER_ADDRESS + (NPDEPG * NBSEG)) @@ -155,6 +149,8 @@ #define VM_INITIAL_PAGEIN 16 #endif +#define UMA_MD_SMALL_ALLOC + /* * max number of non-contig chunks of physical RAM you can have */ diff --git a/sys/mips/malta/malta_machdep.c b/sys/mips/malta/malta_machdep.c index 6cbdcd8..0ff34cb 100644 --- a/sys/mips/malta/malta_machdep.c +++ b/sys/mips/malta/malta_machdep.c @@ -181,6 +181,9 @@ mips_init(void) phys_avail[0] = MIPS_KSEG0_TO_PHYS(kernel_kseg0_end); phys_avail[1] = ctob(realmem); + dump_avail[0] = phys_avail[0]; + dump_avail[1] = phys_avail[1]; + physmem = realmem; init_param1(); diff --git a/sys/mips/mips/minidump_machdep.c b/sys/mips/mips/minidump_machdep.c index 1ac384a..cded3ae 100644 --- a/sys/mips/mips/minidump_machdep.c +++ b/sys/mips/mips/minidump_machdep.c @@ -83,7 +83,7 @@ is_dumpable(vm_paddr_t pa) return (0); } -static void +void dump_add_page(vm_paddr_t pa) { int idx, bit; @@ -94,7 +94,7 @@ dump_add_page(vm_paddr_t pa) atomic_set_int(&vm_page_dump[idx], 1ul << bit); } -static void +void dump_drop_page(vm_paddr_t pa) { int idx, bit; diff --git a/sys/mips/mips/mp_machdep.c b/sys/mips/mips/mp_machdep.c index 2b993cb..41de5fb 100644 --- a/sys/mips/mips/mp_machdep.c +++ b/sys/mips/mips/mp_machdep.c @@ -164,7 +164,7 @@ mips_ipi_handler(void *arg) break; case IPI_HARDCLOCK: CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__); - hardclockintr();; + hardclockintr(); break; default: panic("Unknown IPI 0x%0x on cpu %d", ipi, curcpu); diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c index 4e2b34e..7b0d09b 100644 --- a/sys/mips/mips/pmap.c +++ b/sys/mips/mips/pmap.c @@ -185,8 +185,6 @@ static vm_page_t pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags); static vm_page_t _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags); static int pmap_unuse_pt(pmap_t, vm_offset_t, vm_page_t); static int init_pte_prot(vm_offset_t va, vm_page_t m, vm_prot_t prot); -static vm_page_t pmap_alloc_pte_page(unsigned int index, int req); -static void pmap_grow_pte_page_cache(void); #ifdef SMP static void pmap_invalidate_page_action(void *arg); @@ -196,14 +194,15 @@ static void pmap_update_page_action(void *arg); #ifndef __mips_n64 /* - * This structure is for high memory (memory above 512Meg in 32 bit) - * This memory area does not have direct mapping, so we a mechanism to do - * temporary per-CPU mapping to access these addresses. + * This structure is for high memory (memory above 512Meg in 32 bit) support. + * The highmem area does not have a KSEG0 mapping, and we need a mechanism to + * do temporary per-CPU mappings for pmap_zero_page, pmap_copy_page etc. * - * At bootup we reserve 2 virtual pages per CPU for mapping highmem pages, to - * access a highmem physical address on a CPU, we will disable interrupts and - * add the mapping from the reserved virtual address for the CPU to the physical - * address in the kernel pagetable. + * At bootup, we reserve 2 virtual pages per CPU for mapping highmem pages. To + * access a highmem physical address on a CPU, we map the physical address to + * the reserved virtual address for the CPU in the kernel pagetable. This is + * done with interrupts disabled(although a spinlock and sched_pin would be + * sufficient). 
*/ struct local_sysmaps { vm_offset_t base; @@ -520,11 +519,11 @@ again: } /* - * In 32 bit, we may have memory which cannot be mapped directly - * this memory will need temporary mapping before it can be + * In 32 bit, we may have memory which cannot be mapped directly. + * This memory will need temporary mapping before it can be * accessed. */ - if (!MIPS_DIRECT_MAPPABLE(phys_avail[i - 1])) + if (!MIPS_DIRECT_MAPPABLE(phys_avail[i - 1] - 1)) need_local_mappings = 1; /* @@ -893,7 +892,7 @@ pmap_map(vm_offset_t *virt, vm_offset_t start, vm_offset_t end, int prot) { vm_offset_t va, sva; - if (MIPS_DIRECT_MAPPABLE(end)) + if (MIPS_DIRECT_MAPPABLE(end - 1)) return (MIPS_PHYS_TO_DIRECT(start)); va = sva = *virt; @@ -1061,8 +1060,8 @@ pmap_pinit0(pmap_t pmap) bzero(&pmap->pm_stats, sizeof pmap->pm_stats); } -static void -pmap_grow_pte_page_cache() +void +pmap_grow_direct_page_cache() { #ifdef __mips_n64 @@ -1072,8 +1071,8 @@ pmap_grow_pte_page_cache() #endif } -static vm_page_t -pmap_alloc_pte_page(unsigned int index, int req) +vm_page_t +pmap_alloc_direct_page(unsigned int index, int req) { vm_page_t m; @@ -1106,8 +1105,8 @@ pmap_pinit(pmap_t pmap) /* * allocate the page directory page */ - while ((ptdpg = pmap_alloc_pte_page(NUSERPGTBLS, VM_ALLOC_NORMAL)) == NULL) - pmap_grow_pte_page_cache(); + while ((ptdpg = pmap_alloc_direct_page(NUSERPGTBLS, VM_ALLOC_NORMAL)) == NULL) + pmap_grow_direct_page_cache(); ptdva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(ptdpg)); pmap->pm_segtab = (pd_entry_t *)ptdva; @@ -1140,11 +1139,11 @@ _pmap_allocpte(pmap_t pmap, unsigned ptepindex, int flags) /* * Find or fabricate a new pagetable page */ - if ((m = pmap_alloc_pte_page(ptepindex, VM_ALLOC_NORMAL)) == NULL) { + if ((m = pmap_alloc_direct_page(ptepindex, VM_ALLOC_NORMAL)) == NULL) { if (flags & M_WAITOK) { PMAP_UNLOCK(pmap); vm_page_unlock_queues(); - pmap_grow_pte_page_cache(); + pmap_grow_direct_page_cache(); vm_page_lock_queues(); PMAP_LOCK(pmap); } @@ -1312,7 +1311,7 @@ pmap_growkernel(vm_offset_t addr) #ifdef __mips_n64 if (*pdpe == 0) { /* new intermediate page table entry */ - nkpg = pmap_alloc_pte_page(nkpt, VM_ALLOC_INTERRUPT); + nkpg = pmap_alloc_direct_page(nkpt, VM_ALLOC_INTERRUPT); if (nkpg == NULL) panic("pmap_growkernel: no memory to grow kernel"); *pdpe = (pd_entry_t)MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(nkpg)); @@ -1332,7 +1331,7 @@ pmap_growkernel(vm_offset_t addr) /* * This index is bogus, but out of the way */ - nkpg = pmap_alloc_pte_page(nkpt, VM_ALLOC_INTERRUPT); + nkpg = pmap_alloc_direct_page(nkpt, VM_ALLOC_INTERRUPT); if (!nkpg) panic("pmap_growkernel: no memory to grow kernel"); nkpt++; @@ -3099,7 +3098,7 @@ pads(pmap_t pm) va >= VM_MAXUSER_ADDRESS) continue; ptep = pmap_pte(pm, va); - if (pmap_pte_v(ptep)) + if (pte_test(ptep, PTE_V)) printf("%x:%x ", va, *(int *)ptep); } diff --git a/sys/mips/mips/uma_machdep.c b/sys/mips/mips/uma_machdep.c new file mode 100644 index 0000000..690fc23 --- /dev/null +++ b/sys/mips/mips/uma_machdep.c @@ -0,0 +1,87 @@ +/*- + * Copyright (c) 2003 Alan L. Cox <alc@cs.rice.edu> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/systm.h> +#include <vm/vm.h> +#include <vm/vm_page.h> +#include <vm/vm_pageout.h> +#include <vm/uma.h> +#include <vm/uma_int.h> +#include <machine/md_var.h> +#include <machine/vmparam.h> + +void * +uma_small_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +{ + static vm_pindex_t color; + vm_paddr_t pa; + vm_page_t m; + int pflags; + void *va; + + *flags = UMA_SLAB_PRIV; + + if ((wait & (M_NOWAIT|M_USE_RESERVE)) == M_NOWAIT) + pflags = VM_ALLOC_INTERRUPT; + else + pflags = VM_ALLOC_SYSTEM; + + for (;;) { + m = pmap_alloc_direct_page(color++, pflags); + if (m == NULL) { + if (wait & M_NOWAIT) + return (NULL); + else + pmap_grow_direct_page_cache(); + } else + break; + } + + pa = VM_PAGE_TO_PHYS(m); + va = (void *)MIPS_PHYS_TO_DIRECT(pa); + if ((wait & M_ZERO) && (m->flags & PG_ZERO) == 0) + bzero(va, PAGE_SIZE); + return (va); +} + +void +uma_small_free(void *mem, int size, u_int8_t flags) +{ + vm_page_t m; + vm_paddr_t pa; + + pa = MIPS_DIRECT_TO_PHYS((vm_offset_t)mem); + m = PHYS_TO_VM_PAGE(pa); + m->wire_count--; + vm_page_free(m); + atomic_subtract_int(&cnt.v_wire_count, 1); +} diff --git a/sys/mips/mips/vm_machdep.c b/sys/mips/mips/vm_machdep.c index 73fab5a..b4059e6 100644 --- a/sys/mips/mips/vm_machdep.c +++ b/sys/mips/mips/vm_machdep.c @@ -538,6 +538,9 @@ sf_buf_free(struct sf_buf *sf) void swi_vm(void *dummy) { + + if (busdma_swi_pending) + busdma_swi(); } int diff --git a/sys/mips/rmi/dev/nlge/if_nlge.c b/sys/mips/rmi/dev/nlge/if_nlge.c index 9ff88ea..5c3dd2f 100644 --- a/sys/mips/rmi/dev/nlge/if_nlge.c +++ b/sys/mips/rmi/dev/nlge/if_nlge.c @@ -213,7 +213,6 @@ static int send_fmn_msg_tx(struct nlge_softc *, struct msgrng_msg *, //#define DEBUG #ifdef DEBUG static int mac_debug = 1; -static int reg_dump = 0; #undef PDEBUG #define PDEBUG(fmt, args...) 
\ do {\ diff --git a/sys/mips/rmi/xlr_machdep.c b/sys/mips/rmi/xlr_machdep.c index 8f96633..5a5bb79 100644 --- a/sys/mips/rmi/xlr_machdep.c +++ b/sys/mips/rmi/xlr_machdep.c @@ -363,6 +363,9 @@ xlr_mem_init(void) (void *)phys_avail[0], (void *)phys_avail[1]); + dump_avail[0] = phys_avail[0]; + dump_avail[1] = phys_avail[1]; + } else { /* * Can't use this code yet, because most of the fixed allocations happen from @@ -390,6 +393,10 @@ xlr_mem_init(void) (void *)phys_avail[j], (void *)phys_avail[j+1]); } + + dump_avail[j] = phys_avail[j]; + dump_avail[j+1] = phys_avail[j+1]; + physsz += boot_map->physmem_map[i].size; } } diff --git a/sys/mips/rmi/xlr_pci.c b/sys/mips/rmi/xlr_pci.c index 3204691..8cdaede 100644 --- a/sys/mips/rmi/xlr_pci.c +++ b/sys/mips/rmi/xlr_pci.c @@ -105,13 +105,6 @@ __FBSDID("$FreeBSD$"); (MSI_MIPS_DATA_TRGRLVL | MSI_MIPS_DATA_DELFIXED | \ MSI_MIPS_DATA_ASSERT | (irq)) -#define DEBUG -#ifdef DEBUG -#define dbg_devprintf device_printf -#else -#define dbg_devprintf(dev, fmt, ...) -#endif - struct xlr_pcib_softc { bus_dma_tag_t sc_pci_dmat; /* PCI DMA tag pointer */ }; diff --git a/sys/mips/sentry5/s5_machdep.c b/sys/mips/sentry5/s5_machdep.c index 00e6231..4491b93 100644 --- a/sys/mips/sentry5/s5_machdep.c +++ b/sys/mips/sentry5/s5_machdep.c @@ -91,7 +91,7 @@ platform_cpu_init() static void mips_init(void) { - int i; + int i, j; printf("entry: mips_init()\n"); @@ -128,6 +128,9 @@ mips_init(void) realmem = btoc(physmem); #endif + for (j = 0; j < i; j++) + dump_avail[j] = phys_avail[j]; + physmem = realmem; init_param1(); diff --git a/sys/mips/sibyte/sb_machdep.c b/sys/mips/sibyte/sb_machdep.c index ba4b62e9..ac30451 100644 --- a/sys/mips/sibyte/sb_machdep.c +++ b/sys/mips/sibyte/sb_machdep.c @@ -138,7 +138,7 @@ sb_intr_init(int cpuid) static void mips_init(void) { - int i, cfe_mem_idx, tmp; + int i, j, cfe_mem_idx, tmp; uint64_t maxmem; #ifdef CFE_ENV @@ -225,6 +225,9 @@ mips_init(void) realmem = btoc(physmem); #endif + for (j = 0; j < i; j++) + dump_avail[j] = phys_avail[j]; + physmem = realmem; init_param1(); diff --git a/sys/net/if.h b/sys/net/if.h index a99b4a7..d291da8 100644 --- a/sys/net/if.h +++ b/sys/net/if.h @@ -145,7 +145,7 @@ struct if_data { #define IFF_LINK2 0x4000 /* per link layer defined bit */ #define IFF_ALTPHYS IFF_LINK2 /* use alternate physical connection */ #define IFF_MULTICAST 0x8000 /* (i) supports multicast */ -/* 0x10000 */ +#define IFF_CANTCONFIG 0x10000 /* (i) unconfigurable using ioctl(2) */ #define IFF_PPROMISC 0x20000 /* (n) user-requested promisc mode */ #define IFF_MONITOR 0x40000 /* (n) user-requested monitor mode */ #define IFF_STATICARP 0x80000 /* (n) static ARP */ @@ -165,7 +165,7 @@ struct if_data { #define IFF_CANTCHANGE \ (IFF_BROADCAST|IFF_POINTOPOINT|IFF_DRV_RUNNING|IFF_DRV_OACTIVE|\ IFF_SIMPLEX|IFF_MULTICAST|IFF_ALLMULTI|IFF_SMART|IFF_PROMISC|\ - IFF_DYING) + IFF_DYING|IFF_CANTCONFIG) /* * Values for if_link_state. 
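The if.h hunk above claims the previously unused 0x10000 bit as IFF_CANTCONFIG and folds it into IFF_CANTCHANGE, the mask of flag bits that ioctl(2) callers are never allowed to alter. The fragment below is a hedged illustration of how such a mask is typically applied when merging user-requested flags: bits inside the mask are kept from the current value, bits outside it are taken from the request. The apply_if_flags() helper and the reduced flag set are hypothetical and stand-alone; this is not the kernel's SIOCSIFFLAGS handler.

/*
 * Illustrative sketch of applying a "can't change" mask when a caller asks
 * to set interface flags.  The flag values mirror a few of those in if.h;
 * the helper itself is hypothetical.
 */
#include <stdio.h>

#define IFF_UP		0x1		/* administratively up */
#define IFF_BROADCAST	0x2		/* valid broadcast address (kernel-owned) */
#define IFF_CANTCONFIG	0x10000		/* unconfigurable via ioctl(2) (kernel-owned) */
#define IFF_DYING	0x200000	/* interface is going away (kernel-owned) */
#define IFF_CANTCHANGE	(IFF_BROADCAST | IFF_CANTCONFIG | IFF_DYING)

/* Keep kernel-owned bits from the current flags, take the rest from the request. */
static unsigned int
apply_if_flags(unsigned int cur, unsigned int requested)
{
	return ((cur & IFF_CANTCHANGE) | (requested & ~IFF_CANTCHANGE));
}

int
main(void)
{
	unsigned int cur = IFF_BROADCAST | IFF_CANTCONFIG;	/* as set by the driver */
	unsigned int req = IFF_UP | IFF_DYING;			/* user also tries to set DYING */

	/* IFF_UP is honoured; IFF_DYING is ignored; BROADCAST/CANTCONFIG are preserved. */
	printf("resulting flags: %#x\n", apply_if_flags(cur, req));
	return (0);
}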
diff --git a/sys/netinet/ip_fastfwd.c b/sys/netinet/ip_fastfwd.c index 0399393..a1adb85 100644 --- a/sys/netinet/ip_fastfwd.c +++ b/sys/netinet/ip_fastfwd.c @@ -218,7 +218,7 @@ ip_fastforward(struct mbuf *m) */ hlen = ip->ip_hl << 2; if (hlen < sizeof(struct ip)) { /* minimum header length */ - IPSTAT_INC(ips_badlen); + IPSTAT_INC(ips_badhlen); goto drop; } if (hlen > m->m_len) { diff --git a/sys/netinet/sctp_indata.c b/sys/netinet/sctp_indata.c index 05b40f7..bd484a7 100644 --- a/sys/netinet/sctp_indata.c +++ b/sys/netinet/sctp_indata.c @@ -3081,14 +3081,17 @@ sctp_handle_segments(struct mbuf *m, int *offset, struct sctp_tcb *stcb, struct int num_frs = 0; int chunk_freed; int non_revocable; - uint16_t frag_strt, frag_end; - uint32_t last_frag_high; + uint16_t frag_strt, frag_end, prev_frag_end; - tp1 = NULL; - last_frag_high = 0; + tp1 = TAILQ_FIRST(&asoc->sent_queue); + prev_frag_end = 0; chunk_freed = 0; for (i = 0; i < (num_seg + num_nr_seg); i++) { + if (i == num_seg) { + prev_frag_end = 0; + tp1 = TAILQ_FIRST(&asoc->sent_queue); + } frag = (struct sctp_gap_ack_block *)sctp_m_getptr(m, *offset, sizeof(struct sctp_gap_ack_block), (uint8_t *) & block); *offset += sizeof(block); @@ -3097,58 +3100,29 @@ sctp_handle_segments(struct mbuf *m, int *offset, struct sctp_tcb *stcb, struct } frag_strt = ntohs(frag->start); frag_end = ntohs(frag->end); - /* some sanity checks on the fragment offsets */ + if (frag_strt > frag_end) { - /* this one is malformed, skip */ + /* This gap report is malformed, skip it. */ continue; } - if (compare_with_wrap((frag_end + last_tsn), *biggest_tsn_acked, - MAX_TSN)) - *biggest_tsn_acked = frag_end + last_tsn; - - /* mark acked dgs and find out the highestTSN being acked */ - if (tp1 == NULL) { + if (frag_strt <= prev_frag_end) { + /* This gap report is not in order, so restart. */ tp1 = TAILQ_FIRST(&asoc->sent_queue); - /* save the locations of the last frags */ - last_frag_high = frag_end + last_tsn; - } else { - /* - * now lets see if we need to reset the queue due to - * a out-of-order SACK fragment - */ - if (compare_with_wrap(frag_strt + last_tsn, - last_frag_high, MAX_TSN)) { - /* - * if the new frag starts after the last TSN - * frag covered, we are ok and this one is - * beyond the last one - */ - ; - } else { - /* - * ok, they have reset us, so we need to - * reset the queue this will cause extra - * hunting but hey, they chose the - * performance hit when they failed to order - * their gaps - */ - tp1 = TAILQ_FIRST(&asoc->sent_queue); - } - last_frag_high = frag_end + last_tsn; + } + if (compare_with_wrap((last_tsn + frag_end), *biggest_tsn_acked, MAX_TSN)) { + *biggest_tsn_acked = last_tsn + frag_end; } if (i < num_seg) { non_revocable = 0; } else { non_revocable = 1; } - if (i == num_seg) { - tp1 = NULL; - } if (sctp_process_segment_range(stcb, &tp1, last_tsn, frag_strt, frag_end, non_revocable, &num_frs, biggest_newly_acked_tsn, this_sack_lowest_newack, ecn_seg_sums)) { chunk_freed = 1; } + prev_frag_end = frag_end; } if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_FR_LOGGING_ENABLE) { if (num_frs) @@ -4817,7 +4791,7 @@ sctp_handle_sack(struct mbuf *m, int offset_seg, int offset_dup, } } /********************************************/ - /* drop the acked chunks from the sendqueue */ + /* drop the acked chunks from the sentqueue */ /********************************************/ asoc->last_acked_seq = cum_ack; @@ -4925,9 +4899,10 @@ done_with_it: * we had some before and now we have NONE. 
*/ - if (num_seg) + if (num_seg) { sctp_check_for_revoked(stcb, asoc, cum_ack, biggest_tsn_acked); - else if (asoc->saw_sack_with_frags) { + asoc->saw_sack_with_frags = 1; + } else if (asoc->saw_sack_with_frags) { int cnt_revoked = 0; tp1 = TAILQ_FIRST(&asoc->sent_queue); @@ -4963,10 +4938,10 @@ done_with_it: } asoc->saw_sack_with_frags = 0; } - if (num_seg || num_nr_seg) - asoc->saw_sack_with_frags = 1; + if (num_nr_seg > 0) + asoc->saw_sack_with_nr_frags = 1; else - asoc->saw_sack_with_frags = 0; + asoc->saw_sack_with_nr_frags = 0; /* JRS - Use the congestion control given in the CC module */ asoc->cc_functions.sctp_cwnd_update_after_sack(stcb, asoc, accum_moved, reneged_all, will_exit_fast_recovery); diff --git a/sys/netinet/sctp_input.c b/sys/netinet/sctp_input.c index ad4209f..88d67a3 100644 --- a/sys/netinet/sctp_input.c +++ b/sys/netinet/sctp_input.c @@ -4644,6 +4644,7 @@ process_control_chunks: ((compare_with_wrap(cum_ack, stcb->asoc.last_acked_seq, MAX_TSN)) || (cum_ack == stcb->asoc.last_acked_seq)) && (stcb->asoc.saw_sack_with_frags == 0) && + (stcb->asoc.saw_sack_with_nr_frags == 0) && (!TAILQ_EMPTY(&stcb->asoc.sent_queue)) ) { /* @@ -4737,6 +4738,7 @@ process_control_chunks: ((compare_with_wrap(cum_ack, stcb->asoc.last_acked_seq, MAX_TSN)) || (cum_ack == stcb->asoc.last_acked_seq)) && (stcb->asoc.saw_sack_with_frags == 0) && + (stcb->asoc.saw_sack_with_nr_frags == 0) && (!TAILQ_EMPTY(&stcb->asoc.sent_queue))) { /* * We have a SIMPLE sack having no diff --git a/sys/netinet/sctp_structs.h b/sys/netinet/sctp_structs.h index 56f4946..94d0395 100644 --- a/sys/netinet/sctp_structs.h +++ b/sys/netinet/sctp_structs.h @@ -1058,6 +1058,7 @@ struct sctp_association { uint8_t delayed_connection; uint8_t ifp_had_enobuf; uint8_t saw_sack_with_frags; + uint8_t saw_sack_with_nr_frags; uint8_t in_asocid_hash; uint8_t assoc_up_sent; uint8_t adaptation_needed; diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index c4ff308..fe8bada 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -1053,15 +1053,6 @@ nd6_free(struct llentry *ln, int gc) return (next); } - if (ln->ln_router || dr) { - /* - * rt6_flush must be called whether or not the neighbor - * is in the Default Router List. - * See a corresponding comment in nd6_na_input(). - */ - rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ifp); - } - if (dr) { /* * Unreachablity of a router might affect the default @@ -1077,8 +1068,28 @@ nd6_free(struct llentry *ln, int gc) * or the entry itself will be deleted. */ ln->ln_state = ND6_LLINFO_INCOMPLETE; + } + + if (ln->ln_router || dr) { /* + * We need to unlock to avoid a LOR with rt6_flush() with the + * rnh and for the calls to pfxlist_onlink_check() and + * defrouter_select() in the block further down for calls + * into nd6_lookup(). We still hold a ref. + */ + LLE_WUNLOCK(ln); + + /* + * rt6_flush must be called whether or not the neighbor + * is in the Default Router List. + * See a corresponding comment in nd6_na_input(). + */ + rt6_flush(&L3_ADDR_SIN6(ln)->sin6_addr, ifp); + } + + if (dr) { + /* * Since defrouter_select() does not affect the * on-link determination and MIP6 needs the check * before the default router selection, we perform @@ -1087,13 +1098,13 @@ nd6_free(struct llentry *ln, int gc) pfxlist_onlink_check(); /* - * Refresh default router list. Have to unlock as - * it calls into nd6_lookup(), still holding a ref. + * Refresh default router list. 
*/ - LLE_WUNLOCK(ln); defrouter_select(); - LLE_WLOCK(ln); } + + if (ln->ln_router || dr) + LLE_WLOCK(ln); } /* diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c index 03933a6..5a27646 100644 --- a/sys/pc98/pc98/machdep.c +++ b/sys/pc98/pc98/machdep.c @@ -1092,12 +1092,16 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate) thread_unlock(curthread); #endif + tsc2 -= tsc1; + if (tsc_freq != 0 && !tsc_is_broken) { + *rate = tsc2 * 1000; + return (0); + } + /* - * Calculate the difference in readings, convert to Mhz, and - * subtract 0.5% of the total. Empirical testing has shown that + * Subtract 0.5% of the total. Empirical testing has shown that * overhead in DELAY() works out to approximately this value. */ - tsc2 -= tsc1; *rate = tsc2 * 1000 - tsc2 * 5; return (0); } diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c index 7bd07e1..6f99081 100644 --- a/sys/powerpc/aim/mmu_oea.c +++ b/sys/powerpc/aim/mmu_oea.c @@ -161,24 +161,6 @@ __FBSDID("$FreeBSD$"); #define VSID_TO_SR(vsid) ((vsid) & 0xf) #define VSID_TO_HASH(vsid) (((vsid) >> 4) & 0xfffff) -#define PVO_PTEGIDX_MASK 0x007 /* which PTEG slot */ -#define PVO_PTEGIDX_VALID 0x008 /* slot is valid */ -#define PVO_WIRED 0x010 /* PVO entry is wired */ -#define PVO_MANAGED 0x020 /* PVO entry is managed */ -#define PVO_EXECUTABLE 0x040 /* PVO entry is executable */ -#define PVO_BOOTSTRAP 0x080 /* PVO entry allocated during - bootstrap */ -#define PVO_FAKE 0x100 /* fictitious phys page */ -#define PVO_VADDR(pvo) ((pvo)->pvo_vaddr & ~ADDR_POFF) -#define PVO_ISEXECUTABLE(pvo) ((pvo)->pvo_vaddr & PVO_EXECUTABLE) -#define PVO_ISFAKE(pvo) ((pvo)->pvo_vaddr & PVO_FAKE) -#define PVO_PTEGIDX_GET(pvo) ((pvo)->pvo_vaddr & PVO_PTEGIDX_MASK) -#define PVO_PTEGIDX_ISSET(pvo) ((pvo)->pvo_vaddr & PVO_PTEGIDX_VALID) -#define PVO_PTEGIDX_CLR(pvo) \ - ((void)((pvo)->pvo_vaddr &= ~(PVO_PTEGIDX_VALID|PVO_PTEGIDX_MASK))) -#define PVO_PTEGIDX_SET(pvo, i) \ - ((void)((pvo)->pvo_vaddr |= (i)|PVO_PTEGIDX_VALID)) - #define MOEA_PVO_CHECK(pvo) struct ofw_map { diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index 506676f..cd791f8 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -155,79 +155,13 @@ __FBSDID("$FreeBSD$"); #include <machine/trap.h> #include <machine/mmuvar.h> +#include "mmu_oea64.h" #include "mmu_if.h" +#include "moea64_if.h" -#define MOEA_DEBUG - -#define TODO panic("%s: not implemented", __func__); void moea64_release_vsid(uint64_t vsid); uintptr_t moea64_get_unique_vsid(void); -static __inline register_t -cntlzd(volatile register_t a) { - register_t b; - __asm ("cntlzd %0, %1" : "=r"(b) : "r"(a)); - return b; -} - -#define PTESYNC() __asm __volatile("ptesync"); -#define TLBSYNC() __asm __volatile("tlbsync; ptesync"); -#define SYNC() __asm __volatile("sync"); -#define EIEIO() __asm __volatile("eieio"); - -/* - * The tlbie instruction must be executed in 64-bit mode - * so we have to twiddle MSR[SF] around every invocation. - * Just to add to the fun, exceptions must be off as well - * so that we can't trap in 64-bit mode. What a pain. 
- */ -struct mtx tlbie_mutex; - -static __inline void -TLBIE(uint64_t vpn) { -#ifndef __powerpc64__ - register_t vpn_hi, vpn_lo; - register_t msr; - register_t scratch; -#endif - - vpn <<= ADDR_PIDX_SHFT; - vpn &= ~(0xffffULL << 48); - - mtx_lock_spin(&tlbie_mutex); -#ifdef __powerpc64__ - __asm __volatile("\ - ptesync; \ - tlbie %0; \ - eieio; \ - tlbsync; \ - ptesync;" - :: "r"(vpn) : "memory"); -#else - vpn_hi = (uint32_t)(vpn >> 32); - vpn_lo = (uint32_t)vpn; - - __asm __volatile("\ - mfmsr %0; \ - mr %1, %0; \ - insrdi %1,%5,1,0; \ - mtmsrd %1; isync; \ - ptesync; \ - \ - sld %1,%2,%4; \ - or %1,%1,%3; \ - tlbie %1; \ - \ - mtmsrd %0; isync; \ - eieio; \ - tlbsync; \ - ptesync;" - : "=r"(msr), "=r"(scratch) : "r"(vpn_hi), "r"(vpn_lo), "r"(32), "r"(1) - : "memory"); -#endif - mtx_unlock_spin(&tlbie_mutex); -} - #define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR); isync() #define ENABLE_TRANS(msr) mtmsr(msr); isync() @@ -235,24 +169,6 @@ TLBIE(uint64_t vpn) { #define VSID_TO_HASH(vsid) (((vsid) >> 4) & 0xfffff) #define VSID_HASH_MASK 0x0000007fffffffffULL -#define PVO_PTEGIDX_MASK 0x007UL /* which PTEG slot */ -#define PVO_PTEGIDX_VALID 0x008UL /* slot is valid */ -#define PVO_WIRED 0x010UL /* PVO entry is wired */ -#define PVO_MANAGED 0x020UL /* PVO entry is managed */ -#define PVO_BOOTSTRAP 0x080UL /* PVO entry allocated during - bootstrap */ -#define PVO_FAKE 0x100UL /* fictitious phys page */ -#define PVO_LARGE 0x200UL /* large page */ -#define PVO_VADDR(pvo) ((pvo)->pvo_vaddr & ~ADDR_POFF) -#define PVO_ISFAKE(pvo) ((pvo)->pvo_vaddr & PVO_FAKE) -#define PVO_PTEGIDX_GET(pvo) ((pvo)->pvo_vaddr & PVO_PTEGIDX_MASK) -#define PVO_PTEGIDX_ISSET(pvo) ((pvo)->pvo_vaddr & PVO_PTEGIDX_VALID) -#define PVO_PTEGIDX_CLR(pvo) \ - ((void)((pvo)->pvo_vaddr &= ~(PVO_PTEGIDX_VALID|PVO_PTEGIDX_MASK))) -#define PVO_PTEGIDX_SET(pvo, i) \ - ((void)((pvo)->pvo_vaddr |= (i)|PVO_PTEGIDX_VALID)) -#define PVO_VSID(pvo) ((pvo)->pvo_vpn >> 16) - #define MOEA_PVO_CHECK(pvo) #define LOCK_TABLE() mtx_lock(&moea64_table_mutex) @@ -277,7 +193,6 @@ static int regions_sz, pregions_sz; extern void bs_remap_earlyboot(void); - /* * Lock for the pteg and pvo tables. */ @@ -287,7 +202,6 @@ struct mtx moea64_slb_mutex; /* * PTEG data. */ -static struct lpteg *moea64_pteg_table; u_int moea64_pteg_count; u_int moea64_pteg_mask; @@ -337,8 +251,8 @@ SYSCTL_INT(_machdep, OID_AUTO, moea64_pvo_remove_calls, CTLFLAG_RD, &moea64_pvo_remove_calls, 0, ""); vm_offset_t moea64_scratchpage_va[2]; -uint64_t moea64_scratchpage_vpn[2]; -struct lpte *moea64_scratchpage_pte[2]; +struct pvo_entry *moea64_scratchpage_pvo[2]; +uintptr_t moea64_scratchpage_pte[2]; struct mtx moea64_scratchpage_mtx; uint64_t moea64_large_page_mask = 0; @@ -346,41 +260,23 @@ int moea64_large_page_size = 0; int moea64_large_page_shift = 0; /* - * Allocate physical memory for use in moea64_bootstrap. - */ -static vm_offset_t moea64_bootstrap_alloc(vm_size_t, u_int); - -/* - * PTE calls. - */ -static int moea64_pte_insert(u_int, struct lpte *); - -/* * PVO calls. */ -static int moea64_pvo_enter(pmap_t, uma_zone_t, struct pvo_head *, +static int moea64_pvo_enter(mmu_t, pmap_t, uma_zone_t, struct pvo_head *, vm_offset_t, vm_offset_t, uint64_t, int); -static void moea64_pvo_remove(struct pvo_entry *); +static void moea64_pvo_remove(mmu_t, struct pvo_entry *); static struct pvo_entry *moea64_pvo_find_va(pmap_t, vm_offset_t); -static struct lpte *moea64_pvo_to_pte(const struct pvo_entry *); /* * Utility routines. 
*/ -static void moea64_bootstrap(mmu_t mmup, - vm_offset_t kernelstart, vm_offset_t kernelend); -static void moea64_cpu_bootstrap(mmu_t, int ap); -static void moea64_enter_locked(pmap_t, vm_offset_t, vm_page_t, - vm_prot_t, boolean_t); -static boolean_t moea64_query_bit(vm_page_t, u_int64_t); -static u_int moea64_clear_bit(vm_page_t, u_int64_t); +static void moea64_enter_locked(mmu_t, pmap_t, vm_offset_t, + vm_page_t, vm_prot_t, boolean_t); +static boolean_t moea64_query_bit(mmu_t, vm_page_t, u_int64_t); +static u_int moea64_clear_bit(mmu_t, vm_page_t, u_int64_t); static void moea64_kremove(mmu_t, vm_offset_t); -static void moea64_syncicache(pmap_t pmap, vm_offset_t va, +static void moea64_syncicache(mmu_t, pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_size_t sz); -static void tlbia(void); -#ifdef __powerpc64__ -static void slbia(void); -#endif /* * Kernel MMU interface @@ -463,8 +359,6 @@ static mmu_method_t moea64_methods[] = { MMUMETHOD(mmu_page_set_memattr, moea64_page_set_memattr), /* Internal interfaces */ - MMUMETHOD(mmu_bootstrap, moea64_bootstrap), - MMUMETHOD(mmu_cpu_bootstrap, moea64_cpu_bootstrap), MMUMETHOD(mmu_mapdev, moea64_mapdev), MMUMETHOD(mmu_mapdev_attr, moea64_mapdev_attr), MMUMETHOD(mmu_unmapdev, moea64_unmapdev), @@ -476,7 +370,7 @@ static mmu_method_t moea64_methods[] = { { 0, 0 } }; -MMU_DEF(oea64_mmu, MMU_TYPE_G5, moea64_methods, 0); +MMU_DEF(oea64_mmu, "mmu_oea64_base", moea64_methods, 0); static __inline u_int va_to_pteg(uint64_t vsid, vm_offset_t addr, int large) @@ -542,81 +436,6 @@ moea64_pte_create(struct lpte *pt, uint64_t vsid, vm_offset_t va, pt->pte_lo = pte_lo; } -static __inline void -moea64_pte_synch(struct lpte *pt, struct lpte *pvo_pt) -{ - - ASSERT_TABLE_LOCK(); - - pvo_pt->pte_lo |= pt->pte_lo & (LPTE_REF | LPTE_CHG); -} - -static __inline void -moea64_pte_clear(struct lpte *pt, uint64_t vpn, u_int64_t ptebit) -{ - ASSERT_TABLE_LOCK(); - - /* - * As shown in Section 7.6.3.2.3 - */ - pt->pte_lo &= ~ptebit; - TLBIE(vpn); -} - -static __inline void -moea64_pte_set(struct lpte *pt, struct lpte *pvo_pt) -{ - - ASSERT_TABLE_LOCK(); - pvo_pt->pte_hi |= LPTE_VALID; - - /* - * Update the PTE as defined in section 7.6.3.1. - * Note that the REF/CHG bits are from pvo_pt and thus should have - * been saved so this routine can restore them (if desired). - */ - pt->pte_lo = pvo_pt->pte_lo; - EIEIO(); - pt->pte_hi = pvo_pt->pte_hi; - PTESYNC(); - moea64_pte_valid++; -} - -static __inline void -moea64_pte_unset(struct lpte *pt, struct lpte *pvo_pt, uint64_t vpn) -{ - ASSERT_TABLE_LOCK(); - pvo_pt->pte_hi &= ~LPTE_VALID; - - /* - * Force the reg & chg bits back into the PTEs. - */ - SYNC(); - - /* - * Invalidate the pte. - */ - pt->pte_hi &= ~LPTE_VALID; - TLBIE(vpn); - - /* - * Save the reg & chg bits. 
- */ - moea64_pte_synch(pt, pvo_pt); - moea64_pte_valid--; -} - -static __inline void -moea64_pte_change(struct lpte *pt, struct lpte *pvo_pt, uint64_t vpn) -{ - - /* - * Invalidate the PTE - */ - moea64_pte_unset(pt, pvo_pt, vpn); - moea64_pte_set(pt, pvo_pt); -} - static __inline uint64_t moea64_calc_wimg(vm_offset_t pa, vm_memattr_t ma) { @@ -696,49 +515,6 @@ om_cmp(const void *a, const void *b) } static void -moea64_cpu_bootstrap(mmu_t mmup, int ap) -{ - int i = 0; - #ifdef __powerpc64__ - struct slb *slb = PCPU_GET(slb); - #endif - - /* - * Initialize segment registers and MMU - */ - - mtmsr(mfmsr() & ~PSL_DR & ~PSL_IR); isync(); - - /* - * Install kernel SLB entries - */ - - #ifdef __powerpc64__ - slbia(); - - for (i = 0; i < 64; i++) { - if (!(slb[i].slbe & SLBE_VALID)) - continue; - - __asm __volatile ("slbmte %0, %1" :: - "r"(slb[i].slbv), "r"(slb[i].slbe)); - } - #else - for (i = 0; i < 16; i++) - mtsrin(i << ADDR_SR_SHFT, kernel_pmap->pm_sr[i]); - #endif - - /* - * Install page table - */ - - __asm __volatile ("ptesync; mtsdr1 %0; isync" - :: "r"((uintptr_t)moea64_pteg_table - | (64 - cntlzd(moea64_pteg_mask >> 11)))); - tlbia(); -} - -static void moea64_add_ofw_mappings(mmu_t mmup, phandle_t mmu, size_t sz) { struct ofw_map translations[sz/sizeof(struct ofw_map)]; @@ -874,7 +650,7 @@ moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart, pregions[i].mr_start + pregions[i].mr_size) pte_lo |= LPTE_G; - moea64_pvo_enter(kernel_pmap, moea64_upvo_zone, + moea64_pvo_enter(mmup, kernel_pmap, moea64_upvo_zone, &moea64_pvo_kunmanaged, pa, pa, pte_lo, PVO_WIRED | PVO_LARGE | VM_PROT_EXECUTE); @@ -882,10 +658,6 @@ moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart, } PMAP_UNLOCK(kernel_pmap); } else { - size = moea64_pteg_count * sizeof(struct lpteg); - off = (vm_offset_t)(moea64_pteg_table); - for (pa = off; pa < off + size; pa += PAGE_SIZE) - moea64_kenter(mmup, pa, pa); size = sizeof(struct pvo_head) * moea64_pteg_count; off = (vm_offset_t)(moea64_pvo_table); for (pa = off; pa < off + size; pa += PAGE_SIZE) @@ -911,18 +683,11 @@ moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart, ENABLE_TRANS(msr); } -static void -moea64_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) +void +moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) { - ihandle_t mmui; - phandle_t chosen; - phandle_t mmu; - size_t sz; int i, j; - vm_size_t size, physsz, hwphyssz; - vm_offset_t pa, va; - register_t msr; - void *dpcpu; + vm_size_t physsz, hwphyssz; #ifndef __powerpc64__ /* We don't have a direct map since there is no BAT */ @@ -1009,9 +774,6 @@ moea64_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) physmem = btoc(physsz); - /* - * Allocate PTEG table. - */ #ifdef PTEGCOUNT moea64_pteg_count = PTEGCOUNT; #else @@ -1022,27 +784,20 @@ moea64_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) moea64_pteg_count >>= 1; #endif /* PTEGCOUNT */ +} - size = moea64_pteg_count * sizeof(struct lpteg); - CTR2(KTR_PMAP, "moea64_bootstrap: %d PTEGs, %d bytes", - moea64_pteg_count, size); +void +moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) +{ + vm_size_t size; + register_t msr; + int i; /* - * We now need to allocate memory. This memory, to be allocated, - * has to reside in a page table. The page table we are about to - * allocate. We don't have BAT. So drop to data real mode for a minute - * as a measure of last resort. We do this a couple times. 
+ * Set PTEG mask */ - - moea64_pteg_table = (struct lpteg *)moea64_bootstrap_alloc(size, size); - DISABLE_TRANS(msr); - bzero((void *)moea64_pteg_table, moea64_pteg_count * sizeof(struct lpteg)); - ENABLE_TRANS(msr); - moea64_pteg_mask = moea64_pteg_count - 1; - CTR1(KTR_PMAP, "moea64_bootstrap: PTEG table at %p", moea64_pteg_table); - /* * Allocate pv/overflow lists. */ @@ -1066,11 +821,6 @@ moea64_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) mtx_init(&moea64_slb_mutex, "SLB table", NULL, MTX_DEF); /* - * Initialize the TLBIE lock. TLBIE can only be executed by one CPU. - */ - mtx_init(&tlbie_mutex, "tlbie mutex", NULL, MTX_SPIN); - - /* * Initialise the unmanaged pvo pool. */ moea64_bpvo_pool = (struct pvo_entry *)moea64_bootstrap_alloc( @@ -1109,6 +859,18 @@ moea64_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) */ moea64_setup_direct_map(mmup, kernelstart, kernelend); +} + +void +moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) +{ + ihandle_t mmui; + phandle_t chosen; + phandle_t mmu; + size_t sz; + int i; + vm_offset_t pa, va; + void *dpcpu; /* * Set up the Open Firmware pmap and add its mappings if not in real @@ -1137,7 +899,7 @@ moea64_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) /* * Initialize MMU and remap early physical mappings */ - moea64_cpu_bootstrap(mmup,0); + MMU_CPU_BOOTSTRAP(mmup,0); mtmsr(mfmsr() | PSL_DR | PSL_IR); isync(); pmap_bootstrapped++; bs_remap_earlyboot(); @@ -1173,47 +935,6 @@ moea64_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) #endif /* - * Allocate some things for page zeroing. We put this directly - * in the page table, marked with LPTE_LOCKED, to avoid any - * of the PVO book-keeping or other parts of the VM system - * from even knowing that this hack exists. - */ - - if (!hw_direct_map) { - mtx_init(&moea64_scratchpage_mtx, "pvo zero page", NULL, - MTX_DEF); - for (i = 0; i < 2; i++) { - struct lpte pt; - uint64_t vsid; - int pteidx, ptegidx; - - moea64_scratchpage_va[i] = (virtual_end+1) - PAGE_SIZE; - virtual_end -= PAGE_SIZE; - - LOCK_TABLE(); - - vsid = va_to_vsid(kernel_pmap, - moea64_scratchpage_va[i]); - moea64_pte_create(&pt, vsid, moea64_scratchpage_va[i], - LPTE_NOEXEC, 0); - pt.pte_hi |= LPTE_LOCKED; - - moea64_scratchpage_vpn[i] = (vsid << 16) | - ((moea64_scratchpage_va[i] & ADDR_PIDX) >> - ADDR_PIDX_SHFT); - ptegidx = va_to_pteg(vsid, moea64_scratchpage_va[i], 0); - pteidx = moea64_pte_insert(ptegidx, &pt); - if (pt.pte_hi & LPTE_HID) - ptegidx ^= moea64_pteg_mask; - - moea64_scratchpage_pte[i] = - &moea64_pteg_table[ptegidx].pt[pteidx]; - - UNLOCK_TABLE(); - } - } - - /* * Allocate a kernel stack with a guard page for thread0 and map it * into the kernel page map. */ @@ -1255,6 +976,36 @@ moea64_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) va += PAGE_SIZE; } dpcpu_init(dpcpu, 0); + + /* + * Allocate some things for page zeroing. We put this directly + * in the page table, marked with LPTE_LOCKED, to avoid any + * of the PVO book-keeping or other parts of the VM system + * from even knowing that this hack exists. 
+ */ + + if (!hw_direct_map) { + mtx_init(&moea64_scratchpage_mtx, "pvo zero page", NULL, + MTX_DEF); + for (i = 0; i < 2; i++) { + moea64_scratchpage_va[i] = (virtual_end+1) - PAGE_SIZE; + virtual_end -= PAGE_SIZE; + + moea64_kenter(mmup, moea64_scratchpage_va[i], 0); + + moea64_scratchpage_pvo[i] = moea64_pvo_find_va( + kernel_pmap, (vm_offset_t)moea64_scratchpage_va[i]); + LOCK_TABLE(); + moea64_scratchpage_pte[i] = MOEA64_PVO_TO_PTE( + mmup, moea64_scratchpage_pvo[i]); + moea64_scratchpage_pvo[i]->pvo_pte.lpte.pte_hi + |= LPTE_LOCKED; + MOEA64_PTE_CHANGE(mmup, moea64_scratchpage_pte[i], + &moea64_scratchpage_pvo[i]->pvo_pte.lpte, + moea64_scratchpage_pvo[i]->pvo_vpn); + UNLOCK_TABLE(); + } + } } /* @@ -1294,7 +1045,7 @@ void moea64_change_wiring(mmu_t mmu, pmap_t pm, vm_offset_t va, boolean_t wired) { struct pvo_entry *pvo; - struct lpte *pt; + uintptr_t pt; uint64_t vsid; int i, ptegidx; @@ -1303,7 +1054,7 @@ moea64_change_wiring(mmu_t mmu, pmap_t pm, vm_offset_t va, boolean_t wired) if (pvo != NULL) { LOCK_TABLE(); - pt = moea64_pvo_to_pte(pvo); + pt = MOEA64_PVO_TO_PTE(mmu, pvo); if (wired) { if ((pvo->pvo_vaddr & PVO_WIRED) == 0) @@ -1317,9 +1068,9 @@ moea64_change_wiring(mmu_t mmu, pmap_t pm, vm_offset_t va, boolean_t wired) pvo->pvo_pte.lpte.pte_hi &= ~LPTE_WIRED; } - if (pt != NULL) { + if (pt != -1) { /* Update wiring flag in page table. */ - moea64_pte_change(pt, &pvo->pvo_pte.lpte, + MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn); } else if (wired) { /* @@ -1330,7 +1081,8 @@ moea64_change_wiring(mmu_t mmu, pmap_t pm, vm_offset_t va, boolean_t wired) ptegidx = va_to_pteg(vsid, PVO_VADDR(pvo), pvo->pvo_vaddr & PVO_LARGE); - i = moea64_pte_insert(ptegidx, &pvo->pvo_pte.lpte); + i = MOEA64_PTE_INSERT(mmu, ptegidx, &pvo->pvo_pte.lpte); + if (i >= 0) { PVO_PTEGIDX_CLR(pvo); PVO_PTEGIDX_SET(pvo, i); @@ -1350,22 +1102,18 @@ moea64_change_wiring(mmu_t mmu, pmap_t pm, vm_offset_t va, boolean_t wired) */ static __inline -void moea64_set_scratchpage_pa(int which, vm_offset_t pa) { +void moea64_set_scratchpage_pa(mmu_t mmup, int which, vm_offset_t pa) { KASSERT(!hw_direct_map, ("Using OEA64 scratchpage with a direct map!")); mtx_assert(&moea64_scratchpage_mtx, MA_OWNED); - moea64_scratchpage_pte[which]->pte_hi &= ~LPTE_VALID; - TLBIE(moea64_scratchpage_vpn[which]); - - moea64_scratchpage_pte[which]->pte_lo &= + moea64_scratchpage_pvo[which]->pvo_pte.lpte.pte_lo &= ~(LPTE_WIMG | LPTE_RPGN); - moea64_scratchpage_pte[which]->pte_lo |= + moea64_scratchpage_pvo[which]->pvo_pte.lpte.pte_lo |= moea64_calc_wimg(pa, VM_MEMATTR_DEFAULT) | (uint64_t)pa; - EIEIO(); - - moea64_scratchpage_pte[which]->pte_hi |= LPTE_VALID; - PTESYNC(); isync(); + MOEA64_PTE_CHANGE(mmup, moea64_scratchpage_pte[which], + &moea64_scratchpage_pvo[which]->pvo_pte.lpte, + moea64_scratchpage_pvo[which]->pvo_vpn); } void @@ -1382,8 +1130,8 @@ moea64_copy_page(mmu_t mmu, vm_page_t msrc, vm_page_t mdst) } else { mtx_lock(&moea64_scratchpage_mtx); - moea64_set_scratchpage_pa(0,src); - moea64_set_scratchpage_pa(1,dst); + moea64_set_scratchpage_pa(mmu, 0, src); + moea64_set_scratchpage_pa(mmu, 1, dst); kcopy((void *)moea64_scratchpage_va[0], (void *)moea64_scratchpage_va[1], PAGE_SIZE); @@ -1406,7 +1154,7 @@ moea64_zero_page_area(mmu_t mmu, vm_page_t m, int off, int size) bzero((caddr_t)pa + off, size); } else { mtx_lock(&moea64_scratchpage_mtx); - moea64_set_scratchpage_pa(0,pa); + moea64_set_scratchpage_pa(mmu, 0, pa); bzero((caddr_t)moea64_scratchpage_va[0] + off, size); mtx_unlock(&moea64_scratchpage_mtx); } @@ 
-1427,7 +1175,7 @@ moea64_zero_page(mmu_t mmu, vm_page_t m) if (!hw_direct_map) { mtx_lock(&moea64_scratchpage_mtx); - moea64_set_scratchpage_pa(0,pa); + moea64_set_scratchpage_pa(mmu, 0, pa); va = moea64_scratchpage_va[0]; } else { va = pa; @@ -1459,7 +1207,7 @@ moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, vm_page_lock_queues(); PMAP_LOCK(pmap); - moea64_enter_locked(pmap, va, m, prot, wired); + moea64_enter_locked(mmu, pmap, va, m, prot, wired); vm_page_unlock_queues(); PMAP_UNLOCK(pmap); } @@ -1473,8 +1221,8 @@ moea64_enter(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, */ static void -moea64_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, - boolean_t wired) +moea64_enter_locked(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_page_t m, + vm_prot_t prot, boolean_t wired) { struct pvo_head *pvo_head; uma_zone_t zone; @@ -1528,20 +1276,20 @@ moea64_enter_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, if ((m->flags & PG_FICTITIOUS) != 0) pvo_flags |= PVO_FAKE; - error = moea64_pvo_enter(pmap, zone, pvo_head, va, VM_PAGE_TO_PHYS(m), - pte_lo, pvo_flags); + error = moea64_pvo_enter(mmu, pmap, zone, pvo_head, va, + VM_PAGE_TO_PHYS(m), pte_lo, pvo_flags); /* * Flush the page from the instruction cache if this page is * mapped executable and cacheable. */ - if ((pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { - moea64_syncicache(pmap, va, VM_PAGE_TO_PHYS(m), PAGE_SIZE); - } + if ((pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) + moea64_syncicache(mmu, pmap, va, VM_PAGE_TO_PHYS(m), PAGE_SIZE); } static void -moea64_syncicache(pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_size_t sz) +moea64_syncicache(mmu_t mmu, pmap_t pmap, vm_offset_t va, vm_offset_t pa, + vm_size_t sz) { /* @@ -1568,7 +1316,7 @@ moea64_syncicache(pmap_t pmap, vm_offset_t va, vm_offset_t pa, vm_size_t sz) mtx_lock(&moea64_scratchpage_mtx); - moea64_set_scratchpage_pa(1,pa & ~ADDR_POFF); + moea64_set_scratchpage_pa(mmu, 1, pa & ~ADDR_POFF); __syncicache((void *)(moea64_scratchpage_va[1] + (va & ADDR_POFF)), sz); @@ -1600,7 +1348,7 @@ moea64_enter_object(mmu_t mmu, pmap_t pm, vm_offset_t start, vm_offset_t end, vm_page_lock_queues(); PMAP_LOCK(pm); while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { - moea64_enter_locked(pm, start + ptoa(diff), m, prot & + moea64_enter_locked(mmu, pm, start + ptoa(diff), m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); m = TAILQ_NEXT(m, listq); } @@ -1615,8 +1363,8 @@ moea64_enter_quick(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_page_t m, vm_page_lock_queues(); PMAP_LOCK(pm); - moea64_enter_locked(pm, va, m, prot & (VM_PROT_READ | VM_PROT_EXECUTE), - FALSE); + moea64_enter_locked(mmu, pm, va, m, + prot & (VM_PROT_READ | VM_PROT_EXECUTE), FALSE); vm_page_unlock_queues(); PMAP_UNLOCK(pm); } @@ -1669,6 +1417,8 @@ retry: return (m); } +static mmu_t installed_mmu; + static void * moea64_uma_page_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) { @@ -1709,7 +1459,7 @@ moea64_uma_page_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) va = VM_PAGE_TO_PHYS(m); - moea64_pvo_enter(kernel_pmap, moea64_upvo_zone, + moea64_pvo_enter(installed_mmu, kernel_pmap, moea64_upvo_zone, &moea64_pvo_kunmanaged, va, VM_PAGE_TO_PHYS(m), LPTE_M, PVO_WIRED | PVO_BOOTSTRAP); @@ -1736,6 +1486,7 @@ moea64_init(mmu_t mmu) UMA_ZONE_VM | UMA_ZONE_NOFREE); if (!hw_direct_map) { + installed_mmu = mmu; uma_zone_set_allocf(moea64_upvo_zone,moea64_uma_page_alloc); uma_zone_set_allocf(moea64_mpvo_zone,moea64_uma_page_alloc); } @@ 
-1749,7 +1500,7 @@ moea64_is_referenced(mmu_t mmu, vm_page_t m) KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("moea64_is_referenced: page %p is not managed", m)); - return (moea64_query_bit(m, PTE_REF)); + return (moea64_query_bit(mmu, m, PTE_REF)); } boolean_t @@ -1768,7 +1519,7 @@ moea64_is_modified(mmu_t mmu, vm_page_t m) if ((m->oflags & VPO_BUSY) == 0 && (m->flags & PG_WRITEABLE) == 0) return (FALSE); - return (moea64_query_bit(m, LPTE_CHG)); + return (moea64_query_bit(mmu, m, LPTE_CHG)); } boolean_t @@ -1790,7 +1541,7 @@ moea64_clear_reference(mmu_t mmu, vm_page_t m) KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("moea64_clear_reference: page %p is not managed", m)); - moea64_clear_bit(m, LPTE_REF); + moea64_clear_bit(mmu, m, LPTE_REF); } void @@ -1810,7 +1561,7 @@ moea64_clear_modify(mmu_t mmu, vm_page_t m) */ if ((m->flags & PG_WRITEABLE) == 0) return; - moea64_clear_bit(m, LPTE_CHG); + moea64_clear_bit(mmu, m, LPTE_CHG); } /* @@ -1820,7 +1571,7 @@ void moea64_remove_write(mmu_t mmu, vm_page_t m) { struct pvo_entry *pvo; - struct lpte *pt; + uintptr_t pt; pmap_t pmap; uint64_t lo; @@ -1838,21 +1589,21 @@ moea64_remove_write(mmu_t mmu, vm_page_t m) return; vm_page_lock_queues(); lo = moea64_attr_fetch(m); - SYNC(); + powerpc_sync(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); LOCK_TABLE(); if ((pvo->pvo_pte.lpte.pte_lo & LPTE_PP) != LPTE_BR) { - pt = moea64_pvo_to_pte(pvo); + pt = MOEA64_PVO_TO_PTE(mmu, pvo); pvo->pvo_pte.lpte.pte_lo &= ~LPTE_PP; pvo->pvo_pte.lpte.pte_lo |= LPTE_BR; - if (pt != NULL) { - moea64_pte_synch(pt, &pvo->pvo_pte.lpte); + if (pt != -1) { + MOEA64_PTE_SYNCH(mmu, pt, &pvo->pvo_pte.lpte); lo |= pvo->pvo_pte.lpte.pte_lo; pvo->pvo_pte.lpte.pte_lo &= ~LPTE_CHG; - moea64_pte_change(pt, &pvo->pvo_pte.lpte, - pvo->pvo_vpn); + MOEA64_PTE_CHANGE(mmu, pt, + &pvo->pvo_pte.lpte, pvo->pvo_vpn); if (pvo->pvo_pmap == kernel_pmap) isync(); } @@ -1886,7 +1637,7 @@ moea64_ts_referenced(mmu_t mmu, vm_page_t m) KASSERT((m->flags & (PG_FICTITIOUS | PG_UNMANAGED)) == 0, ("moea64_ts_referenced: page %p is not managed", m)); - return (moea64_clear_bit(m, LPTE_REF)); + return (moea64_clear_bit(mmu, m, LPTE_REF)); } /* @@ -1897,7 +1648,7 @@ moea64_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma) { struct pvo_entry *pvo; struct pvo_head *pvo_head; - struct lpte *pt; + uintptr_t pt; pmap_t pmap; uint64_t lo; @@ -1913,11 +1664,11 @@ moea64_page_set_memattr(mmu_t mmu, vm_page_t m, vm_memattr_t ma) pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); LOCK_TABLE(); - pt = moea64_pvo_to_pte(pvo); + pt = MOEA64_PVO_TO_PTE(mmu, pvo); pvo->pvo_pte.lpte.pte_lo &= ~LPTE_WIMG; pvo->pvo_pte.lpte.pte_lo |= lo; - if (pt != NULL) { - moea64_pte_change(pt, &pvo->pvo_pte.lpte, + if (pt != -1) { + MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn); if (pvo->pvo_pmap == kernel_pmap) isync(); @@ -1941,7 +1692,7 @@ moea64_kenter_attr(mmu_t mmu, vm_offset_t va, vm_offset_t pa, vm_memattr_t ma) pte_lo = moea64_calc_wimg(pa, ma); PMAP_LOCK(kernel_pmap); - error = moea64_pvo_enter(kernel_pmap, moea64_upvo_zone, + error = moea64_pvo_enter(mmu, kernel_pmap, moea64_upvo_zone, &moea64_pvo_kunmanaged, va, pa, pte_lo, PVO_WIRED | VM_PROT_EXECUTE); @@ -1952,9 +1703,8 @@ moea64_kenter_attr(mmu_t mmu, vm_offset_t va, vm_offset_t pa, vm_memattr_t ma) /* * Flush the memory from the instruction cache. 
*/ - if ((pte_lo & (LPTE_I | LPTE_G)) == 0) { + if ((pte_lo & (LPTE_I | LPTE_G)) == 0) __syncicache((void *)va, PAGE_SIZE); - } PMAP_UNLOCK(kernel_pmap); } @@ -2183,7 +1933,7 @@ moea64_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) { struct pvo_entry *pvo; - struct lpte *pt; + uintptr_t pt; CTR4(KTR_PMAP, "moea64_protect: pm=%p sva=%#x eva=%#x prot=%#x", pm, sva, eva, prot); @@ -2209,7 +1959,7 @@ moea64_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva, * copy. */ LOCK_TABLE(); - pt = moea64_pvo_to_pte(pvo); + pt = MOEA64_PVO_TO_PTE(mmu, pvo); /* * Change the protection of the page. @@ -2223,11 +1973,12 @@ moea64_protect(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva, /* * If the PVO is in the page table, update that pte as well. */ - if (pt != NULL) { - moea64_pte_change(pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn); + if (pt != -1) { + MOEA64_PTE_CHANGE(mmu, pt, &pvo->pvo_pte.lpte, + pvo->pvo_vpn); if ((pvo->pvo_pte.lpte.pte_lo & (LPTE_I | LPTE_G | LPTE_NOEXEC)) == 0) { - moea64_syncicache(pm, sva, + moea64_syncicache(mmu, pm, sva, pvo->pvo_pte.lpte.pte_lo & LPTE_RPGN, PAGE_SIZE); } @@ -2314,7 +2065,7 @@ moea64_remove(mmu_t mmu, pmap_t pm, vm_offset_t sva, vm_offset_t eva) for (; sva < eva; sva += PAGE_SIZE) { pvo = moea64_pvo_find_va(pm, sva); if (pvo != NULL) - moea64_pvo_remove(pvo); + moea64_pvo_remove(mmu, pvo); } vm_page_unlock_queues(); PMAP_UNLOCK(pm); @@ -2339,7 +2090,7 @@ moea64_remove_all(mmu_t mmu, vm_page_t m) MOEA_PVO_CHECK(pvo); /* sanity check */ pmap = pvo->pvo_pmap; PMAP_LOCK(pmap); - moea64_pvo_remove(pvo); + moea64_pvo_remove(mmu, pvo); PMAP_UNLOCK(pmap); } if ((m->flags & PG_WRITEABLE) && moea64_is_modified(mmu, m)) { @@ -2355,7 +2106,7 @@ moea64_remove_all(mmu_t mmu, vm_page_t m) * Can only be called from moea64_bootstrap before avail start and end are * calculated. 
*/ -static vm_offset_t +vm_offset_t moea64_bootstrap_alloc(vm_size_t size, u_int align) { vm_offset_t s, e; @@ -2396,53 +2147,10 @@ moea64_bootstrap_alloc(vm_size_t size, u_int align) panic("moea64_bootstrap_alloc: could not allocate memory"); } -static void -tlbia(void) -{ - vm_offset_t i; - #ifndef __powerpc64__ - register_t msr, scratch; - #endif - - TLBSYNC(); - - for (i = 0; i < 0xFF000; i += 0x00001000) { - #ifdef __powerpc64__ - __asm __volatile("tlbiel %0" :: "r"(i)); - #else - __asm __volatile("\ - mfmsr %0; \ - mr %1, %0; \ - insrdi %1,%3,1,0; \ - mtmsrd %1; \ - isync; \ - \ - tlbiel %2; \ - \ - mtmsrd %0; \ - isync;" - : "=r"(msr), "=r"(scratch) : "r"(i), "r"(1)); - #endif - } - - EIEIO(); - TLBSYNC(); -} - -#ifdef __powerpc64__ -static void -slbia(void) -{ - register_t seg0; - - __asm __volatile ("slbia"); - __asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) : "r"(0)); -} -#endif - static int -moea64_pvo_enter(pmap_t pm, uma_zone_t zone, struct pvo_head *pvo_head, - vm_offset_t va, vm_offset_t pa, uint64_t pte_lo, int flags) +moea64_pvo_enter(mmu_t mmu, pmap_t pm, uma_zone_t zone, + struct pvo_head *pvo_head, vm_offset_t va, vm_offset_t pa, + uint64_t pte_lo, int flags) { struct pvo_entry *pvo; uint64_t vsid; @@ -2488,7 +2196,7 @@ moea64_pvo_enter(pmap_t pm, uma_zone_t zone, struct pvo_head *pvo_head, (pte_lo & LPTE_PP)) { if (!(pvo->pvo_pte.lpte.pte_hi & LPTE_VALID)) { /* Re-insert if spilled */ - i = moea64_pte_insert(ptegidx, + i = MOEA64_PTE_INSERT(mmu, ptegidx, &pvo->pvo_pte.lpte); if (i >= 0) PVO_PTEGIDX_SET(pvo, i); @@ -2497,7 +2205,7 @@ moea64_pvo_enter(pmap_t pm, uma_zone_t zone, struct pvo_head *pvo_head, UNLOCK_TABLE(); return (0); } - moea64_pvo_remove(pvo); + moea64_pvo_remove(mmu, pvo); break; } } @@ -2572,7 +2280,7 @@ moea64_pvo_enter(pmap_t pm, uma_zone_t zone, struct pvo_head *pvo_head, /* * We hope this succeeds but it isn't required. */ - i = moea64_pte_insert(ptegidx, &pvo->pvo_pte.lpte); + i = MOEA64_PTE_INSERT(mmu, ptegidx, &pvo->pvo_pte.lpte); if (i >= 0) { PVO_PTEGIDX_SET(pvo, i); } else { @@ -2598,18 +2306,18 @@ moea64_pvo_enter(pmap_t pm, uma_zone_t zone, struct pvo_head *pvo_head, } static void -moea64_pvo_remove(struct pvo_entry *pvo) +moea64_pvo_remove(mmu_t mmu, struct pvo_entry *pvo) { - struct lpte *pt; + uintptr_t pt; /* * If there is an active pte entry, we need to deactivate it (and * save the ref & cfg bits). */ LOCK_TABLE(); - pt = moea64_pvo_to_pte(pvo); - if (pt != NULL) { - moea64_pte_unset(pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn); + pt = MOEA64_PVO_TO_PTE(mmu, pvo); + if (pt != -1) { + MOEA64_PTE_UNSET(mmu, pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn); PVO_PTEGIDX_CLR(pvo); } else { moea64_pte_overflow--; @@ -2698,192 +2406,11 @@ moea64_pvo_find_va(pmap_t pm, vm_offset_t va) return (pvo); } -static struct lpte * -moea64_pvo_to_pte(const struct pvo_entry *pvo) -{ - struct lpte *pt; - int pteidx, ptegidx; - uint64_t vsid; - - ASSERT_TABLE_LOCK(); - - /* If the PTEG index is not set, then there is no page table entry */ - if (!PVO_PTEGIDX_ISSET(pvo)) - return (NULL); - - /* - * Calculate the ptegidx - */ - vsid = PVO_VSID(pvo); - ptegidx = va_to_pteg(vsid, PVO_VADDR(pvo), - pvo->pvo_vaddr & PVO_LARGE); - - /* - * We can find the actual pte entry without searching by grabbing - * the PTEG index from 3 unused bits in pvo_vaddr and by - * noticing the HID bit. 
- */ - if (pvo->pvo_pte.lpte.pte_hi & LPTE_HID) - ptegidx ^= moea64_pteg_mask; - - pteidx = (ptegidx << 3) | PVO_PTEGIDX_GET(pvo); - - if ((pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) && - !PVO_PTEGIDX_ISSET(pvo)) { - panic("moea64_pvo_to_pte: pvo %p has valid pte in pvo but no " - "valid pte index", pvo); - } - - if ((pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) == 0 && - PVO_PTEGIDX_ISSET(pvo)) { - panic("moea64_pvo_to_pte: pvo %p has valid pte index in pvo " - "pvo but no valid pte", pvo); - } - - pt = &moea64_pteg_table[pteidx >> 3].pt[pteidx & 7]; - if ((pt->pte_hi ^ (pvo->pvo_pte.lpte.pte_hi & ~LPTE_VALID)) == - LPTE_VALID) { - if ((pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) == 0) { - panic("moea64_pvo_to_pte: pvo %p has valid pte in " - "moea64_pteg_table %p but invalid in pvo", pvo, pt); - } - - if (((pt->pte_lo ^ pvo->pvo_pte.lpte.pte_lo) & - ~(LPTE_M|LPTE_CHG|LPTE_REF)) != 0) { - panic("moea64_pvo_to_pte: pvo %p pte does not match " - "pte %p in moea64_pteg_table difference is %#x", - pvo, pt, - (uint32_t)(pt->pte_lo ^ pvo->pvo_pte.lpte.pte_lo)); - } - - return (pt); - } - - if (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) { - panic("moea64_pvo_to_pte: pvo %p has invalid pte %p in " - "moea64_pteg_table but valid in pvo", pvo, pt); - } - - return (NULL); -} - -static __inline int -moea64_pte_spillable_ident(u_int ptegidx) -{ - struct lpte *pt; - int i, j, k; - - /* Start at a random slot */ - i = mftb() % 8; - k = -1; - for (j = 0; j < 8; j++) { - pt = &moea64_pteg_table[ptegidx].pt[(i + j) % 8]; - if (pt->pte_hi & (LPTE_LOCKED | LPTE_WIRED)) - continue; - - /* This is a candidate, so remember it */ - k = (i + j) % 8; - - /* Try to get a page that has not been used lately */ - if (!(pt->pte_lo & LPTE_REF)) - return (k); - } - - return (k); -} - -static int -moea64_pte_insert(u_int ptegidx, struct lpte *pvo_pt) -{ - struct lpte *pt; - struct pvo_entry *pvo; - u_int pteg_bktidx; - int i; - - ASSERT_TABLE_LOCK(); - - /* - * First try primary hash. - */ - pteg_bktidx = ptegidx; - for (pt = moea64_pteg_table[pteg_bktidx].pt, i = 0; i < 8; i++, pt++) { - if ((pt->pte_hi & (LPTE_VALID | LPTE_LOCKED)) == 0) { - pvo_pt->pte_hi &= ~LPTE_HID; - moea64_pte_set(pt, pvo_pt); - return (i); - } - } - - /* - * Now try secondary hash. - */ - pteg_bktidx ^= moea64_pteg_mask; - for (pt = moea64_pteg_table[pteg_bktidx].pt, i = 0; i < 8; i++, pt++) { - if ((pt->pte_hi & (LPTE_VALID | LPTE_LOCKED)) == 0) { - pvo_pt->pte_hi |= LPTE_HID; - moea64_pte_set(pt, pvo_pt); - return (i); - } - } - - /* - * Out of luck. Find a PTE to sacrifice. - */ - pteg_bktidx = ptegidx; - i = moea64_pte_spillable_ident(pteg_bktidx); - if (i < 0) { - pteg_bktidx ^= moea64_pteg_mask; - i = moea64_pte_spillable_ident(pteg_bktidx); - } - - if (i < 0) { - /* No freeable slots in either PTEG? We're hosed. */ - panic("moea64_pte_insert: overflow"); - return (-1); - } - - if (pteg_bktidx == ptegidx) - pvo_pt->pte_hi &= ~LPTE_HID; - else - pvo_pt->pte_hi |= LPTE_HID; - - /* - * Synchronize the sacrifice PTE with its PVO, then mark both - * invalid. The PVO will be reused when/if the VM system comes - * here after a fault. 
- */ - pt = &moea64_pteg_table[pteg_bktidx].pt[i]; - - if (pt->pte_hi & LPTE_HID) - pteg_bktidx ^= moea64_pteg_mask; /* PTEs indexed by primary */ - - LIST_FOREACH(pvo, &moea64_pvo_table[pteg_bktidx], pvo_olink) { - if (pvo->pvo_pte.lpte.pte_hi == pt->pte_hi) { - KASSERT(pvo->pvo_pte.lpte.pte_hi & LPTE_VALID, - ("Invalid PVO for valid PTE!")); - moea64_pte_unset(pt, &pvo->pvo_pte.lpte, pvo->pvo_vpn); - PVO_PTEGIDX_CLR(pvo); - moea64_pte_overflow++; - break; - } - } - - KASSERT(pvo->pvo_pte.lpte.pte_hi == pt->pte_hi, - ("Unable to find PVO for spilled PTE")); - - /* - * Set the new PTE. - */ - moea64_pte_set(pt, pvo_pt); - - return (i); -} - static boolean_t -moea64_query_bit(vm_page_t m, u_int64_t ptebit) +moea64_query_bit(mmu_t mmu, vm_page_t m, u_int64_t ptebit) { struct pvo_entry *pvo; - struct lpte *pt; + uintptr_t pt; if (moea64_attr_fetch(m) & ptebit) return (TRUE); @@ -2910,7 +2437,7 @@ moea64_query_bit(vm_page_t m, u_int64_t ptebit) * themselves. Sync so that any pending REF/CHG bits are flushed to * the PTEs. */ - SYNC(); + powerpc_sync(); LIST_FOREACH(pvo, vm_page_to_pvoh(m), pvo_vlink) { MOEA_PVO_CHECK(pvo); /* sanity check */ @@ -2920,9 +2447,9 @@ moea64_query_bit(vm_page_t m, u_int64_t ptebit) * ptebit is set, cache it and return success. */ LOCK_TABLE(); - pt = moea64_pvo_to_pte(pvo); - if (pt != NULL) { - moea64_pte_synch(pt, &pvo->pvo_pte.lpte); + pt = MOEA64_PVO_TO_PTE(mmu, pvo); + if (pt != -1) { + MOEA64_PTE_SYNCH(mmu, pt, &pvo->pvo_pte.lpte); if (pvo->pvo_pte.lpte.pte_lo & ptebit) { UNLOCK_TABLE(); @@ -2940,11 +2467,11 @@ moea64_query_bit(vm_page_t m, u_int64_t ptebit) } static u_int -moea64_clear_bit(vm_page_t m, u_int64_t ptebit) +moea64_clear_bit(mmu_t mmu, vm_page_t m, u_int64_t ptebit) { u_int count; struct pvo_entry *pvo; - struct lpte *pt; + uintptr_t pt; vm_page_lock_queues(); @@ -2960,7 +2487,7 @@ moea64_clear_bit(vm_page_t m, u_int64_t ptebit) * table, we don't have to worry about further accesses setting the * REF/CHG bits. */ - SYNC(); + powerpc_sync(); /* * For each pvo entry, clear the pvo's ptebit. If this pvo has a @@ -2971,12 +2498,13 @@ moea64_clear_bit(vm_page_t m, u_int64_t ptebit) MOEA_PVO_CHECK(pvo); /* sanity check */ LOCK_TABLE(); - pt = moea64_pvo_to_pte(pvo); - if (pt != NULL) { - moea64_pte_synch(pt, &pvo->pvo_pte.lpte); + pt = MOEA64_PVO_TO_PTE(mmu, pvo); + if (pt != -1) { + MOEA64_PTE_SYNCH(mmu, pt, &pvo->pvo_pte.lpte); if (pvo->pvo_pte.lpte.pte_lo & ptebit) { count++; - moea64_pte_clear(pt, pvo->pvo_vpn, ptebit); + MOEA64_PTE_CLEAR(mmu, pt, &pvo->pvo_pte.lpte, + pvo->pvo_vpn, ptebit); } } pvo->pvo_pte.lpte.pte_lo &= ~ptebit; @@ -3058,7 +2586,7 @@ moea64_unmapdev(mmu_t mmu, vm_offset_t va, vm_size_t size) kmem_free(kernel_map, base, size); } -static void +void moea64_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) { struct pvo_entry *pvo; @@ -3074,7 +2602,7 @@ moea64_sync_icache(mmu_t mmu, pmap_t pm, vm_offset_t va, vm_size_t sz) if (pvo != NULL) { pa = (pvo->pvo_pte.pte.pte_lo & LPTE_RPGN) | (va & ADDR_POFF); - moea64_syncicache(pm, va, pa, len); + moea64_syncicache(mmu, pm, va, pa, len); } va += len; sz -= len; diff --git a/sys/powerpc/aim/mmu_oea64.h b/sys/powerpc/aim/mmu_oea64.h new file mode 100644 index 0000000..101181d --- /dev/null +++ b/sys/powerpc/aim/mmu_oea64.h @@ -0,0 +1,77 @@ +/*- + * Copyright (C) 2010 Nathan Whitehorn + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _POWERPC_AIM_MMU_OEA64_H +#define _POWERPC_AIM_MMU_OEA64_H + +#include <machine/mmuvar.h> + +extern mmu_def_t oea64_mmu; + +/* + * Helper routines + */ + +/* Allocate physical memory for use in moea64_bootstrap. */ +vm_offset_t moea64_bootstrap_alloc(vm_size_t, u_int); + +/* + * Bootstrap subroutines + * + * An MMU_BOOTSTRAP() implementation looks like this: + * moea64_early_bootstrap(); + * Allocate Page Table + * moea64_mid_bootstrap(); + * Add mappings for MMU resources + * moea64_late_bootstrap(); + */ + +void moea64_early_bootstrap(mmu_t mmup, vm_offset_t kernelstart, + vm_offset_t kernelend); +void moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, + vm_offset_t kernelend); +void moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelstart, + vm_offset_t kernelend); + +/* + * Statistics + */ + +extern u_int moea64_pte_valid; +extern u_int moea64_pte_overflow; + +/* + * State variables + */ + +extern struct pvo_head *moea64_pvo_table; +extern int moea64_large_page_shift; +extern u_int moea64_pteg_count; +extern u_int moea64_pteg_mask; + +#endif /* _POWERPC_AIM_MMU_OEA64_H */ + diff --git a/sys/powerpc/aim/moea64_if.m b/sys/powerpc/aim/moea64_if.m new file mode 100644 index 0000000..f041838 --- /dev/null +++ b/sys/powerpc/aim/moea64_if.m @@ -0,0 +1,115 @@ +#- +# Copyright (c) 2010 Nathan Whitehorn +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# $FreeBSD$ +# + + +#include <sys/param.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/systm.h> + +#include <vm/vm.h> +#include <vm/vm_page.h> + +#include <machine/mmuvar.h> + +/** + * MOEA64 kobj methods for 64-bit Book-S page table + * manipulation routines used, for example, by hypervisors. + */ + +INTERFACE moea64; + + +/** + * Copy ref/changed bits from PTE referenced by _pt_cookie to _pvo_pt. + */ +METHOD void pte_synch { + mmu_t _mmu; + uintptr_t _pt_cookie; + struct lpte *_pvo_pt; +}; + +/** + * Clear bits ptebit (a mask) from the low word of the PTE referenced by + * _pt_cookie. Note that _pvo_pt is for reference use only -- the bit should + * NOT be cleared there. + */ +METHOD void pte_clear { + mmu_t _mmu; + uintptr_t _pt_cookie; + struct lpte *_pvo_pt; + uint64_t _vpn; + uint64_t _ptebit; +}; + +/** + * Invalidate the PTE referenced by _pt_cookie, synchronizing its validity + * and ref/changed bits after completion. + */ +METHOD void pte_unset { + mmu_t _mmu; + uintptr_t _pt_cookie; + struct lpte *_pvo_pt; + uint64_t _vpn; +}; + +/** + * Update the PTE referenced by _pt_cookie with the values in _pvo_pt, + * making sure that the values of ref/changed bits are preserved and + * synchronized back to _pvo_pt. + */ +METHOD void pte_change { + mmu_t _mmu; + uintptr_t _pt_cookie; + struct lpte *_pvo_pt; + uint64_t _vpn; +}; + + +/** + * Insert the PTE _pvo_pt into the PTEG group _ptegidx, returning the index + * of the PTE in its group at completion, or -1 if no slots were free. Must + * not replace PTEs marked LPTE_WIRED or LPTE_LOCKED, and must set LPTE_HID + * and LPTE_VALID appropriately in _pvo_pt. + */ +METHOD int pte_insert { + mmu_t _mmu; + u_int _ptegidx; + struct lpte *_pvo_pt; +}; + +/** + * Return the page table reference cookie corresponding to _pvo, or -1 if + * the _pvo is not currently in the page table. + */ +METHOD uintptr_t pvo_to_pte { + mmu_t _mmu; + const struct pvo_entry *_pvo; +}; + + diff --git a/sys/powerpc/aim/moea64_native.c b/sys/powerpc/aim/moea64_native.c new file mode 100644 index 0000000..a386b93 --- /dev/null +++ b/sys/powerpc/aim/moea64_native.c @@ -0,0 +1,637 @@ +/*- + * Copyright (c) 2001 The NetBSD Foundation, Inc. + * All rights reserved. + * + * This code is derived from software contributed to The NetBSD Foundation + * by Matt Thomas <matt@3am-software.com> of Allegro Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. 
All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the NetBSD + * Foundation, Inc. and its contributors. + * 4. Neither the name of The NetBSD Foundation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS + * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS + * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ +/*- + * Copyright (C) 1995, 1996 Wolfgang Solfrank. + * Copyright (C) 1995, 1996 TooLs GmbH. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by TooLs GmbH. + * 4. The name of TooLs GmbH may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY TOOLS GMBH ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $NetBSD: pmap.c,v 1.28 2000/03/26 20:42:36 kleink Exp $ + */ +/*- + * Copyright (C) 2001 Benno Rice. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY Benno Rice ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL TOOLS GMBH BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +/* + * Native 64-bit page table operations for running without a hypervisor. + */ + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/ktr.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/sysctl.h> +#include <sys/systm.h> + +#include <sys/kdb.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_kern.h> +#include <vm/vm_page.h> +#include <vm/vm_map.h> +#include <vm/vm_object.h> +#include <vm/vm_extern.h> +#include <vm/vm_pageout.h> +#include <vm/vm_pager.h> + +#include <machine/md_var.h> +#include <machine/mmuvar.h> + +#include "mmu_oea64.h" +#include "mmu_if.h" +#include "moea64_if.h" + +#define PTESYNC() __asm __volatile("ptesync"); +#define TLBSYNC() __asm __volatile("tlbsync; ptesync"); +#define SYNC() __asm __volatile("sync"); +#define EIEIO() __asm __volatile("eieio"); + +#define VSID_HASH_MASK 0x0000007fffffffffULL + +/* + * The tlbie instruction must be executed in 64-bit mode + * so we have to twiddle MSR[SF] around every invocation. + * Just to add to the fun, exceptions must be off as well + * so that we can't trap in 64-bit mode. What a pain. + */ +struct mtx tlbie_mutex; + +static __inline void +TLBIE(uint64_t vpn) { +#ifndef __powerpc64__ + register_t vpn_hi, vpn_lo; + register_t msr; + register_t scratch; +#endif + + vpn <<= ADDR_PIDX_SHFT; + vpn &= ~(0xffffULL << 48); + + mtx_lock_spin(&tlbie_mutex); +#ifdef __powerpc64__ + __asm __volatile("\ + ptesync; \ + tlbie %0; \ + eieio; \ + tlbsync; \ + ptesync;" + :: "r"(vpn) : "memory"); +#else + vpn_hi = (uint32_t)(vpn >> 32); + vpn_lo = (uint32_t)vpn; + + __asm __volatile("\ + mfmsr %0; \ + mr %1, %0; \ + insrdi %1,%5,1,0; \ + mtmsrd %1; isync; \ + ptesync; \ + \ + sld %1,%2,%4; \ + or %1,%1,%3; \ + tlbie %1; \ + \ + mtmsrd %0; isync; \ + eieio; \ + tlbsync; \ + ptesync;" + : "=r"(msr), "=r"(scratch) : "r"(vpn_hi), "r"(vpn_lo), "r"(32), "r"(1) + : "memory"); +#endif + mtx_unlock_spin(&tlbie_mutex); +} + +#define DISABLE_TRANS(msr) msr = mfmsr(); mtmsr(msr & ~PSL_DR); isync() +#define ENABLE_TRANS(msr) mtmsr(msr); isync() + +/* + * PTEG data. + */ +static struct lpteg *moea64_pteg_table; + +/* + * PTE calls. 
+ */ +static int moea64_pte_insert_native(mmu_t, u_int, struct lpte *); +static uintptr_t moea64_pvo_to_pte_native(mmu_t, const struct pvo_entry *); +static void moea64_pte_synch_native(mmu_t, uintptr_t pt, + struct lpte *pvo_pt); +static void moea64_pte_clear_native(mmu_t, uintptr_t pt, + struct lpte *pvo_pt, uint64_t vpn, uint64_t ptebit); +static void moea64_pte_change_native(mmu_t, uintptr_t pt, + struct lpte *pvo_pt, uint64_t vpn); +static void moea64_pte_unset_native(mmu_t mmu, uintptr_t pt, + struct lpte *pvo_pt, uint64_t vpn); + +/* + * Utility routines. + */ +static void moea64_bootstrap_native(mmu_t mmup, + vm_offset_t kernelstart, vm_offset_t kernelend); +static void moea64_cpu_bootstrap_native(mmu_t, int ap); +static void tlbia(void); + +static mmu_method_t moea64_native_methods[] = { + /* Internal interfaces */ + MMUMETHOD(mmu_bootstrap, moea64_bootstrap_native), + MMUMETHOD(mmu_cpu_bootstrap, moea64_cpu_bootstrap_native), + + MMUMETHOD(moea64_pte_synch, moea64_pte_synch_native), + MMUMETHOD(moea64_pte_clear, moea64_pte_clear_native), + MMUMETHOD(moea64_pte_unset, moea64_pte_unset_native), + MMUMETHOD(moea64_pte_change, moea64_pte_change_native), + MMUMETHOD(moea64_pte_insert, moea64_pte_insert_native), + MMUMETHOD(moea64_pvo_to_pte, moea64_pvo_to_pte_native), + + { 0, 0 } +}; + +MMU_DEF_INHERIT(oea64_mmu_native, MMU_TYPE_G5, moea64_native_methods, + 0, oea64_mmu); + +static __inline u_int +va_to_pteg(uint64_t vsid, vm_offset_t addr, int large) +{ + uint64_t hash; + int shift; + + shift = large ? moea64_large_page_shift : ADDR_PIDX_SHFT; + hash = (vsid & VSID_HASH_MASK) ^ (((uint64_t)addr & ADDR_PIDX) >> + shift); + return (hash & moea64_pteg_mask); +} + +static void +moea64_pte_synch_native(mmu_t mmu, uintptr_t pt_cookie, struct lpte *pvo_pt) +{ + struct lpte *pt = (struct lpte *)pt_cookie; + + pvo_pt->pte_lo |= pt->pte_lo & (LPTE_REF | LPTE_CHG); +} + +static void +moea64_pte_clear_native(mmu_t mmu, uintptr_t pt_cookie, struct lpte *pvo_pt, + uint64_t vpn, uint64_t ptebit) +{ + struct lpte *pt = (struct lpte *)pt_cookie; + + /* + * As shown in Section 7.6.3.2.3 + */ + pt->pte_lo &= ~ptebit; + TLBIE(vpn); +} + +static void +moea64_pte_set_native(struct lpte *pt, struct lpte *pvo_pt) +{ + + pvo_pt->pte_hi |= LPTE_VALID; + + /* + * Update the PTE as defined in section 7.6.3.1. + * Note that the REF/CHG bits are from pvo_pt and thus should have + * been saved so this routine can restore them (if desired). + */ + pt->pte_lo = pvo_pt->pte_lo; + EIEIO(); + pt->pte_hi = pvo_pt->pte_hi; + PTESYNC(); + moea64_pte_valid++; +} + +static void +moea64_pte_unset_native(mmu_t mmu, uintptr_t pt_cookie, struct lpte *pvo_pt, + uint64_t vpn) +{ + struct lpte *pt = (struct lpte *)pt_cookie; + + pvo_pt->pte_hi &= ~LPTE_VALID; + + /* + * Force the reg & chg bits back into the PTEs. + */ + SYNC(); + + /* + * Invalidate the pte. + */ + pt->pte_hi &= ~LPTE_VALID; + TLBIE(vpn); + + /* + * Save the reg & chg bits. 
+ */ + moea64_pte_synch_native(mmu, pt_cookie, pvo_pt); + moea64_pte_valid--; +} + +static void +moea64_pte_change_native(mmu_t mmu, uintptr_t pt, struct lpte *pvo_pt, + uint64_t vpn) +{ + + /* + * Invalidate the PTE + */ + moea64_pte_unset_native(mmu, pt, pvo_pt, vpn); + moea64_pte_set_native((struct lpte *)pt, pvo_pt); +} + +static void +moea64_cpu_bootstrap_native(mmu_t mmup, int ap) +{ + int i = 0; + #ifdef __powerpc64__ + struct slb *slb = PCPU_GET(slb); + register_t seg0; + #endif + + /* + * Initialize segment registers and MMU + */ + + mtmsr(mfmsr() & ~PSL_DR & ~PSL_IR); isync(); + + /* + * Install kernel SLB entries + */ + + #ifdef __powerpc64__ + __asm __volatile ("slbia"); + __asm __volatile ("slbmfee %0,%1; slbie %0;" : "=r"(seg0) : + "r"(0)); + + for (i = 0; i < 64; i++) { + if (!(slb[i].slbe & SLBE_VALID)) + continue; + + __asm __volatile ("slbmte %0, %1" :: + "r"(slb[i].slbv), "r"(slb[i].slbe)); + } + #else + for (i = 0; i < 16; i++) + mtsrin(i << ADDR_SR_SHFT, kernel_pmap->pm_sr[i]); + #endif + + /* + * Install page table + */ + + __asm __volatile ("ptesync; mtsdr1 %0; isync" + :: "r"((uintptr_t)moea64_pteg_table + | (uintptr_t)(flsl(moea64_pteg_mask >> 11)))); + tlbia(); +} + +static void +moea64_bootstrap_native(mmu_t mmup, vm_offset_t kernelstart, + vm_offset_t kernelend) +{ + vm_size_t size; + vm_offset_t off; + vm_paddr_t pa; + register_t msr; + + moea64_early_bootstrap(mmup, kernelstart, kernelend); + + /* + * Allocate PTEG table. + */ + + size = moea64_pteg_count * sizeof(struct lpteg); + CTR2(KTR_PMAP, "moea64_bootstrap: %d PTEGs, %d bytes", + moea64_pteg_count, size); + + /* + * We now need to allocate memory. This memory, to be allocated, + * has to reside in a page table. The page table we are about to + * allocate. We don't have BAT. So drop to data real mode for a minute + * as a measure of last resort. We do this a couple times. + */ + + moea64_pteg_table = (struct lpteg *)moea64_bootstrap_alloc(size, size); + DISABLE_TRANS(msr); + bzero((void *)moea64_pteg_table, moea64_pteg_count * sizeof(struct lpteg)); + ENABLE_TRANS(msr); + + CTR1(KTR_PMAP, "moea64_bootstrap: PTEG table at %p", moea64_pteg_table); + + /* + * Initialize the TLBIE lock. TLBIE can only be executed by one CPU. + */ + mtx_init(&tlbie_mutex, "tlbie mutex", NULL, MTX_SPIN); + + moea64_mid_bootstrap(mmup, kernelstart, kernelend); + + /* + * Add a mapping for the page table itself if there is no direct map. 
+ */ + if (!hw_direct_map) { + size = moea64_pteg_count * sizeof(struct lpteg); + off = (vm_offset_t)(moea64_pteg_table); + DISABLE_TRANS(msr); + for (pa = off; pa < off + size; pa += PAGE_SIZE) + pmap_kenter(pa, pa); + ENABLE_TRANS(msr); + } + + /* Bring up virtual memory */ + moea64_late_bootstrap(mmup, kernelstart, kernelend); +} + +static void +tlbia(void) +{ + vm_offset_t i; + #ifndef __powerpc64__ + register_t msr, scratch; + #endif + + TLBSYNC(); + + for (i = 0; i < 0xFF000; i += 0x00001000) { + #ifdef __powerpc64__ + __asm __volatile("tlbiel %0" :: "r"(i)); + #else + __asm __volatile("\ + mfmsr %0; \ + mr %1, %0; \ + insrdi %1,%3,1,0; \ + mtmsrd %1; \ + isync; \ + \ + tlbiel %2; \ + \ + mtmsrd %0; \ + isync;" + : "=r"(msr), "=r"(scratch) : "r"(i), "r"(1)); + #endif + } + + EIEIO(); + TLBSYNC(); +} + +static uintptr_t +moea64_pvo_to_pte_native(mmu_t mmu, const struct pvo_entry *pvo) +{ + struct lpte *pt; + int pteidx, ptegidx; + uint64_t vsid; + + /* If the PTEG index is not set, then there is no page table entry */ + if (!PVO_PTEGIDX_ISSET(pvo)) + return (-1); + + /* + * Calculate the ptegidx + */ + vsid = PVO_VSID(pvo); + ptegidx = va_to_pteg(vsid, PVO_VADDR(pvo), + pvo->pvo_vaddr & PVO_LARGE); + + /* + * We can find the actual pte entry without searching by grabbing + * the PTEG index from 3 unused bits in pvo_vaddr and by + * noticing the HID bit. + */ + if (pvo->pvo_pte.lpte.pte_hi & LPTE_HID) + ptegidx ^= moea64_pteg_mask; + + pteidx = (ptegidx << 3) | PVO_PTEGIDX_GET(pvo); + + if ((pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) && + !PVO_PTEGIDX_ISSET(pvo)) { + panic("moea64_pvo_to_pte: pvo %p has valid pte in pvo but no " + "valid pte index", pvo); + } + + if ((pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) == 0 && + PVO_PTEGIDX_ISSET(pvo)) { + panic("moea64_pvo_to_pte: pvo %p has valid pte index in pvo " + "pvo but no valid pte", pvo); + } + + pt = &moea64_pteg_table[pteidx >> 3].pt[pteidx & 7]; + if ((pt->pte_hi ^ (pvo->pvo_pte.lpte.pte_hi & ~LPTE_VALID)) == + LPTE_VALID) { + if ((pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) == 0) { + panic("moea64_pvo_to_pte: pvo %p has valid pte in " + "moea64_pteg_table %p but invalid in pvo", pvo, pt); + } + + if (((pt->pte_lo ^ pvo->pvo_pte.lpte.pte_lo) & + ~(LPTE_M|LPTE_CHG|LPTE_REF)) != 0) { + panic("moea64_pvo_to_pte: pvo %p pte does not match " + "pte %p in moea64_pteg_table difference is %#x", + pvo, pt, + (uint32_t)(pt->pte_lo ^ pvo->pvo_pte.lpte.pte_lo)); + } + + return ((uintptr_t)pt); + } + + if (pvo->pvo_pte.lpte.pte_hi & LPTE_VALID) { + panic("moea64_pvo_to_pte: pvo %p has invalid pte %p in " + "moea64_pteg_table but valid in pvo", pvo, pt); + } + + return (-1); +} + +static __inline int +moea64_pte_spillable_ident(u_int ptegidx) +{ + struct lpte *pt; + int i, j, k; + + /* Start at a random slot */ + i = mftb() % 8; + k = -1; + for (j = 0; j < 8; j++) { + pt = &moea64_pteg_table[ptegidx].pt[(i + j) % 8]; + if (pt->pte_hi & (LPTE_LOCKED | LPTE_WIRED)) + continue; + + /* This is a candidate, so remember it */ + k = (i + j) % 8; + + /* Try to get a page that has not been used lately */ + if (!(pt->pte_lo & LPTE_REF)) + return (k); + } + + return (k); +} + +static int +moea64_pte_insert_native(mmu_t mmu, u_int ptegidx, struct lpte *pvo_pt) +{ + struct lpte *pt; + struct pvo_entry *pvo; + u_int pteg_bktidx; + int i; + + /* + * First try primary hash. 
+ */ + pteg_bktidx = ptegidx; + for (pt = moea64_pteg_table[pteg_bktidx].pt, i = 0; i < 8; i++, pt++) { + if ((pt->pte_hi & (LPTE_VALID | LPTE_LOCKED)) == 0) { + pvo_pt->pte_hi &= ~LPTE_HID; + moea64_pte_set_native(pt, pvo_pt); + return (i); + } + } + + /* + * Now try secondary hash. + */ + pteg_bktidx ^= moea64_pteg_mask; + for (pt = moea64_pteg_table[pteg_bktidx].pt, i = 0; i < 8; i++, pt++) { + if ((pt->pte_hi & (LPTE_VALID | LPTE_LOCKED)) == 0) { + pvo_pt->pte_hi |= LPTE_HID; + moea64_pte_set_native(pt, pvo_pt); + return (i); + } + } + + /* + * Out of luck. Find a PTE to sacrifice. + */ + pteg_bktidx = ptegidx; + i = moea64_pte_spillable_ident(pteg_bktidx); + if (i < 0) { + pteg_bktidx ^= moea64_pteg_mask; + i = moea64_pte_spillable_ident(pteg_bktidx); + } + + if (i < 0) { + /* No freeable slots in either PTEG? We're hosed. */ + panic("moea64_pte_insert: overflow"); + return (-1); + } + + if (pteg_bktidx == ptegidx) + pvo_pt->pte_hi &= ~LPTE_HID; + else + pvo_pt->pte_hi |= LPTE_HID; + + /* + * Synchronize the sacrifice PTE with its PVO, then mark both + * invalid. The PVO will be reused when/if the VM system comes + * here after a fault. + */ + pt = &moea64_pteg_table[pteg_bktidx].pt[i]; + + if (pt->pte_hi & LPTE_HID) + pteg_bktidx ^= moea64_pteg_mask; /* PTEs indexed by primary */ + + LIST_FOREACH(pvo, &moea64_pvo_table[pteg_bktidx], pvo_olink) { + if (pvo->pvo_pte.lpte.pte_hi == pt->pte_hi) { + KASSERT(pvo->pvo_pte.lpte.pte_hi & LPTE_VALID, + ("Invalid PVO for valid PTE!")); + moea64_pte_unset_native(mmu, (uintptr_t)pt, + &pvo->pvo_pte.lpte, pvo->pvo_vpn); + PVO_PTEGIDX_CLR(pvo); + moea64_pte_overflow++; + break; + } + } + + KASSERT(pvo->pvo_pte.lpte.pte_hi == pt->pte_hi, + ("Unable to find PVO for spilled PTE")); + + /* + * Set the new PTE. + */ + moea64_pte_set_native(pt, pvo_pt); + + return (i); +} + diff --git a/sys/powerpc/include/bus_dma.h b/sys/powerpc/include/bus_dma.h index d10a055..e070a94 100644 --- a/sys/powerpc/include/bus_dma.h +++ b/sys/powerpc/include/bus_dma.h @@ -30,4 +30,8 @@ #include <sys/bus_dma.h> +struct device; + +int bus_dma_tag_set_iommu(bus_dma_tag_t, struct device *iommu, void *cookie); + #endif /* _POWERPC_BUS_DMA_H_ */ diff --git a/sys/powerpc/include/pmap.h b/sys/powerpc/include/pmap.h index c030416..2b26185 100644 --- a/sys/powerpc/include/pmap.h +++ b/sys/powerpc/include/pmap.h @@ -121,6 +121,25 @@ struct pvo_entry { }; LIST_HEAD(pvo_head, pvo_entry); +#define PVO_PTEGIDX_MASK 0x007UL /* which PTEG slot */ +#define PVO_PTEGIDX_VALID 0x008UL /* slot is valid */ +#define PVO_WIRED 0x010UL /* PVO entry is wired */ +#define PVO_MANAGED 0x020UL /* PVO entry is managed */ +#define PVO_EXECUTABLE 0x040UL /* PVO entry is executable */ +#define PVO_BOOTSTRAP 0x080UL /* PVO entry allocated during + bootstrap */ +#define PVO_FAKE 0x100UL /* fictitious phys page */ +#define PVO_LARGE 0x200UL /* large page */ +#define PVO_VADDR(pvo) ((pvo)->pvo_vaddr & ~ADDR_POFF) +#define PVO_ISFAKE(pvo) ((pvo)->pvo_vaddr & PVO_FAKE) +#define PVO_PTEGIDX_GET(pvo) ((pvo)->pvo_vaddr & PVO_PTEGIDX_MASK) +#define PVO_PTEGIDX_ISSET(pvo) ((pvo)->pvo_vaddr & PVO_PTEGIDX_VALID) +#define PVO_PTEGIDX_CLR(pvo) \ + ((void)((pvo)->pvo_vaddr &= ~(PVO_PTEGIDX_VALID|PVO_PTEGIDX_MASK))) +#define PVO_PTEGIDX_SET(pvo, i) \ + ((void)((pvo)->pvo_vaddr |= (i)|PVO_PTEGIDX_VALID)) +#define PVO_VSID(pvo) ((pvo)->pvo_vpn >> 16) + struct md_page { u_int64_t mdpg_attrs; vm_memattr_t mdpg_cache_attrs; diff --git a/sys/powerpc/include/pte.h b/sys/powerpc/include/pte.h index 6c4eb93..8b9dd4e 100644 --- 
a/sys/powerpc/include/pte.h +++ b/sys/powerpc/include/pte.h @@ -95,8 +95,8 @@ struct lpteg { /* High quadword: */ #define LPTE_VSID_SHIFT 12 #define LPTE_API 0x0000000000000F80ULL -#define LPTE_WIRED 0x0000000000000010ULL -#define LPTE_LOCKED 0x0000000000000008ULL +#define LPTE_LOCKED 0x0000000000000040ULL +#define LPTE_WIRED 0x0000000000000008ULL #define LPTE_BIG 0x0000000000000004ULL /* 4kb/16Mb page */ #define LPTE_HID 0x0000000000000002ULL #define LPTE_VALID 0x0000000000000001ULL diff --git a/sys/powerpc/powerpc/busdma_machdep.c b/sys/powerpc/powerpc/busdma_machdep.c index f66413f..84e3bc6 100644 --- a/sys/powerpc/powerpc/busdma_machdep.c +++ b/sys/powerpc/powerpc/busdma_machdep.c @@ -53,7 +53,9 @@ __FBSDID("$FreeBSD$"); #include <machine/bus.h> #include <machine/md_var.h> -#define MAX_BPAGES 8192 +#include "iommu_if.h" + +#define MAX_BPAGES MIN(8192, physmem/40) struct bounce_zone; @@ -73,8 +75,9 @@ struct bus_dma_tag { int map_count; bus_dma_lock_t *lockfunc; void *lockfuncarg; - bus_dma_segment_t *segments; struct bounce_zone *bounce_zone; + device_t iommu; + void *iommu_cookie; }; struct bounce_page { @@ -121,6 +124,8 @@ struct bus_dmamap { bus_dma_tag_t dmat; void *buf; /* unmapped buffer pointer */ bus_size_t buflen; /* unmapped buffer length */ + bus_dma_segment_t *segments; + int nsegs; bus_dmamap_callback_t *callback; void *callback_arg; STAILQ_ENTRY(bus_dmamap) links; @@ -128,7 +133,6 @@ struct bus_dmamap { static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist; static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist; -static struct bus_dmamap nobounce_dmamap; static void init_bounce_pages(void *dummy); static int alloc_bounce_zone(bus_dma_tag_t dmat); @@ -156,10 +160,14 @@ run_filter(bus_dma_tag_t dmat, bus_addr_t paddr) retval = 0; do { - if (((paddr > dmat->lowaddr && paddr <= dmat->highaddr) - || ((paddr & (dmat->alignment - 1)) != 0)) - && (dmat->filter == NULL - || (*dmat->filter)(dmat->filterarg, paddr) != 0)) + if (dmat->filter == NULL && dmat->iommu == NULL && + paddr > dmat->lowaddr && paddr <= dmat->highaddr) + retval = 1; + if (dmat->filter == NULL && + (paddr & (dmat->alignment - 1)) != 0) + retval = 1; + if (dmat->filter != NULL && + (*dmat->filter)(dmat->filterarg, paddr) != 0) retval = 1; dmat = dmat->parent; @@ -258,7 +266,6 @@ bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, newtag->lockfunc = dflt_lock; newtag->lockfuncarg = NULL; } - newtag->segments = NULL; /* Take into account any restrictions imposed by our parent tag */ if (parent != NULL) { @@ -280,10 +287,14 @@ bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment, } if (newtag->parent != NULL) atomic_add_int(&parent->ref_count, 1); + newtag->iommu = parent->iommu; + newtag->iommu_cookie = parent->iommu_cookie; } - if (newtag->lowaddr < ptoa((vm_paddr_t)Maxmem) - || newtag->alignment > 1) + if (newtag->lowaddr < ptoa((vm_paddr_t)Maxmem) && newtag->iommu == NULL) + newtag->flags |= BUS_DMA_COULD_BOUNCE; + + if (newtag->alignment > 1) newtag->flags |= BUS_DMA_COULD_BOUNCE; if (((newtag->flags & BUS_DMA_COULD_BOUNCE) != 0) && @@ -343,8 +354,6 @@ bus_dma_tag_destroy(bus_dma_tag_t dmat) parent = dmat->parent; atomic_subtract_int(&dmat->ref_count, 1); if (dmat->ref_count == 0) { - if (dmat->segments != NULL) - free(dmat->segments, M_DEVBUF); free(dmat, M_DEVBUF); /* * Last reference count, so @@ -372,17 +381,15 @@ bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) error = 0; - if (dmat->segments == NULL) { - dmat->segments = (bus_dma_segment_t *)malloc( - 
sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF, - M_NOWAIT); - if (dmat->segments == NULL) { - CTR3(KTR_BUSDMA, "%s: tag %p error %d", - __func__, dmat, ENOMEM); - return (ENOMEM); - } + *mapp = (bus_dmamap_t)malloc(sizeof(**mapp), M_DEVBUF, + M_NOWAIT | M_ZERO); + if (*mapp == NULL) { + CTR3(KTR_BUSDMA, "%s: tag %p error %d", + __func__, dmat, ENOMEM); + return (ENOMEM); } + /* * Bouncing might be required if the driver asks for an active * exclusion region, a data alignment that is stricter than 1, and/or @@ -400,14 +407,6 @@ bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) } bz = dmat->bounce_zone; - *mapp = (bus_dmamap_t)malloc(sizeof(**mapp), M_DEVBUF, - M_NOWAIT | M_ZERO); - if (*mapp == NULL) { - CTR3(KTR_BUSDMA, "%s: tag %p error %d", - __func__, dmat, ENOMEM); - return (ENOMEM); - } - /* Initialize the new map */ STAILQ_INIT(&((*mapp)->bpages)); @@ -437,9 +436,18 @@ bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) } } bz->map_count++; - } else { - *mapp = NULL; } + + (*mapp)->nsegs = 0; + (*mapp)->segments = (bus_dma_segment_t *)malloc( + sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF, + M_NOWAIT); + if ((*mapp)->segments == NULL) { + CTR3(KTR_BUSDMA, "%s: tag %p error %d", + __func__, dmat, ENOMEM); + return (ENOMEM); + } + if (error == 0) dmat->map_count++; CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", @@ -454,7 +462,7 @@ bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp) int bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) { - if (map != NULL && map != &nobounce_dmamap) { + if (dmat->flags & BUS_DMA_COULD_BOUNCE) { if (STAILQ_FIRST(&map->bpages) != NULL) { CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, EBUSY); @@ -462,8 +470,9 @@ bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map) } if (dmat->bounce_zone) dmat->bounce_zone->map_count--; - free(map, M_DEVBUF); } + free(map->segments, M_DEVBUF); + free(map, M_DEVBUF); dmat->map_count--; CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat); return (0); @@ -486,19 +495,8 @@ bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, else mflags = M_WAITOK; - /* If we succeed, no mapping/bouncing will be required */ - *mapp = NULL; - - if (dmat->segments == NULL) { - dmat->segments = (bus_dma_segment_t *)malloc( - sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF, - mflags); - if (dmat->segments == NULL) { - CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", - __func__, dmat, dmat->flags, ENOMEM); - return (ENOMEM); - } - } + bus_dmamap_create(dmat, flags, mapp); + if (flags & BUS_DMA_ZERO) mflags |= M_ZERO; @@ -535,7 +533,7 @@ bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, #ifdef NOTYET if (flags & BUS_DMA_NOCACHE) pmap_change_attr((vm_offset_t)*vaddr, dmat->maxsize, - PAT_UNCACHEABLE); + VM_MEMATTR_UNCACHEABLE); #endif CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d", __func__, dmat, dmat->flags, 0); @@ -549,14 +547,10 @@ bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags, void bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map) { - /* - * dmamem does not need to be bounced, so the map should be - * NULL - */ - if (map != NULL) - panic("bus_dmamem_free: Invalid map freed\n"); + bus_dmamap_destroy(dmat, map); + #ifdef NOTYET - pmap_change_attr((vm_offset_t)vaddr, dmat->maxsize, PAT_WRITE_BACK); + pmap_change_attr((vm_offset_t)vaddr, dmat->maxsize, VM_MEMATTR_DEFAULT); #endif if ((dmat->maxsize <= PAGE_SIZE) && (dmat->alignment < dmat->maxsize) && @@ -591,18 +585,13 @@ 
_bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_addr_t paddr; int seg; - if (map == NULL) - map = &nobounce_dmamap; - - if ((map != &nobounce_dmamap && map->pagesneeded == 0) - && ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0)) { + if (map->pagesneeded == 0 && ((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0)) { vm_offset_t vendaddr; CTR4(KTR_BUSDMA, "lowaddr= %d Maxmem= %d, boundary= %d, " "alignment= %d", dmat->lowaddr, ptoa((vm_paddr_t)Maxmem), dmat->boundary, dmat->alignment); - CTR3(KTR_BUSDMA, "map= %p, nobouncemap= %p, pagesneeded= %d", - map, &nobounce_dmamap, map->pagesneeded); + CTR2(KTR_BUSDMA, "map= %p, pagesneeded= %d", map, map->pagesneeded); /* * Count the number of bounce pages * needed in order to complete this transfer @@ -731,29 +720,36 @@ bus_dmamap_load(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf, bus_size_t buflen, bus_dmamap_callback_t *callback, void *callback_arg, int flags) { - bus_addr_t lastaddr = 0; - int error, nsegs = 0; + bus_addr_t lastaddr = 0; + int error; - if (map != NULL) { + if (dmat->flags & BUS_DMA_COULD_BOUNCE) { flags |= BUS_DMA_WAITOK; map->callback = callback; map->callback_arg = callback_arg; } + map->nsegs = 0; error = _bus_dmamap_load_buffer(dmat, map, buf, buflen, NULL, flags, - &lastaddr, dmat->segments, &nsegs, 1); + &lastaddr, map->segments, &map->nsegs, 1); + map->nsegs++; CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", - __func__, dmat, dmat->flags, error, nsegs + 1); + __func__, dmat, dmat->flags, error, map->nsegs); if (error == EINPROGRESS) { return (error); } + if (dmat->iommu != NULL) + IOMMU_MAP(dmat->iommu, map->segments, &map->nsegs, dmat->lowaddr, + dmat->highaddr, dmat->alignment, dmat->boundary, + dmat->iommu_cookie); + if (error) - (*callback)(callback_arg, dmat->segments, 0, error); + (*callback)(callback_arg, map->segments, 0, error); else - (*callback)(callback_arg, dmat->segments, nsegs + 1, 0); + (*callback)(callback_arg, map->segments, map->nsegs, 0); /* * Return ENOMEM to the caller so that it can pass it up the stack. 
@@ -775,12 +771,12 @@ bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmamap_callback2_t *callback, void *callback_arg, int flags) { - int nsegs, error; + int error; M_ASSERTPKTHDR(m0); flags |= BUS_DMA_NOWAIT; - nsegs = 0; + map->nsegs = 0; error = 0; if (m0->m_pkthdr.len <= dmat->maxsize) { int first = 1; @@ -792,7 +788,7 @@ bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, error = _bus_dmamap_load_buffer(dmat, map, m->m_data, m->m_len, NULL, flags, &lastaddr, - dmat->segments, &nsegs, first); + map->segments, &map->nsegs, first); first = 0; } } @@ -800,15 +796,21 @@ bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, error = EINVAL; } + map->nsegs++; + if (dmat->iommu != NULL) + IOMMU_MAP(dmat->iommu, map->segments, &map->nsegs, dmat->lowaddr, + dmat->highaddr, dmat->alignment, dmat->boundary, + dmat->iommu_cookie); + if (error) { /* force "no valid mappings" in callback */ - (*callback)(callback_arg, dmat->segments, 0, 0, error); + (*callback)(callback_arg, map->segments, 0, 0, error); } else { - (*callback)(callback_arg, dmat->segments, - nsegs+1, m0->m_pkthdr.len, error); + (*callback)(callback_arg, map->segments, + map->nsegs, m0->m_pkthdr.len, error); } CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", - __func__, dmat, dmat->flags, error, nsegs + 1); + __func__, dmat, dmat->flags, error, map->nsegs); return (error); } @@ -844,6 +846,15 @@ bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map, /* XXX FIXME: Having to increment nsegs is really annoying */ ++*nsegs; + + if (dmat->iommu != NULL) + IOMMU_MAP(dmat->iommu, segs, nsegs, dmat->lowaddr, + dmat->highaddr, dmat->alignment, dmat->boundary, + dmat->iommu_cookie); + + map->nsegs = *nsegs; + memcpy(map->segments, segs, map->nsegs*sizeof(segs[0])); + CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", __func__, dmat, dmat->flags, error, *nsegs); return (error); @@ -859,7 +870,7 @@ bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, int flags) { bus_addr_t lastaddr = 0; - int nsegs, error, first, i; + int error, first, i; bus_size_t resid; struct iovec *iov; pmap_t pmap; @@ -875,7 +886,7 @@ bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, } else pmap = NULL; - nsegs = 0; + map->nsegs = 0; error = 0; first = 1; for (i = 0; i < uio->uio_iovcnt && resid != 0 && !error; i++) { @@ -890,22 +901,28 @@ bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, if (minlen > 0) { error = _bus_dmamap_load_buffer(dmat, map, addr, minlen, pmap, flags, &lastaddr, - dmat->segments, &nsegs, first); + map->segments, &map->nsegs, first); first = 0; resid -= minlen; } } + map->nsegs++; + if (dmat->iommu != NULL) + IOMMU_MAP(dmat->iommu, map->segments, &map->nsegs, dmat->lowaddr, + dmat->highaddr, dmat->alignment, dmat->boundary, + dmat->iommu_cookie); + if (error) { /* force "no valid mappings" in callback */ - (*callback)(callback_arg, dmat->segments, 0, 0, error); + (*callback)(callback_arg, map->segments, 0, 0, error); } else { - (*callback)(callback_arg, dmat->segments, - nsegs+1, uio->uio_resid, error); + (*callback)(callback_arg, map->segments, + map->nsegs, uio->uio_resid, error); } CTR5(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d nsegs %d", - __func__, dmat, dmat->flags, error, nsegs + 1); + __func__, dmat, dmat->flags, error, map->nsegs); return (error); } @@ -917,6 +934,11 @@ _bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map) { struct bounce_page *bpage; + if (dmat->iommu) { + IOMMU_UNMAP(dmat->iommu, map->segments, map->nsegs, dmat->iommu_cookie); + 
map->nsegs = 0; + } + while ((bpage = STAILQ_FIRST(&map->bpages)) != NULL) { STAILQ_REMOVE_HEAD(&map->bpages, links); free_bounce_page(dmat, bpage); @@ -1122,8 +1144,6 @@ add_bounce_page(bus_dma_tag_t dmat, bus_dmamap_t map, vm_offset_t vaddr, struct bounce_page *bpage; KASSERT(dmat->bounce_zone != NULL, ("no bounce zone in dma tag")); - KASSERT(map != NULL && map != &nobounce_dmamap, - ("add_bounce_page: bad map %p", map)); bz = dmat->bounce_zone; if (map->pagesneeded == 0) @@ -1210,3 +1230,13 @@ busdma_swi(void) } mtx_unlock(&bounce_lock); } + +int +bus_dma_tag_set_iommu(bus_dma_tag_t tag, struct device *iommu, void *cookie) +{ + tag->iommu = iommu; + tag->iommu_cookie = cookie; + + return (0); +} + diff --git a/sys/powerpc/powerpc/iommu_if.m b/sys/powerpc/powerpc/iommu_if.m new file mode 100644 index 0000000..dec70e3 --- /dev/null +++ b/sys/powerpc/powerpc/iommu_if.m @@ -0,0 +1,54 @@ +#- +# Copyright (c) 2010 Nathan Whitehorn +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +# SUCH DAMAGE. +# +# from: src/sys/kern/bus_if.m,v 1.21 2002/04/21 11:16:10 markm Exp +# $FreeBSD$ +# + +#include <machine/bus.h> + +#include <sys/bus.h> +#include <sys/bus_dma.h> + +INTERFACE iommu; + +METHOD int map { + device_t iommu; + bus_dma_segment_t *segs; + int *nsegs; + bus_addr_t lowaddr; + bus_addr_t highaddr; + bus_size_t alignment; + bus_size_t boundary; + void *cookie; +}; + +METHOD int unmap { + device_t iommu; + bus_dma_segment_t *segs; + int nsegs; + void *cookie; +}; + diff --git a/sys/sys/param.h b/sys/sys/param.h index 257d272..3c796e2 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -58,7 +58,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 900026 /* Master, propagated to newvers */ +#define __FreeBSD_version 900027 /* Master, propagated to newvers */ #ifdef _KERNEL #define P_OSREL_SIGSEGV 700004 diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 02f228c..48ef012 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -214,6 +214,7 @@ struct thread { lwpid_t td_tid; /* (b) Thread ID. */ sigqueue_t td_sigqueue; /* (c) Sigs arrived, not delivered. */ #define td_siglist td_sigqueue.sq_signals + u_char td_lend_user_pri; /* (t) Lend user pri. 
*/ /* Cleared during fork1() */ #define td_startzero td_flags @@ -343,7 +344,7 @@ do { \ #define TDF_CANSWAP 0x00000040 /* Thread can be swapped. */ #define TDF_SLEEPABORT 0x00000080 /* sleepq_abort was called. */ #define TDF_KTH_SUSP 0x00000100 /* kthread is suspended */ -#define TDF_UBORROWING 0x00000200 /* Thread is borrowing user pri. */ +#define TDF_UNUSED09 0x00000200 /* --available-- */ #define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */ #define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */ #define TDF_TIMOFAIL 0x00001000 /* Timeout from sleep after we were awake. */ diff --git a/sys/sys/queue.h b/sys/sys/queue.h index 257679b..f0bae8d 100644 --- a/sys/sys/queue.h +++ b/sys/sys/queue.h @@ -213,6 +213,12 @@ struct { \ SLIST_FIRST((head)) = SLIST_NEXT(SLIST_FIRST((head)), field); \ } while (0) +#define SLIST_SWAP(head1, head2, type) do { \ + struct type *swap_first = SLIST_FIRST(head1); \ + SLIST_FIRST(head1) = SLIST_FIRST(head2); \ + SLIST_FIRST(head2) = swap_first; \ +} while (0) + /* * Singly-linked Tail queue declarations. */ diff --git a/sys/teken/teken_subr.h b/sys/teken/teken_subr.h index c84679a..2934bcc 100644 --- a/sys/teken/teken_subr.h +++ b/sys/teken/teken_subr.h @@ -1299,10 +1299,9 @@ teken_subr_vertical_position_absolute(teken_t *t, unsigned int row) { t->t_cursor.tp_row = t->t_originreg.ts_begin + row - 1; - if (row >= t->t_originreg.ts_end) + if (t->t_cursor.tp_row >= t->t_originreg.ts_end) t->t_cursor.tp_row = t->t_originreg.ts_end - 1; - t->t_stateflags &= ~TS_WRAPPED; teken_funcs_cursor(t); } diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index 35552a6..1a51af8 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -671,6 +671,41 @@ vm_map_wakeup(vm_map_t map) wakeup(&map->root); } +void +vm_map_busy(vm_map_t map) +{ + + VM_MAP_ASSERT_LOCKED(map); + map->busy++; +} + +void +vm_map_unbusy(vm_map_t map) +{ + + VM_MAP_ASSERT_LOCKED(map); + KASSERT(map->busy, ("vm_map_unbusy: not busy")); + if (--map->busy == 0 && (map->flags & MAP_BUSY_WAKEUP)) { + vm_map_modflags(map, 0, MAP_BUSY_WAKEUP); + wakeup(&map->busy); + } +} + +void +vm_map_wait_busy(vm_map_t map) +{ + + VM_MAP_ASSERT_LOCKED(map); + while (map->busy) { + vm_map_modflags(map, MAP_BUSY_WAKEUP, 0); + if (map->system_map) + msleep(&map->busy, &map->system_mtx, 0, "mbusy", 0); + else + sx_sleep(&map->busy, &map->lock, 0, "mbusy", 0); + } + map->timestamp++; +} + long vmspace_resident_count(struct vmspace *vmspace) { @@ -718,6 +753,7 @@ _vm_map_init(vm_map_t map, pmap_t pmap, vm_offset_t min, vm_offset_t max) map->flags = 0; map->root = NULL; map->timestamp = 0; + map->busy = 0; } void @@ -2382,12 +2418,14 @@ vm_map_wire(vm_map_t map, vm_offset_t start, vm_offset_t end, entry->object.vm_object->type == OBJT_SG); /* * Release the map lock, relying on the in-transition - * mark. + * mark. Mark the map busy for fork. 
*/ + vm_map_busy(map); vm_map_unlock(map); rv = vm_fault_wire(map, saved_start, saved_end, fictitious); vm_map_lock(map); + vm_map_unbusy(map); if (last_timestamp + 1 != map->timestamp) { /* * Look again for the entry because the map was @@ -2995,6 +3033,8 @@ vmspace_fork(struct vmspace *vm1, vm_ooffset_t *fork_charge) int locked; vm_map_lock(old_map); + if (old_map->busy) + vm_map_wait_busy(old_map); vm2 = vmspace_alloc(old_map->min_offset, old_map->max_offset); if (vm2 == NULL) goto unlock_and_return; diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index f7fc5f5..fecbffe 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -187,12 +187,14 @@ struct vm_map { pmap_t pmap; /* (c) Physical map */ #define min_offset header.start /* (c) */ #define max_offset header.end /* (c) */ + int busy; }; /* * vm_flags_t values */ #define MAP_WIREFUTURE 0x01 /* wire all future pages */ +#define MAP_BUSY_WAKEUP 0x02 #ifdef _KERNEL static __inline vm_offset_t @@ -275,6 +277,9 @@ int _vm_map_lock_upgrade(vm_map_t map, const char *file, int line); void _vm_map_lock_downgrade(vm_map_t map, const char *file, int line); int vm_map_locked(vm_map_t map); void vm_map_wakeup(vm_map_t map); +void vm_map_busy(vm_map_t map); +void vm_map_unbusy(vm_map_t map); +void vm_map_wait_busy(vm_map_t map); #define vm_map_lock(map) _vm_map_lock(map, LOCK_FILE, LOCK_LINE) #define vm_map_unlock(map) _vm_map_unlock(map, LOCK_FILE, LOCK_LINE) diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index f2dba2c..a73e03a 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -276,14 +276,14 @@ mmap(td, uap) if (addr + size < addr) return (EINVAL); } else { - /* - * XXX for non-fixed mappings where no hint is provided or - * the hint would fall in the potential heap space, - * place it after the end of the largest possible heap. - * - * There should really be a pmap call to determine a reasonable - * location. - */ + /* + * XXX for non-fixed mappings where no hint is provided or + * the hint would fall in the potential heap space, + * place it after the end of the largest possible heap. + * + * There should really be a pmap call to determine a reasonable + * location. + */ PROC_LOCK(td->td_proc); if (addr == 0 || (addr >= round_page((vm_offset_t)vms->vm_taddr) && diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 863b842..1208ea0 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -430,11 +430,12 @@ vm_page_startup(vm_offset_t vaddr) */ new_end = vm_reserv_startup(&vaddr, new_end, high_water); #endif -#ifdef __amd64__ +#if defined(__amd64__) || defined(__mips__) /* - * pmap_map on amd64 comes out of the direct-map, not kvm like i386, - * so the pages must be tracked for a crashdump to include this data. - * This includes the vm_page_array and the early UMA bootstrap pages. + * pmap_map on amd64 and mips can come out of the direct-map, not kvm + * like i386, so the pages must be tracked for a crashdump to include + * this data. This includes the vm_page_array and the early UMA + * bootstrap pages. 
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index f2dba2c..a73e03a 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -276,14 +276,14 @@ mmap(td, uap)
 		if (addr + size < addr)
 			return (EINVAL);
 	} else {
-	/*
-	 * XXX for non-fixed mappings where no hint is provided or
-	 * the hint would fall in the potential heap space,
-	 * place it after the end of the largest possible heap.
-	 *
-	 * There should really be a pmap call to determine a reasonable
-	 * location.
-	 */
+		/*
+		 * XXX for non-fixed mappings where no hint is provided or
+		 * the hint would fall in the potential heap space,
+		 * place it after the end of the largest possible heap.
+		 *
+		 * There should really be a pmap call to determine a reasonable
+		 * location.
+		 */
 		PROC_LOCK(td->td_proc);
 		if (addr == 0 ||
 		    (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 863b842..1208ea0 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -430,11 +430,12 @@ vm_page_startup(vm_offset_t vaddr)
 	 */
 	new_end = vm_reserv_startup(&vaddr, new_end, high_water);
 #endif
-#ifdef __amd64__
+#if defined(__amd64__) || defined(__mips__)
 	/*
-	 * pmap_map on amd64 comes out of the direct-map, not kvm like i386,
-	 * so the pages must be tracked for a crashdump to include this data.
-	 * This includes the vm_page_array and the early UMA bootstrap pages.
+	 * pmap_map on amd64 and mips can come out of the direct-map, not kvm
+	 * like i386, so the pages must be tracked for a crashdump to include
+	 * this data.  This includes the vm_page_array and the early UMA
+	 * bootstrap pages.
 	 */
 	for (pa = new_end; pa < phys_avail[biggestone + 1]; pa += PAGE_SIZE)
 		dump_add_page(pa);
diff --git a/sys/i386/i386/busdma_machdep.c b/sys/x86/x86/busdma_machdep.c
index 45ab8b3..feca1a6 100644
--- a/sys/i386/i386/busdma_machdep.c
+++ b/sys/x86/x86/busdma_machdep.c
@@ -28,9 +28,6 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
-#include <sys/kdb.h>
-#include <ddb/ddb.h>
-#include <ddb/db_output.h>
 #include <sys/systm.h>
 #include <sys/malloc.h>
 #include <sys/bus.h>
@@ -53,7 +50,11 @@ __FBSDID("$FreeBSD$");
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
 
+#ifdef __i386__
 #define MAX_BPAGES 512
+#else
+#define MAX_BPAGES 8192
+#endif
 #define BUS_DMA_COULD_BOUNCE	BUS_DMA_BUS3
 #define BUS_DMA_MIN_ALLOC_COMP	BUS_DMA_BUS4
 
@@ -249,8 +250,7 @@ bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
 	newtag->alignment = alignment;
 	newtag->boundary = boundary;
 	newtag->lowaddr = trunc_page((vm_paddr_t)lowaddr) + (PAGE_SIZE - 1);
-	newtag->highaddr = trunc_page((vm_paddr_t)highaddr) +
-	    (PAGE_SIZE - 1);
+	newtag->highaddr = trunc_page((vm_paddr_t)highaddr) + (PAGE_SIZE - 1);
 	newtag->filter = filter;
 	newtag->filterarg = filterarg;
 	newtag->maxsize = maxsize;
@@ -597,15 +597,19 @@ _bus_dmamap_count_pages(bus_dma_tag_t dmat, bus_dmamap_t map, pmap_t pmap,
 		vendaddr = (vm_offset_t)buf + buflen;
 
 		while (vaddr < vendaddr) {
+			bus_size_t sg_len;
+
+			sg_len = PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK);
 			if (pmap)
 				paddr = pmap_extract(pmap, vaddr);
 			else
 				paddr = pmap_kextract(vaddr);
 			if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) &&
 			    run_filter(dmat, paddr) != 0) {
+				sg_len = roundup2(sg_len, dmat->alignment);
 				map->pagesneeded++;
 			}
-			vaddr += (PAGE_SIZE - ((vm_offset_t)vaddr & PAGE_MASK));
+			vaddr += sg_len;
 		}
 		CTR1(KTR_BUSDMA, "pagesneeded= %d\n", map->pagesneeded);
 	}
@@ -672,6 +676,8 @@ _bus_dmamap_load_buffer(bus_dma_tag_t dmat,
 	bmask = ~(dmat->boundary - 1);
 
 	for (seg = *segp; buflen > 0 ; ) {
+		bus_size_t max_sgsize;
+
 		/*
 		 * Get the physical address for this segment.
 		 */
@@ -683,11 +689,16 @@ _bus_dmamap_load_buffer(bus_dma_tag_t dmat,
 		/*
 		 * Compute the segment size, and adjust counts.
 		 */
-		sgsize = PAGE_SIZE - ((u_long)curaddr & PAGE_MASK);
-		if (sgsize > dmat->maxsegsz)
-			sgsize = dmat->maxsegsz;
-		if (buflen < sgsize)
-			sgsize = buflen;
+		max_sgsize = MIN(buflen, dmat->maxsegsz);
+		sgsize = PAGE_SIZE - ((vm_offset_t)curaddr & PAGE_MASK);
+		if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) &&
+		    map->pagesneeded != 0 && run_filter(dmat, curaddr)) {
+			sgsize = roundup2(sgsize, dmat->alignment);
+			sgsize = MIN(sgsize, max_sgsize);
+			curaddr = add_bounce_page(dmat, map, vaddr, sgsize);
+		} else {
+			sgsize = MIN(sgsize, max_sgsize);
+		}
 
 		/*
 		 * Make sure we don't cross any boundaries.
@@ -698,10 +709,6 @@ _bus_dmamap_load_buffer(bus_dma_tag_t dmat,
 			sgsize = (baddr - curaddr);
 		}
 
-		if (((dmat->flags & BUS_DMA_COULD_BOUNCE) != 0) &&
-		    map->pagesneeded != 0 && run_filter(dmat, curaddr))
-			curaddr = add_bounce_page(dmat, map, vaddr, sgsize);
-
 		/*
 		 * Insert chunk into a segment, coalescing with
 		 * previous segment if possible.
@@ -861,7 +868,7 @@ bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map,
 		    bus_dmamap_callback2_t *callback, void *callback_arg,
 		    int flags)
 {
-	bus_addr_t lastaddr;
+	bus_addr_t lastaddr = 0;
 	int nsegs, error, first, i;
 	bus_size_t resid;
 	struct iovec *iov;
@@ -881,7 +888,6 @@ bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map,
 	nsegs = 0;
 	error = 0;
 	first = 1;
-	lastaddr = (bus_addr_t) 0;
 	for (i = 0; i < uio->uio_iovcnt && resid != 0 && !error; i++) {
 		/*
 		 * Now at the first iovec to load.  Load each iovec
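The busdma change above makes bounced segments respect the tag's alignment: when a chunk will go through a bounce page, its length is rounded up to the alignment before being clamped to the per-segment maximum, and _bus_dmamap_count_pages() advances by the same rounded length so the bounce-page estimate matches. A stand-alone illustration of the arithmetic; the constants and values below are invented, and roundup2() assumes a power-of-two alignment, as the real tag code does:

#include <stdint.h>
#include <stdio.h>

#define	PAGE_SIZE	4096
#define	PAGE_MASK	(PAGE_SIZE - 1)
#define	MIN(a, b)	((a) < (b) ? (a) : (b))
#define	roundup2(x, y)	(((x) + ((y) - 1)) & ~((y) - 1))

int
main(void)
{
	uintptr_t curaddr = 0x1234;	/* hypothetical bus address */
	size_t buflen = 10000, maxsegsz = 65536, alignment = 512;
	size_t max_sgsize, sgsize;

	max_sgsize = MIN(buflen, maxsegsz);
	sgsize = PAGE_SIZE - (curaddr & PAGE_MASK);	/* rest of this page */

	/* Bounce case: align the chunk first, then clamp it. */
	sgsize = roundup2(sgsize, alignment);
	sgsize = MIN(sgsize, max_sgsize);

	printf("segment size: %zu\n", sgsize);
	return (0);
}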
diff --git a/sys/i386/i386/tsc.c b/sys/x86/x86/tsc.c
index 882b442..e39c4af 100644
--- a/sys/i386/i386/tsc.c
+++ b/sys/x86/x86/tsc.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/power.h>
 #include <sys/smp.h>
 #include <machine/clock.h>
+#include <machine/cputypes.h>
 #include <machine/md_var.h>
 #include <machine/specialreg.h>
 
@@ -49,7 +50,7 @@ __FBSDID("$FreeBSD$");
 uint64_t	tsc_freq;
 int		tsc_is_broken;
 int		tsc_is_invariant;
-u_int		tsc_present;
+int		tsc_present;
 static eventhandler_tag tsc_levels_tag, tsc_pre_tag, tsc_post_tag;
 
 SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN,
@@ -103,14 +104,38 @@ init_TSC(void)
 	if (bootverbose)
 		printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq);
 
+	switch (cpu_vendor_id) {
+	case CPU_VENDOR_AMD:
+		if ((amd_pminfo & AMDPM_TSC_INVARIANT) ||
+		    CPUID_TO_FAMILY(cpu_id) >= 0x10)
+			tsc_is_invariant = 1;
+		break;
+	case CPU_VENDOR_INTEL:
+		if ((amd_pminfo & AMDPM_TSC_INVARIANT) ||
+		    (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
+		    CPUID_TO_MODEL(cpu_id) >= 0xe) ||
+		    (CPUID_TO_FAMILY(cpu_id) == 0xf &&
+		    CPUID_TO_MODEL(cpu_id) >= 0x3))
+			tsc_is_invariant = 1;
+		break;
+	case CPU_VENDOR_CENTAUR:
+		if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
+		    CPUID_TO_MODEL(cpu_id) >= 0xf &&
+		    (rdmsr(0x1203) & 0x100000000ULL) == 0)
+			tsc_is_invariant = 1;
+		break;
+	}
+
 	/*
-	 * Inform CPU accounting about our boot-time clock rate.  Once the
-	 * system is finished booting, we will get the real max clock rate
-	 * via tsc_freq_max().  This also will be updated if someone loads
-	 * a cpufreq driver after boot that discovers a new max frequency.
+	 * Inform CPU accounting about our boot-time clock rate.  This will
+	 * be updated if someone loads a cpufreq driver after boot that
+	 * discovers a new max frequency.
 	 */
 	set_cputicker(rdtsc, tsc_freq, 1);
 
+	if (tsc_is_invariant)
+		return;
+
 	/* Register to find out about changes in CPU frequency. */
 	tsc_pre_tag = EVENTHANDLER_REGISTER(cpufreq_pre_change,
 	    tsc_freq_changing, NULL, EVENTHANDLER_PRI_FIRST);
@@ -207,8 +232,7 @@ static void
 tsc_freq_changing(void *arg, const struct cf_level *level, int *status)
 {
 
-	if (*status != 0 || timecounter != &tsc_timecounter ||
-	    tsc_is_invariant)
+	if (*status != 0 || timecounter != &tsc_timecounter)
 		return;
 
 	printf("timecounter TSC must not be in use when "
@@ -220,11 +244,9 @@ tsc_freq_changing(void *arg, const struct cf_level *level, int *status)
 static void
 tsc_freq_changed(void *arg, const struct cf_level *level, int status)
 {
-	/*
-	 * If there was an error during the transition or
-	 * TSC is P-state invariant, don't do anything.
-	 */
-	if (status != 0 || tsc_is_invariant)
+
+	/* If there was an error during the transition, don't do anything. */
+	if (status != 0)
 		return;
 
 	/* Total setting for this level gives the new frequency in MHz. */
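init_TSC() now decides P-state invariance once at boot, either from the AMD advanced-power-management CPUID leaf or from known vendor family/model ranges, and skips registering the cpufreq event handlers entirely when the TSC cannot change. A userland sketch of the leaf-0x80000007 bit that AMDPM_TSC_INVARIANT corresponds to; this illustrates the CPUID bit only and is not the kernel's full vendor/family table:

#include <cpuid.h>
#include <stdio.h>

int
main(void)
{
	unsigned int eax, ebx, ecx, edx;

	/* Extended leaf 0x80000007, EDX bit 8: invariant/non-stop TSC. */
	if (__get_cpuid(0x80000007, &eax, &ebx, &ecx, &edx) == 0) {
		printf("CPUID leaf 0x80000007 not supported\n");
		return (1);
	}
	printf("invariant TSC: %s\n", (edx & (1u << 8)) ? "yes" : "no");
	return (0);
}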
diff --git a/sys/xen/evtchn/evtchn.c b/sys/xen/evtchn/evtchn.c
index 3832277..3ad2e2c 100644
--- a/sys/xen/evtchn/evtchn.c
+++ b/sys/xen/evtchn/evtchn.c
@@ -256,7 +256,7 @@ find_unbound_irq(void)
 }
 
 static int
-bind_caller_port_to_irq(unsigned int caller_port)
+bind_caller_port_to_irq(unsigned int caller_port, int * port)
 {
 	int irq;
 
@@ -271,7 +271,7 @@ bind_caller_port_to_irq(unsigned int caller_port)
 	}
 
 	irq_bindcount[irq]++;
-	unmask_evtchn(caller_port);
+	*port = caller_port;
 
 out:
 	mtx_unlock_spin(&irq_mapping_update_lock);
@@ -279,7 +279,7 @@ bind_caller_port_to_irq(unsigned int caller_port)
 }
 
 static int
-bind_local_port_to_irq(unsigned int local_port)
+bind_local_port_to_irq(unsigned int local_port, int * port)
 {
 	int irq;
 
@@ -298,7 +298,7 @@ bind_local_port_to_irq(unsigned int local_port)
 	evtchn_to_irq[local_port] = irq;
 	irq_info[irq] = mk_irq_info(IRQT_LOCAL_PORT, 0, local_port);
 	irq_bindcount[irq]++;
-	unmask_evtchn(local_port);
+	*port = local_port;
 
 out:
 	mtx_unlock_spin(&irq_mapping_update_lock);
@@ -306,7 +306,7 @@ bind_local_port_to_irq(unsigned int local_port)
 }
 
 static int
-bind_listening_port_to_irq(unsigned int remote_domain)
+bind_listening_port_to_irq(unsigned int remote_domain, int * port)
 {
 	struct evtchn_alloc_unbound alloc_unbound;
 	int err;
@@ -317,12 +317,12 @@ bind_listening_port_to_irq(unsigned int remote_domain)
 	err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound,
 	    &alloc_unbound);
 
-	return err ? : bind_local_port_to_irq(alloc_unbound.port);
+	return err ? : bind_local_port_to_irq(alloc_unbound.port, port);
 }
 
 static int
 bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
-    unsigned int remote_port)
+    unsigned int remote_port, int * port)
 {
 	struct evtchn_bind_interdomain bind_interdomain;
 	int err;
@@ -333,11 +333,11 @@ bind_interdomain_evtchn_to_irq(unsigned int remote_domain,
 	err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain,
 	    &bind_interdomain);
 
-	return err ? : bind_local_port_to_irq(bind_interdomain.local_port);
+	return err ? : bind_local_port_to_irq(bind_interdomain.local_port, port);
 }
 
 static int
-bind_virq_to_irq(unsigned int virq, unsigned int cpu)
+bind_virq_to_irq(unsigned int virq, unsigned int cpu, int * port)
 {
 	struct evtchn_bind_virq bind_virq;
 	int evtchn = 0, irq;
@@ -363,7 +363,7 @@ bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 	}
 
 	irq_bindcount[irq]++;
-	unmask_evtchn(evtchn);
+	*port = evtchn;
 
 out:
 	mtx_unlock_spin(&irq_mapping_update_lock);
@@ -371,10 +371,8 @@ out:
 }
 
-extern int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu);
-
-int
-bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+static int
+bind_ipi_to_irq(unsigned int ipi, unsigned int cpu, int * port)
 {
 	struct evtchn_bind_ipi bind_ipi;
 	int irq;
@@ -398,7 +396,7 @@ bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
 		bind_evtchn_to_cpu(evtchn, cpu);
 	}
 	irq_bindcount[irq]++;
-	unmask_evtchn(evtchn);
+	*port = evtchn;
 
 out:
 	mtx_unlock_spin(&irq_mapping_update_lock);
@@ -449,9 +447,10 @@ bind_caller_port_to_irqhandler(unsigned int caller_port,
     unsigned long irqflags, unsigned int *irqp)
 {
 	unsigned int irq;
+	int port = -1;
 	int error;
 
-	irq = bind_caller_port_to_irq(caller_port);
+	irq = bind_caller_port_to_irq(caller_port, &port);
 	intr_register_source(&xp->xp_pins[irq].xp_intsrc);
 	error = intr_add_handler(devname, irq, NULL, handler, arg,
 	    irqflags, &xp->xp_pins[irq].xp_cookie);
@@ -460,6 +459,8 @@ bind_caller_port_to_irqhandler(unsigned int caller_port,
 		unbind_from_irq(irq);
 		return (error);
 	}
+	if (port != -1)
+		unmask_evtchn(port);
 
 	if (irqp)
 		*irqp = irq;
@@ -473,9 +474,10 @@ bind_listening_port_to_irqhandler(unsigned int remote_domain,
     unsigned long irqflags, unsigned int *irqp)
 {
 	unsigned int irq;
+	int port = -1;
 	int error;
 
-	irq = bind_listening_port_to_irq(remote_domain);
+	irq = bind_listening_port_to_irq(remote_domain, &port);
 	intr_register_source(&xp->xp_pins[irq].xp_intsrc);
 	error = intr_add_handler(devname, irq, NULL, handler, arg,
 	    irqflags, &xp->xp_pins[irq].xp_cookie);
@@ -483,6 +485,8 @@ bind_listening_port_to_irqhandler(unsigned int remote_domain,
 		unbind_from_irq(irq);
 		return (error);
 	}
+	if (port != -1)
+		unmask_evtchn(port);
 
 	if (irqp)
 		*irqp = irq;
@@ -496,9 +500,10 @@ bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
     unsigned int *irqp)
 {
 	unsigned int irq;
+	int port = -1;
 	int error;
 
-	irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port);
+	irq = bind_interdomain_evtchn_to_irq(remote_domain, remote_port, &port);
 	intr_register_source(&xp->xp_pins[irq].xp_intsrc);
 	error = intr_add_handler(devname, irq, NULL, handler, arg,
 	    irqflags, &xp->xp_pins[irq].xp_cookie);
@@ -506,6 +511,8 @@ bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain,
 		unbind_from_irq(irq);
 		return (error);
 	}
+	if (port != -1)
+		unmask_evtchn(port);
 
 	if (irqp)
 		*irqp = irq;
@@ -518,9 +525,10 @@ bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
     void *arg, unsigned long irqflags, unsigned int *irqp)
 {
 	unsigned int irq;
+	int port = -1;
 	int error;
 
-	irq = bind_virq_to_irq(virq, cpu);
+	irq = bind_virq_to_irq(virq, cpu, &port);
 	intr_register_source(&xp->xp_pins[irq].xp_intsrc);
 	error = intr_add_handler(devname, irq, filter, handler, arg,
 	    irqflags, &xp->xp_pins[irq].xp_cookie);
@@ -528,6 +536,8 @@ bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
 		unbind_from_irq(irq);
 		return (error);
 	}
+	if (port != -1)
+		unmask_evtchn(port);
 
 	if (irqp)
 		*irqp = irq;
@@ -540,9 +550,10 @@ bind_ipi_to_irqhandler(unsigned int ipi, unsigned int cpu,
     unsigned long irqflags, unsigned int *irqp)
 {
 	unsigned int irq;
+	int port = -1;
 	int error;
 
-	irq = bind_ipi_to_irq(ipi, cpu);
+	irq = bind_ipi_to_irq(ipi, cpu, &port);
 	intr_register_source(&xp->xp_pins[irq].xp_intsrc);
 	error = intr_add_handler(devname, irq, filter, NULL, NULL,
 	    irqflags, &xp->xp_pins[irq].xp_cookie);
@@ -550,6 +561,8 @@ bind_ipi_to_irqhandler(unsigned int ipi, unsigned int cpu,
 		unbind_from_irq(irq);
 		return (error);
 	}
+	if (port != -1)
+		unmask_evtchn(port);
 
 	if (irqp)
 		*irqp = irq;
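The common thread in the evtchn.c hunks is that bind_*_to_irq() no longer unmasks the event channel itself; it hands the port back through the new out-parameter, and the *_to_irqhandler() wrappers unmask only after intr_add_handler() has succeeded, so an event can never be delivered before a handler exists. A condensed sketch of the resulting wrapper shape, abridged from the diff above and not a drop-in function (xp and the xp_pins bookkeeping are file-scope state in evtchn.c):

/*
 * Condensed shape of the bind_*_to_irqhandler() functions after the
 * change: bind, register the handler, and only then unmask the port.
 * If registration fails, the binding is torn down while the channel
 * is still masked.
 */
static int
bind_port_sketch(unsigned int caller_port, const char *devname,
    driver_intr_t handler, void *arg, unsigned long irqflags,
    unsigned int *irqp)
{
	unsigned int irq;
	int port = -1;
	int error;

	irq = bind_caller_port_to_irq(caller_port, &port);
	intr_register_source(&xp->xp_pins[irq].xp_intsrc);
	error = intr_add_handler(devname, irq, NULL, handler, arg,
	    irqflags, &xp->xp_pins[irq].xp_cookie);
	if (error) {
		unbind_from_irq(irq);
		return (error);
	}
	if (port != -1)
		unmask_evtchn(port);	/* safe: the handler is in place */

	if (irqp)
		*irqp = irq;
	return (0);
}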