-rw-r--r--  sys/arm/arm/busdma_machdep.c  494
1 file changed, 296 insertions, 198 deletions
diff --git a/sys/arm/arm/busdma_machdep.c b/sys/arm/arm/busdma_machdep.c
index ecda7f9..038014b 100644
--- a/sys/arm/arm/busdma_machdep.c
+++ b/sys/arm/arm/busdma_machdep.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright (c) 2012 Ian Lepore
* Copyright (c) 2004 Olivier Houchard
* Copyright (c) 2002 Peter Grehan
* Copyright (c) 1997, 1998 Justin T. Gibbs.
@@ -32,7 +33,23 @@
__FBSDID("$FreeBSD$");
/*
- * ARM bus dma support routines
+ * ARM bus dma support routines.
+ *
+ * XXX Things to investigate / fix some day...
+ * - What is the earliest that this API can be called? Could there be any
+ * fallout from changing the SYSINIT() order from SI_SUB_VM to SI_SUB_KMEM?
+ * - The manpage mentions the BUS_DMA_NOWAIT flag only in the context of the
+ * bus_dmamap_load() function. This code has historically honored (and still
+ * does honor) it in bus_dmamem_alloc(). If we got rid of that we could lose some
+ * error checking because some resource management calls would become WAITOK
+ * and thus "cannot fail."
+ * - The decisions made by _bus_dma_can_bounce() should be made once, at tag
+ * creation time, and the result stored in the tag.
+ * - It should be possible to take some shortcuts when mapping a buffer we know
+ * came from the uma(9) allocators based on what we know about such buffers
+ * (aligned, contiguous, etc).
+ * - The allocation of bounce pages could probably be cleaned up, then we could
+ * retire arm_remap_nocache().
*/
#define _ARM32_BUS_DMA_PRIVATE
@@ -40,6 +57,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/bus.h>
+#include <sys/busdma_bufalloc.h>
#include <sys/interrupt.h>
#include <sys/lock.h>
#include <sys/proc.h>
@@ -50,7 +68,10 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/sysctl.h>
+#include <vm/uma.h>
#include <vm/vm.h>
+#include <vm/vm_extern.h>
+#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
@@ -81,7 +102,6 @@ struct bus_dma_tag {
int map_count;
bus_dma_lock_t *lockfunc;
void *lockfuncarg;
- bus_dma_segment_t *segments;
/*
* DMA range for this tag. If the page doesn't fall within
* one of these ranges, an error is returned. The caller
@@ -91,6 +111,13 @@ struct bus_dma_tag {
struct arm32_dma_range *ranges;
int _nranges;
struct bounce_zone *bounce_zone;
+ /*
+ * Most tags need one or two segments, and can use the local tagsegs
+ * array. For tags with a larger limit, we'll allocate a bigger array
+ * on first use.
+ */
+ bus_dma_segment_t *segments;
+ bus_dma_segment_t tagsegs[2];
};
struct bounce_page {
@@ -134,7 +161,7 @@ SYSCTL_INT(_hw_busdma, OID_AUTO, total_bpages, CTLFLAG_RD, &total_bpages, 0,
#define DMAMAP_LINEAR 0x1
#define DMAMAP_MBUF 0x2
#define DMAMAP_UIO 0x4
-#define DMAMAP_ALLOCATED 0x10
+#define DMAMAP_CACHE_ALIGNED 0x10
#define DMAMAP_TYPE_MASK (DMAMAP_LINEAR|DMAMAP_MBUF|DMAMAP_UIO)
#define DMAMAP_COHERENT 0x8
struct bus_dmamap {
@@ -144,9 +171,6 @@ struct bus_dmamap {
bus_dma_tag_t dmat;
int flags;
void *buffer;
- void *origbuffer;
- void *allocbuffer;
- TAILQ_ENTRY(bus_dmamap) freelist;
int len;
STAILQ_ENTRY(bus_dmamap) links;
bus_dmamap_callback_t *callback;
@@ -157,12 +181,6 @@ struct bus_dmamap {
static STAILQ_HEAD(, bus_dmamap) bounce_map_waitinglist;
static STAILQ_HEAD(, bus_dmamap) bounce_map_callbacklist;
-static TAILQ_HEAD(,bus_dmamap) dmamap_freelist =
- TAILQ_HEAD_INITIALIZER(dmamap_freelist);
-
-#define BUSDMA_STATIC_MAPS 500
-static struct bus_dmamap map_pool[BUSDMA_STATIC_MAPS];
-
static struct mtx busdma_mtx;
MTX_SYSINIT(busdma_mtx, &busdma_mtx, "busdma lock", MTX_DEF);
@@ -179,6 +197,87 @@ static void free_bounce_page(bus_dma_tag_t dmat, struct bounce_page *bpage);
/* Default tag, as most drivers provide no parent tag. */
bus_dma_tag_t arm_root_dma_tag;
+//----------------------------------------------------------------------------
+// Begin block of code useful to transplant to other implementations.
+
+static uma_zone_t dmamap_zone; /* Cache of struct bus_dmamap items */
+
+static busdma_bufalloc_t coherent_allocator; /* Cache of coherent buffers */
+static busdma_bufalloc_t standard_allocator; /* Cache of standard buffers */
+
+/*
+ * This is the ctor function passed to uma_zcreate() for the pool of dma maps.
+ * It'll need platform-specific changes if this code is copied.
+ */
+static int
+dmamap_ctor(void *mem, int size, void *arg, int flags)
+{
+ bus_dmamap_t map;
+ bus_dma_tag_t dmat;
+
+ map = (bus_dmamap_t)mem;
+ dmat = (bus_dma_tag_t)arg;
+
+ dmat->map_count++;
+
+ map->dmat = dmat;
+ map->flags = 0;
+ STAILQ_INIT(&map->bpages);
+
+ return (0);
+}
+
+/*
+ * This is the dtor function passed to uma_zcreate() for the pool of dma maps.
+ * It may need platform-specific changes if this code is copied.
+ */
+static void
+dmamap_dtor(void *mem, int size, void *arg)
+{
+ bus_dmamap_t map;
+
+ map = (bus_dmamap_t)mem;
+
+ map->dmat->map_count--;
+}
+
+static void
+busdma_init(void *dummy)
+{
+
+ /* Create a cache of maps for bus_dmamap_create(). */
+ dmamap_zone = uma_zcreate("dma maps", sizeof(struct bus_dmamap),
+ dmamap_ctor, dmamap_dtor, NULL, NULL, UMA_ALIGN_PTR, 0);
+
+ /* Create a cache of buffers in standard (cacheable) memory. */
+ standard_allocator = busdma_bufalloc_create("buffer",
+ arm_dcache_align, /* minimum_alignment */
+ NULL, /* uma_alloc func */
+ NULL, /* uma_free func */
+ 0); /* uma_zcreate_flags */
+
+ /*
+ * Create a cache of buffers in uncacheable memory, to implement the
+ * BUS_DMA_COHERENT (and potentially BUS_DMA_NOCACHE) flag.
+ */
+ coherent_allocator = busdma_bufalloc_create("coherent",
+ arm_dcache_align, /* minimum_alignment */
+ busdma_bufalloc_alloc_uncacheable,
+ busdma_bufalloc_free_uncacheable,
+ 0); /* uma_zcreate_flags */
+}
+
+/*
+ * This init historically used SI_SUB_VM, but now the init code requires
+ * malloc(9) using M_DEVBUF memory, which is set up later than SI_SUB_VM, by
+ * SI_SUB_KMEM and SI_ORDER_SECOND, so we'll go right after that by using
+ * SI_SUB_KMEM and SI_ORDER_THIRD.
+ */
+SYSINIT(busdma, SI_SUB_KMEM, SI_ORDER_THIRD, busdma_init, NULL);
+
+// End block of code useful to transplant to other implementations.
+//----------------------------------------------------------------------------
+
/*
* Return true if a match is made.
*
@@ -206,30 +305,26 @@ run_filter(bus_dma_tag_t dmat, bus_addr_t paddr)
return (retval);
}
-static void
-arm_dmamap_freelist_init(void *dummy)
-{
- int i;
-
- for (i = 0; i < BUSDMA_STATIC_MAPS; i++)
- TAILQ_INSERT_HEAD(&dmamap_freelist, &map_pool[i], freelist);
-}
-
-SYSINIT(busdma, SI_SUB_VM, SI_ORDER_ANY, arm_dmamap_freelist_init, NULL);
-
/*
- * Check to see if the specified page is in an allowed DMA range.
+ * This routine checks the exclusion zone constraints from a tag against the
+ * physical RAM available on the machine. If a tag specifies an exclusion zone
+ * but there's no RAM in that zone, then we avoid allocating resources to bounce
+ * a request, and we can use any memory allocator (as opposed to needing
+ * kmem_alloc_contig() just because it can allocate pages in an address range).
+ *
+ * Most tags have BUS_SPACE_MAXADDR or BUS_SPACE_MAXADDR_32BIT (they are the
+ * same value on 32-bit architectures) as their lowaddr constraint, and we can't
+ * possibly have RAM at an address higher than the highest address we can
+ * express, so we take a fast out.
*/
-
-static __inline int
-bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dma_segment_t *segs,
- bus_dmamap_t map, void *buf, bus_size_t buflen, struct pmap *pmap,
- int flags, vm_offset_t *lastaddrp, int *segp);
-
static __inline int
_bus_dma_can_bounce(vm_offset_t lowaddr, vm_offset_t highaddr)
{
int i;
+
+ if (lowaddr >= BUS_SPACE_MAXADDR)
+ return (0);
+
for (i = 0; phys_avail[i] && phys_avail[i + 1]; i += 2) {
if ((lowaddr >= phys_avail[i] && lowaddr <= phys_avail[i + 1])
|| (lowaddr < phys_avail[i] &&
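[Editorial sketch, not part of the commit] For context on the fast path described in the comment above: most drivers create tags with BUS_SPACE_MAXADDR as lowaddr, so the new early return applies and no bounce resources are ever set up; only a tag whose lowaddr genuinely excludes part of RAM can require bouncing. The device, function names, and the 256MB limit below are illustrative assumptions, not from this commit.

#include <sys/param.h>
#include <sys/bus.h>
#include <machine/bus.h>

/* Hypothetical: one unconstrained tag, one that excludes RAM above 256MB. */
static int
mydev_create_tags(device_t dev, bus_dma_tag_t *full, bus_dma_tag_t *low256)
{
	int error;

	error = bus_dma_tag_create(bus_get_dma_tag(dev),
	    1, 0,			/* alignment, boundary */
	    BUS_SPACE_MAXADDR,		/* lowaddr: nothing to exclude */
	    BUS_SPACE_MAXADDR,		/* highaddr */
	    NULL, NULL,			/* filter, filterarg */
	    MAXPHYS, 1, MAXPHYS,	/* maxsize, nsegments, maxsegsz */
	    0, NULL, NULL, full);
	if (error != 0)
		return (error);

	/* A device that can only address the low 256MB; RAM above it bounces. */
	return (bus_dma_tag_create(bus_get_dma_tag(dev),
	    1, 0,
	    0x0fffffff, BUS_SPACE_MAXADDR,
	    NULL, NULL,
	    MAXPHYS, 1, MAXPHYS,
	    0, NULL, NULL, low256));
}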
@@ -294,38 +389,6 @@ dflt_lock(void *arg, bus_dma_lock_op_t op)
#endif
}
-static __inline bus_dmamap_t
-_busdma_alloc_dmamap(void)
-{
- bus_dmamap_t map;
-
- mtx_lock(&busdma_mtx);
- map = TAILQ_FIRST(&dmamap_freelist);
- if (map)
- TAILQ_REMOVE(&dmamap_freelist, map, freelist);
- mtx_unlock(&busdma_mtx);
- if (!map) {
- map = malloc(sizeof(*map), M_DEVBUF, M_NOWAIT | M_ZERO);
- if (map)
- map->flags = DMAMAP_ALLOCATED;
- } else
- map->flags = 0;
- STAILQ_INIT(&map->bpages);
- return (map);
-}
-
-static __inline void
-_busdma_free_dmamap(bus_dmamap_t map)
-{
- if (map->flags & DMAMAP_ALLOCATED)
- free(map, M_DEVBUF);
- else {
- mtx_lock(&busdma_mtx);
- TAILQ_INSERT_HEAD(&dmamap_freelist, map, freelist);
- mtx_unlock(&busdma_mtx);
- }
-}
-
/*
* Allocate a device specific dma_tag.
*/
@@ -354,7 +417,7 @@ bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
}
newtag->parent = parent;
- newtag->alignment = alignment;
+ newtag->alignment = alignment ? alignment : 1;
newtag->boundary = boundary;
newtag->lowaddr = trunc_page((vm_offset_t)lowaddr) + (PAGE_SIZE - 1);
newtag->highaddr = trunc_page((vm_offset_t)highaddr) + (PAGE_SIZE - 1);
@@ -375,9 +438,19 @@ bus_dma_tag_create(bus_dma_tag_t parent, bus_size_t alignment,
newtag->lockfunc = dflt_lock;
newtag->lockfuncarg = NULL;
}
- newtag->segments = NULL;
-
- /*
+ /*
+ * If all the segments we need fit into the local tagsegs array, set the
+ * pointer now. Otherwise NULL the pointer and an array of segments
+ * will be allocated later, on first use. We don't pre-allocate now
+ * because some tags exist just to pass constraints to children in the
+ * device hierarchy, and they tend to use BUS_SPACE_UNRESTRICTED and we
+ * sure don't want to try to allocate an array for that.
+ */
+ if (newtag->nsegments <= nitems(newtag->tagsegs))
+ newtag->segments = newtag->tagsegs;
+ else
+ newtag->segments = NULL;
+ /*
* Take into account any restrictions imposed by our parent tag
*/
if (parent != NULL) {
@@ -450,6 +523,7 @@ bus_dma_tag_destroy(bus_dma_tag_t dmat)
#endif
if (dmat != NULL) {
+
if (dmat->map_count != 0)
return (EBUSY);
@@ -459,7 +533,8 @@ bus_dma_tag_destroy(bus_dma_tag_t dmat)
parent = dmat->parent;
atomic_subtract_int(&dmat->ref_count, 1);
if (dmat->ref_count == 0) {
- if (dmat->segments != NULL)
+ if (dmat->segments != NULL &&
+ dmat->segments != dmat->tagsegs)
free(dmat->segments, M_DEVBUF);
free(dmat, M_DEVBUF);
/*
@@ -485,29 +560,19 @@ bus_dma_tag_destroy(bus_dma_tag_t dmat)
int
bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
{
- bus_dmamap_t newmap;
+ bus_dmamap_t map;
int error = 0;
- if (dmat->segments == NULL) {
- dmat->segments = (bus_dma_segment_t *)malloc(
- sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF,
- M_NOWAIT);
- if (dmat->segments == NULL) {
- CTR3(KTR_BUSDMA, "%s: tag %p error %d",
- __func__, dmat, ENOMEM);
- return (ENOMEM);
- }
- }
+ map = uma_zalloc_arg(dmamap_zone, dmat, M_WAITOK);
+ *mapp = map;
- newmap = _busdma_alloc_dmamap();
- if (newmap == NULL) {
- CTR3(KTR_BUSDMA, "%s: tag %p error %d", __func__, dmat, ENOMEM);
- return (ENOMEM);
- }
- *mapp = newmap;
- newmap->dmat = dmat;
- newmap->allocbuffer = NULL;
- dmat->map_count++;
+ /*
+ * If the tag's segments haven't been allocated yet we need to do it
+ * now, because we can't sleep for resources at map load time.
+ */
+ if (dmat->segments == NULL)
+ dmat->segments = malloc(dmat->nsegments *
+ sizeof(*dmat->segments), M_DEVBUF, M_WAITOK);
/*
* Bouncing might be required if the driver asks for an active
@@ -522,7 +587,7 @@ bus_dmamap_create(bus_dma_tag_t dmat, int flags, bus_dmamap_t *mapp)
if (dmat->bounce_zone == NULL) {
if ((error = alloc_bounce_zone(dmat)) != 0) {
- _busdma_free_dmamap(newmap);
+ uma_zfree(dmamap_zone, map);
*mapp = NULL;
return (error);
}
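[Editorial sketch, not part of the commit] For orientation, a minimal consumer-side view of the path through bus_dmamap_create() and a subsequent load: the driver creates a map once, loads a buffer, and picks up the physical segments in its callback. The softc layout and names are hypothetical; BUS_DMA_NOWAIT asks the load to fail rather than defer the callback when bounce pages are scarce.

/* Hypothetical softc fields used by this and later sketches. */
struct mydev_softc {
	bus_dma_tag_t	buf_tag;
	bus_dmamap_t	buf_map;
	bus_addr_t	buf_busaddr;
};

static void
mydev_load_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct mydev_softc *sc = arg;

	if (error != 0)
		return;
	/* The tag was created with nsegments == 1. */
	sc->buf_busaddr = segs[0].ds_addr;
}

static int
mydev_map_buffer(struct mydev_softc *sc, void *buf, bus_size_t len)
{
	int error;

	error = bus_dmamap_create(sc->buf_tag, 0, &sc->buf_map);
	if (error != 0)
		return (error);
	/* NOWAIT: fail instead of deferring the callback if bouncing stalls. */
	error = bus_dmamap_load(sc->buf_tag, sc->buf_map, buf, len,
	    mydev_load_cb, sc, BUS_DMA_NOWAIT);
	if (error != 0)
		bus_dmamap_destroy(sc->buf_tag, sc->buf_map);
	return (error);
}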
@@ -575,118 +640,129 @@ bus_dmamap_destroy(bus_dma_tag_t dmat, bus_dmamap_t map)
__func__, dmat, EBUSY);
return (EBUSY);
}
- _busdma_free_dmamap(map);
+ uma_zfree(dmamap_zone, map);
if (dmat->bounce_zone)
dmat->bounce_zone->map_count--;
- dmat->map_count--;
CTR2(KTR_BUSDMA, "%s: tag %p error 0", __func__, dmat);
return (0);
}
/*
- * Allocate a piece of memory that can be efficiently mapped into
- * bus device space based on the constraints lited in the dma tag.
- * A dmamap to for use with dmamap_load is also allocated.
+ * Allocate a piece of memory that can be efficiently mapped into bus device
+ * space based on the constraints listed in the dma tag. Returns a pointer to
+ * the allocated memory, and a pointer to an associated bus_dmamap.
*/
int
-bus_dmamem_alloc(bus_dma_tag_t dmat, void** vaddr, int flags,
+bus_dmamem_alloc(bus_dma_tag_t dmat, void **vaddrp, int flags,
bus_dmamap_t *mapp)
{
- bus_dmamap_t newmap = NULL;
-
+ void * vaddr;
+ struct busdma_bufzone *bufzone;
+ busdma_bufalloc_t ba;
+ bus_dmamap_t map;
int mflags;
+ vm_memattr_t memattr;
if (flags & BUS_DMA_NOWAIT)
mflags = M_NOWAIT;
else
mflags = M_WAITOK;
- if (dmat->segments == NULL) {
- dmat->segments = (bus_dma_segment_t *)malloc(
- sizeof(bus_dma_segment_t) * dmat->nsegments, M_DEVBUF,
- mflags);
- if (dmat->segments == NULL) {
- CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
- __func__, dmat, dmat->flags, ENOMEM);
- return (ENOMEM);
- }
+
+ /*
+ * If the tag's segments haven't been allocated yet we need to do it
+ * now, because we can't sleep for resources at map load time.
+ */
+ if (dmat->segments == NULL)
+ dmat->segments = malloc(dmat->nsegments *
+ sizeof(*dmat->segments), M_DEVBUF, mflags);
+
+ map = uma_zalloc_arg(dmamap_zone, dmat, mflags);
+ if (map == NULL)
+ return (ENOMEM);
+
+ if (flags & BUS_DMA_COHERENT) {
+ memattr = VM_MEMATTR_UNCACHEABLE;
+ ba = coherent_allocator;
+ map->flags |= DMAMAP_COHERENT;
+ } else {
+ memattr = VM_MEMATTR_DEFAULT;
+ ba = standard_allocator;
}
+ /* All buffers we allocate are cache-aligned. */
+ map->flags |= DMAMAP_CACHE_ALIGNED;
+
if (flags & BUS_DMA_ZERO)
mflags |= M_ZERO;
- newmap = _busdma_alloc_dmamap();
- if (newmap == NULL) {
- CTR4(KTR_BUSDMA, "%s: tag %p tag flags 0x%x error %d",
- __func__, dmat, dmat->flags, ENOMEM);
- return (ENOMEM);
+ /*
+ * Try to find a bufzone in the allocator that holds a cache of buffers
+ * of the right size for this request. If the buffer is too big to be
+ * held in the allocator cache, this returns NULL.
+ */
+ bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize);
+
+ /*
+ * Allocate the buffer from the uma(9) allocator if...
+ * - It's small enough to be in the allocator (bufzone not NULL).
+ * - The alignment constraint isn't larger than the allocation size
+ * (the allocator aligns buffers to their size boundaries).
+ * - There's no need to handle lowaddr/highaddr exclusion zones.
+ * else allocate non-contiguous pages if...
+ * - The page count that could get allocated doesn't exceed nsegments.
+ * - The alignment constraint isn't larger than a page boundary.
+ * - There are no boundary-crossing constraints.
+ * else allocate a block of contiguous pages because one or more of the
+ * constraints is something that only the contig allocator can fulfill.
+ */
+ if (bufzone != NULL && dmat->alignment <= bufzone->size &&
+ !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) {
+ vaddr = uma_zalloc(bufzone->umazone, mflags);
+ } else if (dmat->nsegments >= btoc(dmat->maxsize) &&
+ dmat->alignment <= PAGE_SIZE && dmat->boundary == 0) {
+ vaddr = (void *)kmem_alloc_attr(kernel_map, dmat->maxsize,
+ mflags, 0, dmat->lowaddr, memattr);
+ } else {
+ vaddr = (void *)kmem_alloc_contig(kernel_map, dmat->maxsize,
+ mflags, 0, dmat->lowaddr, dmat->alignment, dmat->boundary,
+ memattr);
}
- dmat->map_count++;
- *mapp = newmap;
- newmap->dmat = dmat;
-
- if (dmat->maxsize <= PAGE_SIZE &&
- (dmat->alignment < dmat->maxsize) &&
- !_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr)) {
- *vaddr = malloc(dmat->maxsize, M_DEVBUF, mflags);
- } else {
- /*
- * XXX Use Contigmalloc until it is merged into this facility
- * and handles multi-seg allocations. Nobody is doing
- * multi-seg allocations yet though.
- */
- *vaddr = contigmalloc(dmat->maxsize, M_DEVBUF, mflags,
- 0ul, dmat->lowaddr, dmat->alignment? dmat->alignment : 1ul,
- dmat->boundary);
- }
- if (*vaddr == NULL) {
- if (newmap != NULL) {
- _busdma_free_dmamap(newmap);
- dmat->map_count--;
- }
- *mapp = NULL;
- return (ENOMEM);
+
+ if (vaddr == NULL) {
+ uma_zfree(dmamap_zone, map);
+ map = NULL;
}
- if (flags & BUS_DMA_COHERENT) {
- void *tmpaddr = arm_remap_nocache(
- (void *)((vm_offset_t)*vaddr &~ PAGE_MASK),
- dmat->maxsize + ((vm_offset_t)*vaddr & PAGE_MASK));
-
- if (tmpaddr) {
- tmpaddr = (void *)((vm_offset_t)(tmpaddr) +
- ((vm_offset_t)*vaddr & PAGE_MASK));
- newmap->origbuffer = *vaddr;
- newmap->allocbuffer = tmpaddr;
- *vaddr = tmpaddr;
- } else
- newmap->origbuffer = newmap->allocbuffer = NULL;
- } else
- newmap->origbuffer = newmap->allocbuffer = NULL;
- return (0);
+
+ *vaddrp = vaddr;
+ *mapp = map;
+
+ return (vaddr == NULL ? ENOMEM : 0);
}
/*
- * Free a piece of memory and it's allocated dmamap, that was allocated
- * via bus_dmamem_alloc. Make the same choice for free/contigfree.
+ * Free a piece of memory that was allocated via bus_dmamem_alloc, along with
+ * its associated map.
*/
void
bus_dmamem_free(bus_dma_tag_t dmat, void *vaddr, bus_dmamap_t map)
{
- if (map->allocbuffer) {
- KASSERT(map->allocbuffer == vaddr,
- ("Trying to freeing the wrong DMA buffer"));
- vaddr = map->origbuffer;
- arm_unmap_nocache(map->allocbuffer,
- dmat->maxsize + ((vm_offset_t)vaddr & PAGE_MASK));
- }
- if (dmat->maxsize <= PAGE_SIZE &&
- dmat->alignment < dmat->maxsize &&
+ struct busdma_bufzone *bufzone;
+ busdma_bufalloc_t ba;
+
+ if (map->flags & DMAMAP_COHERENT)
+ ba = coherent_allocator;
+ else
+ ba = standard_allocator;
+ uma_zfree(dmamap_zone, map);
+
+ /* Be careful not to access map from here on. */
+
+ bufzone = busdma_bufalloc_findzone(ba, dmat->maxsize);
+
+ if (bufzone != NULL && dmat->alignment <= bufzone->size &&
!_bus_dma_can_bounce(dmat->lowaddr, dmat->highaddr))
- free(vaddr, M_DEVBUF);
- else {
- contigfree(vaddr, dmat->maxsize, M_DEVBUF);
- }
- dmat->map_count--;
- _busdma_free_dmamap(map);
- CTR3(KTR_BUSDMA, "%s: tag %p flags 0x%x", __func__, dmat, dmat->flags);
+ uma_zfree(bufzone->umazone, vaddr);
+ else
+ kmem_free(kernel_map, (vm_offset_t)vaddr, dmat->maxsize);
}
static int
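[Editorial sketch, not part of the commit] A hedged usage sketch for the allocation and free paths above: a driver asks for a cache-coherent, zeroed descriptor ring, and on failure releases it through the map that bus_dmamem_alloc() returned. The ring size and the additional softc fields are assumptions for illustration.

/* Assumes hypothetical softc fields:
 *   bus_dma_tag_t ring_tag; void *ring_vaddr; bus_dmamap_t ring_map; */
#define MYDEV_RING_BYTES	(512 * 16)	/* hypothetical ring size */

static int
mydev_alloc_ring(struct mydev_softc *sc)
{
	int error;

	/* Comes from the coherent uma cache or kmem, as decided above. */
	error = bus_dmamem_alloc(sc->ring_tag, &sc->ring_vaddr,
	    BUS_DMA_COHERENT | BUS_DMA_ZERO | BUS_DMA_NOWAIT, &sc->ring_map);
	if (error != 0)
		return (error);
	error = bus_dmamap_load(sc->ring_tag, sc->ring_map, sc->ring_vaddr,
	    MYDEV_RING_BYTES, mydev_load_cb, sc, BUS_DMA_NOWAIT);
	if (error != 0) {
		bus_dmamem_free(sc->ring_tag, sc->ring_vaddr, sc->ring_map);
		sc->ring_vaddr = NULL;
	}
	return (error);
}

Note that the map handed back by bus_dmamem_alloc() is released by bus_dmamem_free(); it is not passed to bus_dmamap_destroy().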
@@ -795,10 +871,6 @@ bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dma_segment_t *segs,
else
curaddr = (*pde & L1_S_FRAME) |
(vaddr & L1_S_OFFSET);
- if (*pde & L1_S_CACHE_MASK) {
- map->flags &=
- ~DMAMAP_COHERENT;
- }
} else {
pte = *ptep;
KASSERT((pte & L2_TYPE_MASK) != L2_TYPE_INV,
@@ -807,18 +879,9 @@ bus_dmamap_load_buffer(bus_dma_tag_t dmat, bus_dma_segment_t *segs,
== L2_TYPE_L)) {
curaddr = (pte & L2_L_FRAME) |
(vaddr & L2_L_OFFSET);
- if (pte & L2_L_CACHE_MASK) {
- map->flags &=
- ~DMAMAP_COHERENT;
-
- }
} else {
curaddr = (pte & L2_S_FRAME) |
(vaddr & L2_S_OFFSET);
- if (pte & L2_S_CACHE_MASK) {
- map->flags &=
- ~DMAMAP_COHERENT;
- }
}
}
} else {
@@ -914,7 +977,7 @@ bus_dmamap_load(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
map->callback = callback;
map->callback_arg = callback_arg;
map->flags &= ~DMAMAP_TYPE_MASK;
- map->flags |= DMAMAP_LINEAR|DMAMAP_COHERENT;
+ map->flags |= DMAMAP_LINEAR;
map->buffer = buf;
map->len = buflen;
error = bus_dmamap_load_buffer(dmat,
@@ -935,6 +998,15 @@ bus_dmamap_load(bus_dma_tag_t dmat, bus_dmamap_t map, void *buf,
/*
* Like bus_dmamap_load(), but for mbufs.
+ *
+ * Note that the manpage states that BUS_DMA_NOWAIT is implied for mbufs.
+ *
+ * We know that the way the system allocates and uses mbufs implies that we can
+ * treat them as DMAMAP_CACHE_ALIGNED in terms of handling partial cache line
+ * flushes. Even though the flush may reference the data area within the mbuf
+ * that isn't aligned to a cache line, we know the overall mbuf itself is
+ * properly aligned, and we know that the CPU will not touch the header fields
+ * before the data area while the DMA is in progress.
*/
int
bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m0,
@@ -945,8 +1017,9 @@ bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m0,
M_ASSERTPKTHDR(m0);
+ flags |= BUS_DMA_NOWAIT;
map->flags &= ~DMAMAP_TYPE_MASK;
- map->flags |= DMAMAP_MBUF | DMAMAP_COHERENT;
+ map->flags |= DMAMAP_MBUF | DMAMAP_CACHE_ALIGNED;
map->buffer = m0;
map->len = 0;
if (m0->m_pkthdr.len <= dmat->maxsize) {
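[Editorial sketch, not part of the commit] To illustrate the mbuf path and the implied BUS_DMA_NOWAIT noted in the comment above, a hedged transmit-side fragment; the tx_tag/tx_map softc fields and the segment limit are assumptions matching a hypothetical tag's nsegments.

#include <sys/mbuf.h>

#define MYDEV_TX_MAXSEGS	8	/* hypothetical, equals tag nsegments */

static int
mydev_encap(struct mydev_softc *sc, struct mbuf *m)
{
	bus_dma_segment_t segs[MYDEV_TX_MAXSEGS];
	int error, nsegs;

	/* NOWAIT is implied for mbuf loads, per the comment above. */
	error = bus_dmamap_load_mbuf_sg(sc->tx_tag, sc->tx_map, m,
	    segs, &nsegs, BUS_DMA_NOWAIT);
	if (error != 0)
		return (error);

	/* ... fill nsegs hardware descriptors from segs[] ... */

	/* Flush the mbuf data to memory before handing it to the device. */
	bus_dmamap_sync(sc->tx_tag, sc->tx_map, BUS_DMASYNC_PREWRITE);
	return (0);
}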
@@ -969,7 +1042,7 @@ bus_dmamap_load_mbuf(bus_dma_tag_t dmat, bus_dmamap_t map, struct mbuf *m0,
/*
* force "no valid mappings" on error in callback.
*/
- (*callback)(callback_arg, dmat->segments, 0, 0, error);
+ (*callback)(callback_arg, NULL, 0, 0, error);
} else {
(*callback)(callback_arg, dmat->segments, nsegs + 1,
m0->m_pkthdr.len, error);
@@ -991,7 +1064,7 @@ bus_dmamap_load_mbuf_sg(bus_dma_tag_t dmat, bus_dmamap_t map,
flags |= BUS_DMA_NOWAIT;
*nsegs = -1;
map->flags &= ~DMAMAP_TYPE_MASK;
- map->flags |= DMAMAP_MBUF | DMAMAP_COHERENT;
+ map->flags |= DMAMAP_MBUF | DMAMAP_CACHE_ALIGNED;
map->buffer = m0;
map->len = 0;
if (m0->m_pkthdr.len <= dmat->maxsize) {
@@ -1035,7 +1108,7 @@ bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, struct uio *uio,
resid = uio->uio_resid;
iov = uio->uio_iov;
map->flags &= ~DMAMAP_TYPE_MASK;
- map->flags |= DMAMAP_UIO|DMAMAP_COHERENT;
+ map->flags |= DMAMAP_UIO;
map->buffer = uio;
map->len = 0;
@@ -1058,7 +1131,7 @@ bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, struct uio *uio,
caddr_t addr = (caddr_t) iov[i].iov_base;
if (minlen > 0) {
- error = bus_dmamap_load_buffer(dmat, dmat->segments,
+ error = bus_dmamap_load_buffer(dmat, dmat->segments,
map, addr, minlen, pmap, flags, &lastaddr, &nsegs);
map->len += minlen;
@@ -1070,7 +1143,7 @@ bus_dmamap_load_uio(bus_dma_tag_t dmat, bus_dmamap_t map, struct uio *uio,
/*
* force "no valid mappings" on error in callback.
*/
- (*callback)(callback_arg, dmat->segments, 0, 0, error);
+ (*callback)(callback_arg, NULL, 0, 0, error);
} else {
(*callback)(callback_arg, dmat->segments, nsegs+1,
uio->uio_resid, error);
@@ -1098,7 +1171,7 @@ _bus_dmamap_unload(bus_dma_tag_t dmat, bus_dmamap_t map)
}
static void
-bus_dmamap_sync_buf(void *buf, int len, bus_dmasync_op_t op)
+bus_dmamap_sync_buf(void *buf, int len, bus_dmasync_op_t op, int bufaligned)
{
char _tmp_cl[arm_dcache_align], _tmp_clend[arm_dcache_align];
register_t s;
@@ -1108,7 +1181,25 @@ bus_dmamap_sync_buf(void *buf, int len, bus_dmasync_op_t op)
cpu_dcache_wb_range((vm_offset_t)buf, len);
cpu_l2cache_wb_range((vm_offset_t)buf, len);
}
+
+ /*
+ * If the caller promises the buffer is properly aligned to a cache line
+ * (even if the call parms make it look like it isn't) we can avoid
+ * attempting to preserve the non-DMA part of the cache line in the
+ * POSTREAD case, but we MUST still do a writeback in the PREREAD case.
+ *
+ * This covers the case of mbufs, where we know how they're aligned and
+ * know the CPU doesn't touch the header in front of the DMA data area
+ * during the IO, but it may have touched it right before invoking the
+ * sync, so a PREREAD writeback is required.
+ *
+ * It also handles buffers we created in bus_dmamem_alloc(), which are
+ * always aligned and padded to cache line size even if the IO length
+ * isn't a multiple of cache line size. In this case the PREREAD
+ * writeback probably isn't required, but it's harmless.
+ */
partial = (((vm_offset_t)buf) | len) & arm_dcache_align_mask;
+
if (op & BUS_DMASYNC_PREREAD) {
if (!(op & BUS_DMASYNC_PREWRITE) && !partial) {
cpu_dcache_inv_range((vm_offset_t)buf, len);
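[Editorial sketch, not part of the commit] The partial-line handling above exists to support the usual driver sync discipline, sketched here for a receive buffer with hypothetical names: POSTREAD after the device finishes and before the CPU reads the data, PREREAD before the buffer is handed back to the device. With a DMAMAP_COHERENT or cache-aligned map, the save/restore of the non-DMA part of the line is skipped.

static void
mydev_rxeof(struct mydev_softc *sc)
{
	/* Device finished writing; invalidate before the CPU reads. */
	bus_dmamap_sync(sc->rx_tag, sc->rx_map, BUS_DMASYNC_POSTREAD);

	/* ... process the received data here ... */

	/* Re-arm: make the buffer safe for the device to write again. */
	bus_dmamap_sync(sc->rx_tag, sc->rx_map, BUS_DMASYNC_PREREAD);
}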
@@ -1119,7 +1210,7 @@ bus_dmamap_sync_buf(void *buf, int len, bus_dmasync_op_t op)
}
}
if (op & BUS_DMASYNC_POSTREAD) {
- if (partial) {
+ if (partial && !bufaligned) {
s = intr_disable();
if ((vm_offset_t)buf & arm_dcache_align_mask)
memcpy(_tmp_cl, (void *)((vm_offset_t)buf &
@@ -1133,7 +1224,7 @@ bus_dmamap_sync_buf(void *buf, int len, bus_dmasync_op_t op)
}
cpu_dcache_inv_range((vm_offset_t)buf, len);
cpu_l2cache_inv_range((vm_offset_t)buf, len);
- if (partial) {
+ if (partial && !bufaligned) {
if ((vm_offset_t)buf & arm_dcache_align_mask)
memcpy((void *)((vm_offset_t)buf &
~arm_dcache_align_mask), _tmp_cl,
@@ -1204,25 +1295,29 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
struct uio *uio;
int resid;
struct iovec *iov;
-
+ int bufaligned;
+
if (op == BUS_DMASYNC_POSTWRITE)
return;
+ if (map->flags & DMAMAP_COHERENT)
+ goto drain;
if (STAILQ_FIRST(&map->bpages))
_bus_dmamap_sync_bp(dmat, map, op);
- if (map->flags & DMAMAP_COHERENT)
- return;
CTR3(KTR_BUSDMA, "%s: op %x flags %x", __func__, op, map->flags);
+ bufaligned = (map->flags & DMAMAP_CACHE_ALIGNED);
switch(map->flags & DMAMAP_TYPE_MASK) {
case DMAMAP_LINEAR:
if (!(_bus_dma_buf_is_in_bp(map, map->buffer, map->len)))
- bus_dmamap_sync_buf(map->buffer, map->len, op);
+ bus_dmamap_sync_buf(map->buffer, map->len, op,
+ bufaligned);
break;
case DMAMAP_MBUF:
m = map->buffer;
while (m) {
if (m->m_len > 0 &&
!(_bus_dma_buf_is_in_bp(map, m->m_data, m->m_len)))
- bus_dmamap_sync_buf(m->m_data, m->m_len, op);
+ bus_dmamap_sync_buf(m->m_data, m->m_len, op,
+ bufaligned);
m = m->m_next;
}
break;
@@ -1237,7 +1332,7 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
if (!_bus_dma_buf_is_in_bp(map, iov[i].iov_base,
minlen))
bus_dmamap_sync_buf(iov[i].iov_base,
- minlen, op);
+ minlen, op, bufaligned);
resid -= minlen;
}
}
@@ -1245,6 +1340,9 @@ _bus_dmamap_sync(bus_dma_tag_t dmat, bus_dmamap_t map, bus_dmasync_op_t op)
default:
break;
}
+
+drain:
+
cpu_drain_writebuf();
}
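[Editorial sketch, not part of the commit] Finally, a hedged teardown sketch showing the order the API expects when releasing what the earlier sketches set up: memory from bus_dmamem_alloc() gives its map back through bus_dmamem_free(), while maps from bus_dmamap_create() go through bus_dmamap_destroy(), and tags are destroyed last.

static void
mydev_dma_teardown(struct mydev_softc *sc)
{
	if (sc->ring_vaddr != NULL) {
		bus_dmamap_sync(sc->ring_tag, sc->ring_map,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(sc->ring_tag, sc->ring_map);
		/* Frees both the buffer and the map allocated with it. */
		bus_dmamem_free(sc->ring_tag, sc->ring_vaddr, sc->ring_map);
		sc->ring_vaddr = NULL;
	}
	if (sc->buf_map != NULL) {
		bus_dmamap_unload(sc->buf_tag, sc->buf_map);
		bus_dmamap_destroy(sc->buf_tag, sc->buf_map);
	}
	if (sc->ring_tag != NULL)
		bus_dma_tag_destroy(sc->ring_tag);
	if (sc->buf_tag != NULL)
		bus_dma_tag_destroy(sc->buf_tag);
}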