diff options
39 files changed, 1018 insertions, 1911 deletions
diff --git a/Documentation/dmatest.txt b/Documentation/dmatest.txt index a2b5663..dd77a81 100644 --- a/Documentation/dmatest.txt +++ b/Documentation/dmatest.txt @@ -15,39 +15,48 @@ be built as module or inside kernel. Let's consider those cases. Part 2 - When dmatest is built as a module... -After mounting debugfs and loading the module, the /sys/kernel/debug/dmatest -folder with nodes will be created. There are two important files located. First -is the 'run' node that controls run and stop phases of the test, and the second -one, 'results', is used to get the test case results. - -Note that in this case test will not run on load automatically. - Example of usage: + % modprobe dmatest channel=dma0chan0 timeout=2000 iterations=1 run=1 + +...or: + % modprobe dmatest % echo dma0chan0 > /sys/module/dmatest/parameters/channel % echo 2000 > /sys/module/dmatest/parameters/timeout % echo 1 > /sys/module/dmatest/parameters/iterations - % echo 1 > /sys/kernel/debug/dmatest/run + % echo 1 > /sys/module/dmatest/parameters/run + +...or on the kernel command line: + + dmatest.channel=dma0chan0 dmatest.timeout=2000 dmatest.iterations=1 dmatest.run=1 Hint: available channel list could be extracted by running the following command: % ls -1 /sys/class/dma/ -After a while you will start to get messages about current status or error like -in the original code. +Once started a message like "dmatest: Started 1 threads using dma0chan0" is +emitted. After that only test failure messages are reported until the test +stops. Note that running a new test will not stop any in progress test. -The following command should return actual state of the test. - % cat /sys/kernel/debug/dmatest/run - -To wait for test done the user may perform a busy loop that checks the state. - - % while [ $(cat /sys/kernel/debug/dmatest/run) = "Y" ] - > do - > echo -n "." - > sleep 1 - > done - > echo +The following command returns the state of the test. + % cat /sys/module/dmatest/parameters/run + +To wait for test completion userpace can poll 'run' until it is false, or use +the wait parameter. Specifying 'wait=1' when loading the module causes module +initialization to pause until a test run has completed, while reading +/sys/module/dmatest/parameters/wait waits for any running test to complete +before returning. For example, the following scripts wait for 42 tests +to complete before exiting. Note that if 'iterations' is set to 'infinite' then +waiting is disabled. + +Example: + % modprobe dmatest run=1 iterations=42 wait=1 + % modprobe -r dmatest +...or: + % modprobe dmatest run=1 iterations=42 + % cat /sys/module/dmatest/parameters/wait + % modprobe -r dmatest Part 3 - When built-in in the kernel... @@ -62,21 +71,22 @@ case. You always could check them at run-time by running Part 4 - Gathering the test results -The module provides a storage for the test results in the memory. The gathered -data could be used after test is done. +Test results are printed to the kernel log buffer with the format: -The special file 'results' in the debugfs represents gathered data of the in -progress test. The messages collected are printed to the kernel log as well. +"dmatest: result <channel>: <test id>: '<error msg>' with src_off=<val> dst_off=<val> len=<val> (<err code>)" Example of output: - % cat /sys/kernel/debug/dmatest/results - dma0chan0-copy0: #1: No errors with src_off=0x7bf dst_off=0x8ad len=0x3fea (0) + % dmesg | tail -n 1 + dmatest: result dma0chan0-copy0: #1: No errors with src_off=0x7bf dst_off=0x8ad len=0x3fea (0) The message format is unified across the different types of errors. A number in the parens represents additional information, e.g. error code, error counter, -or status. +or status. A test thread also emits a summary line at completion listing the +number of tests executed, number that failed, and a result code. -Comparison between buffers is stored to the dedicated structure. +Example: + % dmesg | tail -n 1 + dmatest: dma0chan0-copy0: summary 1 test, 0 failures 1000 iops 100000 KB/s (0) -Note that the verify result is now accessible only via file 'results' in the -debugfs. +The details of a data miscompare error are also emitted, but do not follow the +above format. diff --git a/arch/arm/include/asm/hardware/iop3xx-adma.h b/arch/arm/include/asm/hardware/iop3xx-adma.h index 9b28f12..240b29e 100644 --- a/arch/arm/include/asm/hardware/iop3xx-adma.h +++ b/arch/arm/include/asm/hardware/iop3xx-adma.h @@ -393,36 +393,6 @@ static inline int iop_chan_zero_sum_slot_count(size_t len, int src_cnt, return slot_cnt; } -static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc) -{ - return 0; -} - -static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, - struct iop_adma_chan *chan) -{ - union iop3xx_desc hw_desc = { .ptr = desc->hw_desc, }; - - switch (chan->device->id) { - case DMA0_ID: - case DMA1_ID: - return hw_desc.dma->dest_addr; - case AAU_ID: - return hw_desc.aau->dest_addr; - default: - BUG(); - } - return 0; -} - - -static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc, - struct iop_adma_chan *chan) -{ - BUG(); - return 0; -} - static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, struct iop_adma_chan *chan) { diff --git a/arch/arm/include/asm/hardware/iop_adma.h b/arch/arm/include/asm/hardware/iop_adma.h index 122f86d..250760e 100644 --- a/arch/arm/include/asm/hardware/iop_adma.h +++ b/arch/arm/include/asm/hardware/iop_adma.h @@ -82,8 +82,6 @@ struct iop_adma_chan { * @slot_cnt: total slots used in an transaction (group of operations) * @slots_per_op: number of slots per operation * @idx: pool index - * @unmap_src_cnt: number of xor sources - * @unmap_len: transaction bytecount * @tx_list: list of descriptors that are associated with one operation * @async_tx: support for the async_tx api * @group_list: list of slots that make up a multi-descriptor transaction @@ -99,8 +97,6 @@ struct iop_adma_desc_slot { u16 slot_cnt; u16 slots_per_op; u16 idx; - u16 unmap_src_cnt; - size_t unmap_len; struct list_head tx_list; struct dma_async_tx_descriptor async_tx; union { diff --git a/arch/arm/mach-iop13xx/include/mach/adma.h b/arch/arm/mach-iop13xx/include/mach/adma.h index 6d3782d..a86fd0e 100644 --- a/arch/arm/mach-iop13xx/include/mach/adma.h +++ b/arch/arm/mach-iop13xx/include/mach/adma.h @@ -218,20 +218,6 @@ iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op) #define iop_chan_pq_slot_count iop_chan_xor_slot_count #define iop_chan_pq_zero_sum_slot_count iop_chan_xor_slot_count -static inline u32 iop_desc_get_dest_addr(struct iop_adma_desc_slot *desc, - struct iop_adma_chan *chan) -{ - struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; - return hw_desc->dest_addr; -} - -static inline u32 iop_desc_get_qdest_addr(struct iop_adma_desc_slot *desc, - struct iop_adma_chan *chan) -{ - struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; - return hw_desc->q_dest_addr; -} - static inline u32 iop_desc_get_byte_count(struct iop_adma_desc_slot *desc, struct iop_adma_chan *chan) { @@ -350,18 +336,6 @@ iop_desc_init_pq(struct iop_adma_desc_slot *desc, int src_cnt, hw_desc->desc_ctrl = u_desc_ctrl.value; } -static inline int iop_desc_is_pq(struct iop_adma_desc_slot *desc) -{ - struct iop13xx_adma_desc_hw *hw_desc = desc->hw_desc; - union { - u32 value; - struct iop13xx_adma_desc_ctrl field; - } u_desc_ctrl; - - u_desc_ctrl.value = hw_desc->desc_ctrl; - return u_desc_ctrl.field.pq_xfer_en; -} - static inline void iop_desc_init_pq_zero_sum(struct iop_adma_desc_slot *desc, int src_cnt, unsigned long flags) diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index 9e62fef..f8c0b8d 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c @@ -50,33 +50,36 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, &dest, 1, &src, 1, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; + struct dmaengine_unmap_data *unmap = NULL; - if (device && is_dma_copy_aligned(device, src_offset, dest_offset, len)) { - dma_addr_t dma_dest, dma_src; + if (device) + unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOIO); + + if (unmap && is_dma_copy_aligned(device, src_offset, dest_offset, len)) { unsigned long dma_prep_flags = 0; if (submit->cb_fn) dma_prep_flags |= DMA_PREP_INTERRUPT; if (submit->flags & ASYNC_TX_FENCE) dma_prep_flags |= DMA_PREP_FENCE; - dma_dest = dma_map_page(device->dev, dest, dest_offset, len, - DMA_FROM_DEVICE); - - dma_src = dma_map_page(device->dev, src, src_offset, len, - DMA_TO_DEVICE); - - tx = device->device_prep_dma_memcpy(chan, dma_dest, dma_src, - len, dma_prep_flags); - if (!tx) { - dma_unmap_page(device->dev, dma_dest, len, - DMA_FROM_DEVICE); - dma_unmap_page(device->dev, dma_src, len, - DMA_TO_DEVICE); - } + + unmap->to_cnt = 1; + unmap->addr[0] = dma_map_page(device->dev, src, src_offset, len, + DMA_TO_DEVICE); + unmap->from_cnt = 1; + unmap->addr[1] = dma_map_page(device->dev, dest, dest_offset, len, + DMA_FROM_DEVICE); + unmap->len = len; + + tx = device->device_prep_dma_memcpy(chan, unmap->addr[1], + unmap->addr[0], len, + dma_prep_flags); } if (tx) { pr_debug("%s: (async) len: %zu\n", __func__, len); + + dma_set_unmap(tx, unmap); async_tx_submit(chan, tx, submit); } else { void *dest_buf, *src_buf; @@ -96,6 +99,8 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, async_tx_sync_epilog(submit); } + dmaengine_unmap_put(unmap); + return tx; } EXPORT_SYMBOL_GPL(async_memcpy); diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c index 91d5d38..d05327c 100644 --- a/crypto/async_tx/async_pq.c +++ b/crypto/async_tx/async_pq.c @@ -46,49 +46,24 @@ static struct page *pq_scribble_page; * do_async_gen_syndrome - asynchronously calculate P and/or Q */ static __async_inline struct dma_async_tx_descriptor * -do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, - const unsigned char *scfs, unsigned int offset, int disks, - size_t len, dma_addr_t *dma_src, +do_async_gen_syndrome(struct dma_chan *chan, + const unsigned char *scfs, int disks, + struct dmaengine_unmap_data *unmap, + enum dma_ctrl_flags dma_flags, struct async_submit_ctl *submit) { struct dma_async_tx_descriptor *tx = NULL; struct dma_device *dma = chan->device; - enum dma_ctrl_flags dma_flags = 0; enum async_tx_flags flags_orig = submit->flags; dma_async_tx_callback cb_fn_orig = submit->cb_fn; dma_async_tx_callback cb_param_orig = submit->cb_param; int src_cnt = disks - 2; - unsigned char coefs[src_cnt]; unsigned short pq_src_cnt; dma_addr_t dma_dest[2]; int src_off = 0; - int idx; - int i; - /* DMAs use destinations as sources, so use BIDIRECTIONAL mapping */ - if (P(blocks, disks)) - dma_dest[0] = dma_map_page(dma->dev, P(blocks, disks), offset, - len, DMA_BIDIRECTIONAL); - else - dma_flags |= DMA_PREP_PQ_DISABLE_P; - if (Q(blocks, disks)) - dma_dest[1] = dma_map_page(dma->dev, Q(blocks, disks), offset, - len, DMA_BIDIRECTIONAL); - else - dma_flags |= DMA_PREP_PQ_DISABLE_Q; - - /* convert source addresses being careful to collapse 'empty' - * sources and update the coefficients accordingly - */ - for (i = 0, idx = 0; i < src_cnt; i++) { - if (blocks[i] == NULL) - continue; - dma_src[idx] = dma_map_page(dma->dev, blocks[i], offset, len, - DMA_TO_DEVICE); - coefs[idx] = scfs[i]; - idx++; - } - src_cnt = idx; + if (submit->flags & ASYNC_TX_FENCE) + dma_flags |= DMA_PREP_FENCE; while (src_cnt > 0) { submit->flags = flags_orig; @@ -100,28 +75,25 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, if (src_cnt > pq_src_cnt) { submit->flags &= ~ASYNC_TX_ACK; submit->flags |= ASYNC_TX_FENCE; - dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP; submit->cb_fn = NULL; submit->cb_param = NULL; } else { - dma_flags &= ~DMA_COMPL_SKIP_DEST_UNMAP; submit->cb_fn = cb_fn_orig; submit->cb_param = cb_param_orig; if (cb_fn_orig) dma_flags |= DMA_PREP_INTERRUPT; } - if (submit->flags & ASYNC_TX_FENCE) - dma_flags |= DMA_PREP_FENCE; - /* Since we have clobbered the src_list we are committed - * to doing this asynchronously. Drivers force forward - * progress in case they can not provide a descriptor + /* Drivers force forward progress in case they can not provide + * a descriptor */ for (;;) { + dma_dest[0] = unmap->addr[disks - 2]; + dma_dest[1] = unmap->addr[disks - 1]; tx = dma->device_prep_dma_pq(chan, dma_dest, - &dma_src[src_off], + &unmap->addr[src_off], pq_src_cnt, - &coefs[src_off], len, + &scfs[src_off], unmap->len, dma_flags); if (likely(tx)) break; @@ -129,6 +101,7 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks, dma_async_issue_pending(chan); } + dma_set_unmap(tx, unmap); async_tx_submit(chan, tx, submit); submit->depend_tx = tx; @@ -188,10 +161,6 @@ do_sync_gen_syndrome(struct page **blocks, unsigned int offset, int disks, * set to NULL those buffers will be replaced with the raid6_zero_page * in the synchronous path and omitted in the hardware-asynchronous * path. - * - * 'blocks' note: if submit->scribble is NULL then the contents of - * 'blocks' may be overwritten to perform address conversions - * (dma_map_page() or page_address()). */ struct dma_async_tx_descriptor * async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, @@ -202,26 +171,69 @@ async_gen_syndrome(struct page **blocks, unsigned int offset, int disks, &P(blocks, disks), 2, blocks, src_cnt, len); struct dma_device *device = chan ? chan->device : NULL; - dma_addr_t *dma_src = NULL; + struct dmaengine_unmap_data *unmap = NULL; BUG_ON(disks > 255 || !(P(blocks, disks) || Q(blocks, disks))); - if (submit->scribble) - dma_src = submit->scribble; - else if (sizeof(dma_addr_t) <= sizeof(struct page *)) - dma_src = (dma_addr_t *) blocks; + if (device) + unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO); - if (dma_src && device && + if (unmap && (src_cnt <= dma_maxpq(device, 0) || dma_maxpq(device, DMA_PREP_CONTINUE) > 0) && is_dma_pq_aligned(device, offset, 0, len)) { + struct dma_async_tx_descriptor *tx; + enum dma_ctrl_flags dma_flags = 0; + unsigned char coefs[src_cnt]; + int i, j; + /* run the p+q asynchronously */ pr_debug("%s: (async) disks: %d len: %zu\n", __func__, disks, len); - return do_async_gen_syndrome(chan, blocks, raid6_gfexp, offset, - disks, len, dma_src, submit); + + /* convert source addresses being careful to collapse 'empty' + * sources and update the coefficients accordingly + */ + unmap->len = len; + for (i = 0, j = 0; i < src_cnt; i++) { + if (blocks[i] == NULL) + continue; + unmap->addr[j] = dma_map_page(device->dev, blocks[i], offset, + len, DMA_TO_DEVICE); + coefs[j] = raid6_gfexp[i]; + unmap->to_cnt++; + j++; + } + + /* + * DMAs use destinations as sources, + * so use BIDIRECTIONAL mapping + */ + unmap->bidi_cnt++; + if (P(blocks, disks)) + unmap->addr[j++] = dma_map_page(device->dev, P(blocks, disks), + offset, len, DMA_BIDIRECTIONAL); + else { + unmap->addr[j++] = 0; + dma_flags |= DMA_PREP_PQ_DISABLE_P; + } + + unmap->bidi_cnt++; + if (Q(blocks, disks)) + unmap->addr[j++] = dma_map_page(device->dev, Q(blocks, disks), + offset, len, DMA_BIDIRECTIONAL); + else { + unmap->addr[j++] = 0; + dma_flags |= DMA_PREP_PQ_DISABLE_Q; + } + + tx = do_async_gen_syndrome(chan, coefs, j, unmap, dma_flags, submit); + dmaengine_unmap_put(unmap); + return tx; } + dmaengine_unmap_put(unmap); + /* run the pq synchronously */ pr_debug("%s: (sync) disks: %d len: %zu\n", __func__, disks, len); @@ -277,50 +289,60 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, struct dma_async_tx_descriptor *tx; unsigned char coefs[disks-2]; enum dma_ctrl_flags dma_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0; - dma_addr_t *dma_src = NULL; - int src_cnt = 0; + struct dmaengine_unmap_data *unmap = NULL; BUG_ON(disks < 4); - if (submit->scribble) - dma_src = submit->scribble; - else if (sizeof(dma_addr_t) <= sizeof(struct page *)) - dma_src = (dma_addr_t *) blocks; + if (device) + unmap = dmaengine_get_unmap_data(device->dev, disks, GFP_NOIO); - if (dma_src && device && disks <= dma_maxpq(device, 0) && + if (unmap && disks <= dma_maxpq(device, 0) && is_dma_pq_aligned(device, offset, 0, len)) { struct device *dev = device->dev; - dma_addr_t *pq = &dma_src[disks-2]; - int i; + dma_addr_t pq[2]; + int i, j = 0, src_cnt = 0; pr_debug("%s: (async) disks: %d len: %zu\n", __func__, disks, len); - if (!P(blocks, disks)) + + unmap->len = len; + for (i = 0; i < disks-2; i++) + if (likely(blocks[i])) { + unmap->addr[j] = dma_map_page(dev, blocks[i], + offset, len, + DMA_TO_DEVICE); + coefs[j] = raid6_gfexp[i]; + unmap->to_cnt++; + src_cnt++; + j++; + } + + if (!P(blocks, disks)) { + pq[0] = 0; dma_flags |= DMA_PREP_PQ_DISABLE_P; - else + } else { pq[0] = dma_map_page(dev, P(blocks, disks), offset, len, DMA_TO_DEVICE); - if (!Q(blocks, disks)) + unmap->addr[j++] = pq[0]; + unmap->to_cnt++; + } + if (!Q(blocks, disks)) { + pq[1] = 0; dma_flags |= DMA_PREP_PQ_DISABLE_Q; - else + } else { pq[1] = dma_map_page(dev, Q(blocks, disks), offset, len, DMA_TO_DEVICE); + unmap->addr[j++] = pq[1]; + unmap->to_cnt++; + } if (submit->flags & ASYNC_TX_FENCE) dma_flags |= DMA_PREP_FENCE; - for (i = 0; i < disks-2; i++) - if (likely(blocks[i])) { - dma_src[src_cnt] = dma_map_page(dev, blocks[i], - offset, len, - DMA_TO_DEVICE); - coefs[src_cnt] = raid6_gfexp[i]; - src_cnt++; - } - for (;;) { - tx = device->device_prep_dma_pq_val(chan, pq, dma_src, + tx = device->device_prep_dma_pq_val(chan, pq, + unmap->addr, src_cnt, coefs, len, pqres, @@ -330,6 +352,8 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks, async_tx_quiesce(&submit->depend_tx); dma_async_issue_pending(chan); } + + dma_set_unmap(tx, unmap); async_tx_submit(chan, tx, submit); return tx; diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c index a9f08a6..934a849 100644 --- a/crypto/async_tx/async_raid6_recov.c +++ b/crypto/async_tx/async_raid6_recov.c @@ -26,6 +26,7 @@ #include <linux/dma-mapping.h> #include <linux/raid/pq.h> #include <linux/async_tx.h> +#include <linux/dmaengine.h> static struct dma_async_tx_descriptor * async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, @@ -34,35 +35,45 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef, struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, &dest, 1, srcs, 2, len); struct dma_device *dma = chan ? chan->device : NULL; + struct dmaengine_unmap_data *unmap = NULL; const u8 *amul, *bmul; u8 ax, bx; u8 *a, *b, *c; - if (dma) { - dma_addr_t dma_dest[2]; - dma_addr_t dma_src[2]; + if (dma) + unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO); + + if (unmap) { struct device *dev = dma->dev; + dma_addr_t pq[2]; struct dma_async_tx_descriptor *tx; enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; if (submit->flags & ASYNC_TX_FENCE) dma_flags |= DMA_PREP_FENCE; - dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); - dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); - dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); - tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 2, coef, + unmap->addr[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE); + unmap->addr[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE); + unmap->to_cnt = 2; + + unmap->addr[2] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + unmap->bidi_cnt = 1; + /* engine only looks at Q, but expects it to follow P */ + pq[1] = unmap->addr[2]; + + unmap->len = len; + tx = dma->device_prep_dma_pq(chan, pq, unmap->addr, 2, coef, len, dma_flags); if (tx) { + dma_set_unmap(tx, unmap); async_tx_submit(chan, tx, submit); + dmaengine_unmap_put(unmap); return tx; } /* could not get a descriptor, unmap and fall through to * the synchronous path */ - dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL); - dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE); - dma_unmap_page(dev, dma_src[1], len, DMA_TO_DEVICE); + dmaengine_unmap_put(unmap); } /* run the operation synchronously */ @@ -89,23 +100,38 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len, struct dma_chan *chan = async_tx_find_channel(submit, DMA_PQ, &dest, 1, &src, 1, len); struct dma_device *dma = chan ? chan->device : NULL; + struct dmaengine_unmap_data *unmap = NULL; const u8 *qmul; /* Q multiplier table */ u8 *d, *s; - if (dma) { + if (dma) + unmap = dmaengine_get_unmap_data(dma->dev, 3, GFP_NOIO); + + if (unmap) { dma_addr_t dma_dest[2]; - dma_addr_t dma_src[1]; struct device *dev = dma->dev; struct dma_async_tx_descriptor *tx; enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P; if (submit->flags & ASYNC_TX_FENCE) dma_flags |= DMA_PREP_FENCE; - dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); - dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); - tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef, - len, dma_flags); + unmap->addr[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE); + unmap->to_cnt++; + unmap->addr[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL); + dma_dest[1] = unmap->addr[1]; + unmap->bidi_cnt++; + unmap->len = len; + + /* this looks funny, but the engine looks for Q at + * dma_dest[1] and ignores dma_dest[0] as a dest + * due to DMA_PREP_PQ_DISABLE_P + */ + tx = dma->device_prep_dma_pq(chan, dma_dest, unmap->addr, + 1, &coef, len, dma_flags); + if (tx) { + dma_set_unmap(tx, unmap); + dmaengine_unmap_put(unmap); async_tx_submit(chan, tx, submit); return tx; } @@ -113,8 +139,7 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len, /* could not get a descriptor, unmap and fall through to * the synchronous path */ - dma_unmap_page(dev, dma_dest[1], len, DMA_BIDIRECTIONAL); - dma_unmap_page(dev, dma_src[0], len, DMA_TO_DEVICE); + dmaengine_unmap_put(unmap); } /* no channel available, or failed to allocate a descriptor, so diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 8ade0a0..3c562f5 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -33,48 +33,31 @@ /* do_async_xor - dma map the pages and perform the xor with an engine */ static __async_inline struct dma_async_tx_descriptor * -do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, - unsigned int offset, int src_cnt, size_t len, dma_addr_t *dma_src, +do_async_xor(struct dma_chan *chan, struct dmaengine_unmap_data *unmap, struct async_submit_ctl *submit) { struct dma_device *dma = chan->device; struct dma_async_tx_descriptor *tx = NULL; - int src_off = 0; - int i; dma_async_tx_callback cb_fn_orig = submit->cb_fn; void *cb_param_orig = submit->cb_param; enum async_tx_flags flags_orig = submit->flags; - enum dma_ctrl_flags dma_flags; - int xor_src_cnt = 0; - dma_addr_t dma_dest; - - /* map the dest bidrectional in case it is re-used as a source */ - dma_dest = dma_map_page(dma->dev, dest, offset, len, DMA_BIDIRECTIONAL); - for (i = 0; i < src_cnt; i++) { - /* only map the dest once */ - if (!src_list[i]) - continue; - if (unlikely(src_list[i] == dest)) { - dma_src[xor_src_cnt++] = dma_dest; - continue; - } - dma_src[xor_src_cnt++] = dma_map_page(dma->dev, src_list[i], offset, - len, DMA_TO_DEVICE); - } - src_cnt = xor_src_cnt; + enum dma_ctrl_flags dma_flags = 0; + int src_cnt = unmap->to_cnt; + int xor_src_cnt; + dma_addr_t dma_dest = unmap->addr[unmap->to_cnt]; + dma_addr_t *src_list = unmap->addr; while (src_cnt) { + dma_addr_t tmp; + submit->flags = flags_orig; - dma_flags = 0; xor_src_cnt = min(src_cnt, (int)dma->max_xor); - /* if we are submitting additional xors, leave the chain open, - * clear the callback parameters, and leave the destination - * buffer mapped + /* if we are submitting additional xors, leave the chain open + * and clear the callback parameters */ if (src_cnt > xor_src_cnt) { submit->flags &= ~ASYNC_TX_ACK; submit->flags |= ASYNC_TX_FENCE; - dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; submit->cb_fn = NULL; submit->cb_param = NULL; } else { @@ -85,12 +68,18 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, dma_flags |= DMA_PREP_INTERRUPT; if (submit->flags & ASYNC_TX_FENCE) dma_flags |= DMA_PREP_FENCE; - /* Since we have clobbered the src_list we are committed - * to doing this asynchronously. Drivers force forward progress - * in case they can not provide a descriptor + + /* Drivers force forward progress in case they can not provide a + * descriptor */ - tx = dma->device_prep_dma_xor(chan, dma_dest, &dma_src[src_off], - xor_src_cnt, len, dma_flags); + tmp = src_list[0]; + if (src_list > unmap->addr) + src_list[0] = dma_dest; + tx = dma->device_prep_dma_xor(chan, dma_dest, src_list, + xor_src_cnt, unmap->len, + dma_flags); + src_list[0] = tmp; + if (unlikely(!tx)) async_tx_quiesce(&submit->depend_tx); @@ -99,22 +88,21 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list, while (unlikely(!tx)) { dma_async_issue_pending(chan); tx = dma->device_prep_dma_xor(chan, dma_dest, - &dma_src[src_off], - xor_src_cnt, len, + src_list, + xor_src_cnt, unmap->len, dma_flags); } + dma_set_unmap(tx, unmap); async_tx_submit(chan, tx, submit); submit->depend_tx = tx; if (src_cnt > xor_src_cnt) { /* drop completed sources */ src_cnt -= xor_src_cnt; - src_off += xor_src_cnt; - /* use the intermediate result a source */ - dma_src[--src_off] = dma_dest; src_cnt++; + src_list += xor_src_cnt - 1; } else break; } @@ -189,22 +177,40 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, struct dma_chan *chan = async_tx_find_channel(submit, DMA_XOR, &dest, 1, src_list, src_cnt, len); - dma_addr_t *dma_src = NULL; + struct dma_device *device = chan ? chan->device : NULL; + struct dmaengine_unmap_data *unmap = NULL; BUG_ON(src_cnt <= 1); - if (submit->scribble) - dma_src = submit->scribble; - else if (sizeof(dma_addr_t) <= sizeof(struct page *)) - dma_src = (dma_addr_t *) src_list; + if (device) + unmap = dmaengine_get_unmap_data(device->dev, src_cnt+1, GFP_NOIO); + + if (unmap && is_dma_xor_aligned(device, offset, 0, len)) { + struct dma_async_tx_descriptor *tx; + int i, j; - if (dma_src && chan && is_dma_xor_aligned(chan->device, offset, 0, len)) { /* run the xor asynchronously */ pr_debug("%s (async): len: %zu\n", __func__, len); - return do_async_xor(chan, dest, src_list, offset, src_cnt, len, - dma_src, submit); + unmap->len = len; + for (i = 0, j = 0; i < src_cnt; i++) { + if (!src_list[i]) + continue; + unmap->to_cnt++; + unmap->addr[j++] = dma_map_page(device->dev, src_list[i], + offset, len, DMA_TO_DEVICE); + } + + /* map it bidirectional as it may be re-used as a source */ + unmap->addr[j] = dma_map_page(device->dev, dest, offset, len, + DMA_BIDIRECTIONAL); + unmap->bidi_cnt = 1; + + tx = do_async_xor(chan, unmap, submit); + dmaengine_unmap_put(unmap); + return tx; } else { + dmaengine_unmap_put(unmap); /* run the xor synchronously */ pr_debug("%s (sync): len: %zu\n", __func__, len); WARN_ONCE(chan, "%s: no space for dma address conversion\n", @@ -268,16 +274,14 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, struct dma_chan *chan = xor_val_chan(submit, dest, src_list, src_cnt, len); struct dma_device *device = chan ? chan->device : NULL; struct dma_async_tx_descriptor *tx = NULL; - dma_addr_t *dma_src = NULL; + struct dmaengine_unmap_data *unmap = NULL; BUG_ON(src_cnt <= 1); - if (submit->scribble) - dma_src = submit->scribble; - else if (sizeof(dma_addr_t) <= sizeof(struct page *)) - dma_src = (dma_addr_t *) src_list; + if (device) + unmap = dmaengine_get_unmap_data(device->dev, src_cnt, GFP_NOIO); - if (dma_src && device && src_cnt <= device->max_xor && + if (unmap && src_cnt <= device->max_xor && is_dma_xor_aligned(device, offset, 0, len)) { unsigned long dma_prep_flags = 0; int i; @@ -288,11 +292,15 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, dma_prep_flags |= DMA_PREP_INTERRUPT; if (submit->flags & ASYNC_TX_FENCE) dma_prep_flags |= DMA_PREP_FENCE; - for (i = 0; i < src_cnt; i++) - dma_src[i] = dma_map_page(device->dev, src_list[i], - offset, len, DMA_TO_DEVICE); - tx = device->device_prep_dma_xor_val(chan, dma_src, src_cnt, + for (i = 0; i < src_cnt; i++) { + unmap->addr[i] = dma_map_page(device->dev, src_list[i], + offset, len, DMA_TO_DEVICE); + unmap->to_cnt++; + } + unmap->len = len; + + tx = device->device_prep_dma_xor_val(chan, unmap->addr, src_cnt, len, result, dma_prep_flags); if (unlikely(!tx)) { @@ -301,11 +309,11 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, while (!tx) { dma_async_issue_pending(chan); tx = device->device_prep_dma_xor_val(chan, - dma_src, src_cnt, len, result, + unmap->addr, src_cnt, len, result, dma_prep_flags); } } - + dma_set_unmap(tx, unmap); async_tx_submit(chan, tx, submit); } else { enum async_tx_flags flags_orig = submit->flags; @@ -327,6 +335,7 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset, async_tx_sync_epilog(submit); submit->flags = flags_orig; } + dmaengine_unmap_put(unmap); return tx; } diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c index 4a92bac..dad95f4 100644 --- a/crypto/async_tx/raid6test.c +++ b/crypto/async_tx/raid6test.c @@ -28,7 +28,7 @@ #undef pr #define pr(fmt, args...) pr_info("raid6test: " fmt, ##args) -#define NDISKS 16 /* Including P and Q */ +#define NDISKS 64 /* Including P and Q */ static struct page *dataptrs[NDISKS]; static addr_conv_t addr_conv[NDISKS]; @@ -219,6 +219,14 @@ static int raid6_test(void) err += test(11, &tests); err += test(12, &tests); } + + /* the 24 disk case is special for ioatdma as it is the boudary point + * at which it needs to switch from 8-source ops to 16-source + * ops for continuation (assumes DMA_HAS_PQ_CONTINUE is not set) + */ + if (NDISKS > 24) + err += test(24, &tests); + err += test(NDISKS, &tests); pr("\n"); diff --git a/drivers/ata/pata_arasan_cf.c b/drivers/ata/pata_arasan_cf.c index 853f610..e88690e 100644 --- a/drivers/ata/pata_arasan_cf.c +++ b/drivers/ata/pata_arasan_cf.c @@ -396,8 +396,7 @@ dma_xfer(struct arasan_cf_dev *acdev, dma_addr_t src, dma_addr_t dest, u32 len) struct dma_async_tx_descriptor *tx; struct dma_chan *chan = acdev->dma_chan; dma_cookie_t cookie; - unsigned long flags = DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP | - DMA_COMPL_SKIP_DEST_UNMAP; + unsigned long flags = DMA_PREP_INTERRUPT; int ret = 0; tx = chan->device->device_prep_dma_memcpy(chan, dest, src, len, flags); diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c index 4ee6533..f2917e2 100644 --- a/drivers/dma/amba-pl08x.c +++ b/drivers/dma/amba-pl08x.c @@ -1164,42 +1164,12 @@ static void pl08x_free_txd(struct pl08x_driver_data *pl08x, kfree(txd); } -static void pl08x_unmap_buffers(struct pl08x_txd *txd) -{ - struct device *dev = txd->vd.tx.chan->device->dev; - struct pl08x_sg *dsg; - - if (!(txd->vd.tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - if (txd->vd.tx.flags & DMA_COMPL_SRC_UNMAP_SINGLE) - list_for_each_entry(dsg, &txd->dsg_list, node) - dma_unmap_single(dev, dsg->src_addr, dsg->len, - DMA_TO_DEVICE); - else { - list_for_each_entry(dsg, &txd->dsg_list, node) - dma_unmap_page(dev, dsg->src_addr, dsg->len, - DMA_TO_DEVICE); - } - } - if (!(txd->vd.tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - if (txd->vd.tx.flags & DMA_COMPL_DEST_UNMAP_SINGLE) - list_for_each_entry(dsg, &txd->dsg_list, node) - dma_unmap_single(dev, dsg->dst_addr, dsg->len, - DMA_FROM_DEVICE); - else - list_for_each_entry(dsg, &txd->dsg_list, node) - dma_unmap_page(dev, dsg->dst_addr, dsg->len, - DMA_FROM_DEVICE); - } -} - static void pl08x_desc_free(struct virt_dma_desc *vd) { struct pl08x_txd *txd = to_pl08x_txd(&vd->tx); struct pl08x_dma_chan *plchan = to_pl08x_chan(vd->tx.chan); - if (!plchan->slave) - pl08x_unmap_buffers(txd); - + dma_descriptor_unmap(txd); if (!txd->done) pl08x_release_mux(plchan); diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 1ef7457..e2c04dc 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -344,31 +344,7 @@ atc_chain_complete(struct at_dma_chan *atchan, struct at_desc *desc) /* move myself to free_list */ list_move(&desc->desc_node, &atchan->free_list); - /* unmap dma addresses (not on slave channels) */ - if (!atchan->chan_common.private) { - struct device *parent = chan2parent(&atchan->chan_common); - if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) - dma_unmap_single(parent, - desc->lli.daddr, - desc->len, DMA_FROM_DEVICE); - else - dma_unmap_page(parent, - desc->lli.daddr, - desc->len, DMA_FROM_DEVICE); - } - if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) - dma_unmap_single(parent, - desc->lli.saddr, - desc->len, DMA_TO_DEVICE); - else - dma_unmap_page(parent, - desc->lli.saddr, - desc->len, DMA_TO_DEVICE); - } - } - + dma_descriptor_unmap(txd); /* for cyclic transfers, * no need to replay callback function while stopping */ if (!atc_chan_is_cyclic(atchan)) { diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 81d8765..ea806bd 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -65,6 +65,7 @@ #include <linux/acpi.h> #include <linux/acpi_dma.h> #include <linux/of_dma.h> +#include <linux/mempool.h> static DEFINE_MUTEX(dma_list_mutex); static DEFINE_IDR(dma_idr); @@ -901,98 +902,132 @@ void dma_async_device_unregister(struct dma_device *device) } EXPORT_SYMBOL(dma_async_device_unregister); -/** - * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses - * @chan: DMA channel to offload copy to - * @dest: destination address (virtual) - * @src: source address (virtual) - * @len: length - * - * Both @dest and @src must be mappable to a bus address according to the - * DMA mapping API rules for streaming mappings. - * Both @dest and @src must stay memory resident (kernel memory or locked - * user space pages). - */ -dma_cookie_t -dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, - void *src, size_t len) -{ - struct dma_device *dev = chan->device; - struct dma_async_tx_descriptor *tx; - dma_addr_t dma_dest, dma_src; - dma_cookie_t cookie; - unsigned long flags; +struct dmaengine_unmap_pool { + struct kmem_cache *cache; + const char *name; + mempool_t *pool; + size_t size; +}; - dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE); - dma_dest = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE); - flags = DMA_CTRL_ACK | - DMA_COMPL_SRC_UNMAP_SINGLE | - DMA_COMPL_DEST_UNMAP_SINGLE; - tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags); +#define __UNMAP_POOL(x) { .size = x, .name = "dmaengine-unmap-" __stringify(x) } +static struct dmaengine_unmap_pool unmap_pool[] = { + __UNMAP_POOL(2), + #if IS_ENABLED(CONFIG_ASYNC_TX_DMA) + __UNMAP_POOL(16), + __UNMAP_POOL(128), + __UNMAP_POOL(256), + #endif +}; - if (!tx) { - dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE); - dma_unmap_single(dev->dev, dma_dest, len, DMA_FROM_DEVICE); - return -ENOMEM; +static struct dmaengine_unmap_pool *__get_unmap_pool(int nr) +{ + int order = get_count_order(nr); + + switch (order) { + case 0 ... 1: + return &unmap_pool[0]; + case 2 ... 4: + return &unmap_pool[1]; + case 5 ... 7: + return &unmap_pool[2]; + case 8: + return &unmap_pool[3]; + default: + BUG(); + return NULL; } +} - tx->callback = NULL; - cookie = tx->tx_submit(tx); +static void dmaengine_unmap(struct kref *kref) +{ + struct dmaengine_unmap_data *unmap = container_of(kref, typeof(*unmap), kref); + struct device *dev = unmap->dev; + int cnt, i; + + cnt = unmap->to_cnt; + for (i = 0; i < cnt; i++) + dma_unmap_page(dev, unmap->addr[i], unmap->len, + DMA_TO_DEVICE); + cnt += unmap->from_cnt; + for (; i < cnt; i++) + dma_unmap_page(dev, unmap->addr[i], unmap->len, + DMA_FROM_DEVICE); + cnt += unmap->bidi_cnt; + for (; i < cnt; i++) { + if (unmap->addr[i] == 0) + continue; + dma_unmap_page(dev, unmap->addr[i], unmap->len, + DMA_BIDIRECTIONAL); + } + mempool_free(unmap, __get_unmap_pool(cnt)->pool); +} - preempt_disable(); - __this_cpu_add(chan->local->bytes_transferred, len); - __this_cpu_inc(chan->local->memcpy_count); - preempt_enable(); +void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap) +{ + if (unmap) + kref_put(&unmap->kref, dmaengine_unmap); +} +EXPORT_SYMBOL_GPL(dmaengine_unmap_put); - return cookie; +static void dmaengine_destroy_unmap_pool(void) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) { + struct dmaengine_unmap_pool *p = &unmap_pool[i]; + + if (p->pool) + mempool_destroy(p->pool); + p->pool = NULL; + if (p->cache) + kmem_cache_destroy(p->cache); + p->cache = NULL; + } } -EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf); -/** - * dma_async_memcpy_buf_to_pg - offloaded copy from address to page - * @chan: DMA channel to offload copy to - * @page: destination page - * @offset: offset in page to copy to - * @kdata: source address (virtual) - * @len: length - * - * Both @page/@offset and @kdata must be mappable to a bus address according - * to the DMA mapping API rules for streaming mappings. - * Both @page/@offset and @kdata must stay memory resident (kernel memory or - * locked user space pages) - */ -dma_cookie_t -dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page, - unsigned int offset, void *kdata, size_t len) +static int __init dmaengine_init_unmap_pool(void) { - struct dma_device *dev = chan->device; - struct dma_async_tx_descriptor *tx; - dma_addr_t dma_dest, dma_src; - dma_cookie_t cookie; - unsigned long flags; + int i; - dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE); - dma_dest = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE); - flags = DMA_CTRL_ACK | DMA_COMPL_SRC_UNMAP_SINGLE; - tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags); + for (i = 0; i < ARRAY_SIZE(unmap_pool); i++) { + struct dmaengine_unmap_pool *p = &unmap_pool[i]; + size_t size; - if (!tx) { - dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE); - dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE); - return -ENOMEM; + size = sizeof(struct dmaengine_unmap_data) + + sizeof(dma_addr_t) * p->size; + + p->cache = kmem_cache_create(p->name, size, 0, + SLAB_HWCACHE_ALIGN, NULL); + if (!p->cache) + break; + p->pool = mempool_create_slab_pool(1, p->cache); + if (!p->pool) + break; } - tx->callback = NULL; - cookie = tx->tx_submit(tx); + if (i == ARRAY_SIZE(unmap_pool)) + return 0; - preempt_disable(); - __this_cpu_add(chan->local->bytes_transferred, len); - __this_cpu_inc(chan->local->memcpy_count); - preempt_enable(); + dmaengine_destroy_unmap_pool(); + return -ENOMEM; +} - return cookie; +struct dmaengine_unmap_data * +dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags) +{ + struct dmaengine_unmap_data *unmap; + + unmap = mempool_alloc(__get_unmap_pool(nr)->pool, flags); + if (!unmap) + return NULL; + + memset(unmap, 0, sizeof(*unmap)); + kref_init(&unmap->kref); + unmap->dev = dev; + + return unmap; } -EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg); +EXPORT_SYMBOL(dmaengine_get_unmap_data); /** * dma_async_memcpy_pg_to_pg - offloaded copy from page to page @@ -1015,24 +1050,33 @@ dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg, { struct dma_device *dev = chan->device; struct dma_async_tx_descriptor *tx; - dma_addr_t dma_dest, dma_src; + struct dmaengine_unmap_data *unmap; dma_cookie_t cookie; unsigned long flags; - dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE); - dma_dest = dma_map_page(dev->dev, dest_pg, dest_off, len, - DMA_FROM_DEVICE); + unmap = dmaengine_get_unmap_data(dev->dev, 2, GFP_NOIO); + if (!unmap) + return -ENOMEM; + + unmap->to_cnt = 1; + unmap->from_cnt = 1; + unmap->addr[0] = dma_map_page(dev->dev, src_pg, src_off, len, + DMA_TO_DEVICE); + unmap->addr[1] = dma_map_page(dev->dev, dest_pg, dest_off, len, + DMA_FROM_DEVICE); + unmap->len = len; flags = DMA_CTRL_ACK; - tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, flags); + tx = dev->device_prep_dma_memcpy(chan, unmap->addr[1], unmap->addr[0], + len, flags); if (!tx) { - dma_unmap_page(dev->dev, dma_src, len, DMA_TO_DEVICE); - dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE); + dmaengine_unmap_put(unmap); return -ENOMEM; } - tx->callback = NULL; + dma_set_unmap(tx, unmap); cookie = tx->tx_submit(tx); + dmaengine_unmap_put(unmap); preempt_disable(); __this_cpu_add(chan->local->bytes_transferred, len); @@ -1043,6 +1087,52 @@ dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg, } EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg); +/** + * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses + * @chan: DMA channel to offload copy to + * @dest: destination address (virtual) + * @src: source address (virtual) + * @len: length + * + * Both @dest and @src must be mappable to a bus address according to the + * DMA mapping API rules for streaming mappings. + * Both @dest and @src must stay memory resident (kernel memory or locked + * user space pages). + */ +dma_cookie_t +dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, + void *src, size_t len) +{ + return dma_async_memcpy_pg_to_pg(chan, virt_to_page(dest), + (unsigned long) dest & ~PAGE_MASK, + virt_to_page(src), + (unsigned long) src & ~PAGE_MASK, len); +} +EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf); + +/** + * dma_async_memcpy_buf_to_pg - offloaded copy from address to page + * @chan: DMA channel to offload copy to + * @page: destination page + * @offset: offset in page to copy to + * @kdata: source address (virtual) + * @len: length + * + * Both @page/@offset and @kdata must be mappable to a bus address according + * to the DMA mapping API rules for streaming mappings. + * Both @page/@offset and @kdata must stay memory resident (kernel memory or + * locked user space pages) + */ +dma_cookie_t +dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page, + unsigned int offset, void *kdata, size_t len) +{ + return dma_async_memcpy_pg_to_pg(chan, page, offset, + virt_to_page(kdata), + (unsigned long) kdata & ~PAGE_MASK, len); +} +EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg); + void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, struct dma_chan *chan) { @@ -1116,6 +1206,10 @@ EXPORT_SYMBOL_GPL(dma_run_dependencies); static int __init dma_bus_init(void) { + int err = dmaengine_init_unmap_pool(); + + if (err) + return err; return class_register(&dma_devclass); } arch_initcall(dma_bus_init); diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c index 59e287f..20f9a3a 100644 --- a/drivers/dma/dmatest.c +++ b/drivers/dma/dmatest.c @@ -8,6 +8,8 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include <linux/delay.h> #include <linux/dma-mapping.h> #include <linux/dmaengine.h> @@ -19,10 +21,6 @@ #include <linux/random.h> #include <linux/slab.h> #include <linux/wait.h> -#include <linux/ctype.h> -#include <linux/debugfs.h> -#include <linux/uaccess.h> -#include <linux/seq_file.h> static unsigned int test_buf_size = 16384; module_param(test_buf_size, uint, S_IRUGO | S_IWUSR); @@ -68,92 +66,13 @@ module_param(timeout, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(timeout, "Transfer Timeout in msec (default: 3000), " "Pass -1 for infinite timeout"); -/* Maximum amount of mismatched bytes in buffer to print */ -#define MAX_ERROR_COUNT 32 - -/* - * Initialization patterns. All bytes in the source buffer has bit 7 - * set, all bytes in the destination buffer has bit 7 cleared. - * - * Bit 6 is set for all bytes which are to be copied by the DMA - * engine. Bit 5 is set for all bytes which are to be overwritten by - * the DMA engine. - * - * The remaining bits are the inverse of a counter which increments by - * one for each byte address. - */ -#define PATTERN_SRC 0x80 -#define PATTERN_DST 0x00 -#define PATTERN_COPY 0x40 -#define PATTERN_OVERWRITE 0x20 -#define PATTERN_COUNT_MASK 0x1f - -enum dmatest_error_type { - DMATEST_ET_OK, - DMATEST_ET_MAP_SRC, - DMATEST_ET_MAP_DST, - DMATEST_ET_PREP, - DMATEST_ET_SUBMIT, - DMATEST_ET_TIMEOUT, - DMATEST_ET_DMA_ERROR, - DMATEST_ET_DMA_IN_PROGRESS, - DMATEST_ET_VERIFY, - DMATEST_ET_VERIFY_BUF, -}; - -struct dmatest_verify_buffer { - unsigned int index; - u8 expected; - u8 actual; -}; - -struct dmatest_verify_result { - unsigned int error_count; - struct dmatest_verify_buffer data[MAX_ERROR_COUNT]; - u8 pattern; - bool is_srcbuf; -}; - -struct dmatest_thread_result { - struct list_head node; - unsigned int n; - unsigned int src_off; - unsigned int dst_off; - unsigned int len; - enum dmatest_error_type type; - union { - unsigned long data; - dma_cookie_t cookie; - enum dma_status status; - int error; - struct dmatest_verify_result *vr; - }; -}; - -struct dmatest_result { - struct list_head node; - char *name; - struct list_head results; -}; - -struct dmatest_info; - -struct dmatest_thread { - struct list_head node; - struct dmatest_info *info; - struct task_struct *task; - struct dma_chan *chan; - u8 **srcs; - u8 **dsts; - enum dma_transaction_type type; - bool done; -}; +static bool noverify; +module_param(noverify, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(noverify, "Disable random data setup and verification"); -struct dmatest_chan { - struct list_head node; - struct dma_chan *chan; - struct list_head threads; -}; +static bool verbose; +module_param(verbose, bool, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(verbose, "Enable \"success\" result messages (default: off)"); /** * struct dmatest_params - test parameters. @@ -177,6 +96,7 @@ struct dmatest_params { unsigned int xor_sources; unsigned int pq_sources; int timeout; + bool noverify; }; /** @@ -184,7 +104,7 @@ struct dmatest_params { * @params: test parameters * @lock: access protection to the fields of this structure */ -struct dmatest_info { +static struct dmatest_info { /* Test parameters */ struct dmatest_params params; @@ -192,16 +112,95 @@ struct dmatest_info { struct list_head channels; unsigned int nr_channels; struct mutex lock; + bool did_init; +} test_info = { + .channels = LIST_HEAD_INIT(test_info.channels), + .lock = __MUTEX_INITIALIZER(test_info.lock), +}; + +static int dmatest_run_set(const char *val, const struct kernel_param *kp); +static int dmatest_run_get(char *val, const struct kernel_param *kp); +static struct kernel_param_ops run_ops = { + .set = dmatest_run_set, + .get = dmatest_run_get, +}; +static bool dmatest_run; +module_param_cb(run, &run_ops, &dmatest_run, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(run, "Run the test (default: false)"); + +/* Maximum amount of mismatched bytes in buffer to print */ +#define MAX_ERROR_COUNT 32 + +/* + * Initialization patterns. All bytes in the source buffer has bit 7 + * set, all bytes in the destination buffer has bit 7 cleared. + * + * Bit 6 is set for all bytes which are to be copied by the DMA + * engine. Bit 5 is set for all bytes which are to be overwritten by + * the DMA engine. + * + * The remaining bits are the inverse of a counter which increments by + * one for each byte address. + */ +#define PATTERN_SRC 0x80 +#define PATTERN_DST 0x00 +#define PATTERN_COPY 0x40 +#define PATTERN_OVERWRITE 0x20 +#define PATTERN_COUNT_MASK 0x1f - /* debugfs related stuff */ - struct dentry *root; +struct dmatest_thread { + struct list_head node; + struct dmatest_info *info; + struct task_struct *task; + struct dma_chan *chan; + u8 **srcs; + u8 **dsts; + enum dma_transaction_type type; + bool done; +}; - /* Test results */ - struct list_head results; - struct mutex results_lock; +struct dmatest_chan { + struct list_head node; + struct dma_chan *chan; + struct list_head threads; }; -static struct dmatest_info test_info; +static DECLARE_WAIT_QUEUE_HEAD(thread_wait); +static bool wait; + +static bool is_threaded_test_run(struct dmatest_info *info) +{ + struct dmatest_chan *dtc; + + list_for_each_entry(dtc, &info->channels, node) { + struct dmatest_thread *thread; + + list_for_each_entry(thread, &dtc->threads, node) { + if (!thread->done) + return true; + } + } + + return false; +} + +static int dmatest_wait_get(char *val, const struct kernel_param *kp) +{ + struct dmatest_info *info = &test_info; + struct dmatest_params *params = &info->params; + + if (params->iterations) + wait_event(thread_wait, !is_threaded_test_run(info)); + wait = true; + return param_get_bool(val, kp); +} + +static struct kernel_param_ops wait_ops = { + .get = dmatest_wait_get, + .set = param_set_bool, +}; +module_param_cb(wait, &wait_ops, &wait, S_IRUGO); +MODULE_PARM_DESC(wait, "Wait for tests to complete (default: false)"); static bool dmatest_match_channel(struct dmatest_params *params, struct dma_chan *chan) @@ -223,7 +222,7 @@ static unsigned long dmatest_random(void) { unsigned long buf; - get_random_bytes(&buf, sizeof(buf)); + prandom_bytes(&buf, sizeof(buf)); return buf; } @@ -262,9 +261,31 @@ static void dmatest_init_dsts(u8 **bufs, unsigned int start, unsigned int len, } } -static unsigned int dmatest_verify(struct dmatest_verify_result *vr, u8 **bufs, - unsigned int start, unsigned int end, unsigned int counter, - u8 pattern, bool is_srcbuf) +static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index, + unsigned int counter, bool is_srcbuf) +{ + u8 diff = actual ^ pattern; + u8 expected = pattern | (~counter & PATTERN_COUNT_MASK); + const char *thread_name = current->comm; + + if (is_srcbuf) + pr_warn("%s: srcbuf[0x%x] overwritten! Expected %02x, got %02x\n", + thread_name, index, expected, actual); + else if ((pattern & PATTERN_COPY) + && (diff & (PATTERN_COPY | PATTERN_OVERWRITE))) + pr_warn("%s: dstbuf[0x%x] not copied! Expected %02x, got %02x\n", + thread_name, index, expected, actual); + else if (diff & PATTERN_SRC) + pr_warn("%s: dstbuf[0x%x] was copied! Expected %02x, got %02x\n", + thread_name, index, expected, actual); + else + pr_warn("%s: dstbuf[0x%x] mismatch! Expected %02x, got %02x\n", + thread_name, index, expected, actual); +} + +static unsigned int dmatest_verify(u8 **bufs, unsigned int start, + unsigned int end, unsigned int counter, u8 pattern, + bool is_srcbuf) { unsigned int i; unsigned int error_count = 0; @@ -272,7 +293,6 @@ static unsigned int dmatest_verify(struct dmatest_verify_result *vr, u8 **bufs, u8 expected; u8 *buf; unsigned int counter_orig = counter; - struct dmatest_verify_buffer *vb; for (; (buf = *bufs); bufs++) { counter = counter_orig; @@ -280,12 +300,9 @@ static unsigned int dmatest_verify(struct dmatest_verify_result *vr, u8 **bufs, actual = buf[i]; expected = pattern | (~counter & PATTERN_COUNT_MASK); if (actual != expected) { - if (error_count < MAX_ERROR_COUNT && vr) { - vb = &vr->data[error_count]; - vb->index = i; - vb->expected = expected; - vb->actual = actual; - } + if (error_count < MAX_ERROR_COUNT) + dmatest_mismatch(actual, pattern, i, + counter, is_srcbuf); error_count++; } counter++; @@ -293,7 +310,7 @@ static unsigned int dmatest_verify(struct dmatest_verify_result *vr, u8 **bufs, } if (error_count > MAX_ERROR_COUNT) - pr_warning("%s: %u errors suppressed\n", + pr_warn("%s: %u errors suppressed\n", current->comm, error_count - MAX_ERROR_COUNT); return error_count; @@ -313,20 +330,6 @@ static void dmatest_callback(void *arg) wake_up_all(done->wait); } -static inline void unmap_src(struct device *dev, dma_addr_t *addr, size_t len, - unsigned int count) -{ - while (count--) - dma_unmap_single(dev, addr[count], len, DMA_TO_DEVICE); -} - -static inline void unmap_dst(struct device *dev, dma_addr_t *addr, size_t len, - unsigned int count) -{ - while (count--) - dma_unmap_single(dev, addr[count], len, DMA_BIDIRECTIONAL); -} - static unsigned int min_odd(unsigned int x, unsigned int y) { unsigned int val = min(x, y); @@ -334,172 +337,49 @@ static unsigned int min_odd(unsigned int x, unsigned int y) return val % 2 ? val : val - 1; } -static char *verify_result_get_one(struct dmatest_verify_result *vr, - unsigned int i) +static void result(const char *err, unsigned int n, unsigned int src_off, + unsigned int dst_off, unsigned int len, unsigned long data) { - struct dmatest_verify_buffer *vb = &vr->data[i]; - u8 diff = vb->actual ^ vr->pattern; - static char buf[512]; - char *msg; - - if (vr->is_srcbuf) - msg = "srcbuf overwritten!"; - else if ((vr->pattern & PATTERN_COPY) - && (diff & (PATTERN_COPY | PATTERN_OVERWRITE))) - msg = "dstbuf not copied!"; - else if (diff & PATTERN_SRC) - msg = "dstbuf was copied!"; - else - msg = "dstbuf mismatch!"; - - snprintf(buf, sizeof(buf) - 1, "%s [0x%x] Expected %02x, got %02x", msg, - vb->index, vb->expected, vb->actual); - - return buf; + pr_info("%s: result #%u: '%s' with src_off=0x%x dst_off=0x%x len=0x%x (%lu)", + current->comm, n, err, src_off, dst_off, len, data); } -static char *thread_result_get(const char *name, - struct dmatest_thread_result *tr) +static void dbg_result(const char *err, unsigned int n, unsigned int src_off, + unsigned int dst_off, unsigned int len, + unsigned long data) { - static const char * const messages[] = { - [DMATEST_ET_OK] = "No errors", - [DMATEST_ET_MAP_SRC] = "src mapping error", - [DMATEST_ET_MAP_DST] = "dst mapping error", - [DMATEST_ET_PREP] = "prep error", - [DMATEST_ET_SUBMIT] = "submit error", - [DMATEST_ET_TIMEOUT] = "test timed out", - [DMATEST_ET_DMA_ERROR] = - "got completion callback (DMA_ERROR)", - [DMATEST_ET_DMA_IN_PROGRESS] = - "got completion callback (DMA_IN_PROGRESS)", - [DMATEST_ET_VERIFY] = "errors", - [DMATEST_ET_VERIFY_BUF] = "verify errors", - }; - static char buf[512]; - - snprintf(buf, sizeof(buf) - 1, - "%s: #%u: %s with src_off=0x%x ""dst_off=0x%x len=0x%x (%lu)", - name, tr->n, messages[tr->type], tr->src_off, tr->dst_off, - tr->len, tr->data); - - return buf; + pr_debug("%s: result #%u: '%s' with src_off=0x%x dst_off=0x%x len=0x%x (%lu)", + current->comm, n, err, src_off, dst_off, len, data); } -static int thread_result_add(struct dmatest_info *info, - struct dmatest_result *r, enum dmatest_error_type type, - unsigned int n, unsigned int src_off, unsigned int dst_off, - unsigned int len, unsigned long data) -{ - struct dmatest_thread_result *tr; - - tr = kzalloc(sizeof(*tr), GFP_KERNEL); - if (!tr) - return -ENOMEM; - - tr->type = type; - tr->n = n; - tr->src_off = src_off; - tr->dst_off = dst_off; - tr->len = len; - tr->data = data; +#define verbose_result(err, n, src_off, dst_off, len, data) ({ \ + if (verbose) \ + result(err, n, src_off, dst_off, len, data); \ + else \ + dbg_result(err, n, src_off, dst_off, len, data); \ +}) - mutex_lock(&info->results_lock); - list_add_tail(&tr->node, &r->results); - mutex_unlock(&info->results_lock); - - if (tr->type == DMATEST_ET_OK) - pr_debug("%s\n", thread_result_get(r->name, tr)); - else - pr_warn("%s\n", thread_result_get(r->name, tr)); - - return 0; -} - -static unsigned int verify_result_add(struct dmatest_info *info, - struct dmatest_result *r, unsigned int n, - unsigned int src_off, unsigned int dst_off, unsigned int len, - u8 **bufs, int whence, unsigned int counter, u8 pattern, - bool is_srcbuf) +static unsigned long long dmatest_persec(s64 runtime, unsigned int val) { - struct dmatest_verify_result *vr; - unsigned int error_count; - unsigned int buf_off = is_srcbuf ? src_off : dst_off; - unsigned int start, end; - - if (whence < 0) { - start = 0; - end = buf_off; - } else if (whence > 0) { - start = buf_off + len; - end = info->params.buf_size; - } else { - start = buf_off; - end = buf_off + len; - } + unsigned long long per_sec = 1000000; - vr = kmalloc(sizeof(*vr), GFP_KERNEL); - if (!vr) { - pr_warn("dmatest: No memory to store verify result\n"); - return dmatest_verify(NULL, bufs, start, end, counter, pattern, - is_srcbuf); - } - - vr->pattern = pattern; - vr->is_srcbuf = is_srcbuf; - - error_count = dmatest_verify(vr, bufs, start, end, counter, pattern, - is_srcbuf); - if (error_count) { - vr->error_count = error_count; - thread_result_add(info, r, DMATEST_ET_VERIFY_BUF, n, src_off, - dst_off, len, (unsigned long)vr); - return error_count; - } - - kfree(vr); - return 0; -} - -static void result_free(struct dmatest_info *info, const char *name) -{ - struct dmatest_result *r, *_r; - - mutex_lock(&info->results_lock); - list_for_each_entry_safe(r, _r, &info->results, node) { - struct dmatest_thread_result *tr, *_tr; - - if (name && strcmp(r->name, name)) - continue; - - list_for_each_entry_safe(tr, _tr, &r->results, node) { - if (tr->type == DMATEST_ET_VERIFY_BUF) - kfree(tr->vr); - list_del(&tr->node); - kfree(tr); - } + if (runtime <= 0) + return 0; - kfree(r->name); - list_del(&r->node); - kfree(r); + /* drop precision until runtime is 32-bits */ + while (runtime > UINT_MAX) { + runtime >>= 1; + per_sec <<= 1; } - mutex_unlock(&info->results_lock); + per_sec *= val; + do_div(per_sec, runtime); + return per_sec; } -static struct dmatest_result *result_init(struct dmatest_info *info, - const char *name) +static unsigned long long dmatest_KBs(s64 runtime, unsigned long long len) { - struct dmatest_result *r; - - r = kzalloc(sizeof(*r), GFP_KERNEL); - if (r) { - r->name = kstrdup(name, GFP_KERNEL); - INIT_LIST_HEAD(&r->results); - mutex_lock(&info->results_lock); - list_add_tail(&r->node, &info->results); - mutex_unlock(&info->results_lock); - } - return r; + return dmatest_persec(runtime, len >> 10); } /* @@ -525,7 +405,6 @@ static int dmatest_func(void *data) struct dmatest_params *params; struct dma_chan *chan; struct dma_device *dev; - const char *thread_name; unsigned int src_off, dst_off, len; unsigned int error_count; unsigned int failed_tests = 0; @@ -538,9 +417,10 @@ static int dmatest_func(void *data) int src_cnt; int dst_cnt; int i; - struct dmatest_result *result; + ktime_t ktime; + s64 runtime = 0; + unsigned long long total_len = 0; - thread_name = current->comm; set_freezable(); ret = -ENOMEM; @@ -570,10 +450,6 @@ static int dmatest_func(void *data) } else goto err_thread_type; - result = result_init(info, thread_name); - if (!result) - goto err_srcs; - thread->srcs = kcalloc(src_cnt+1, sizeof(u8 *), GFP_KERNEL); if (!thread->srcs) goto err_srcs; @@ -597,17 +473,17 @@ static int dmatest_func(void *data) set_user_nice(current, 10); /* - * src buffers are freed by the DMAEngine code with dma_unmap_single() - * dst buffers are freed by ourselves below + * src and dst buffers are freed by ourselves below */ - flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT - | DMA_COMPL_SKIP_DEST_UNMAP | DMA_COMPL_SRC_UNMAP_SINGLE; + flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT; + ktime = ktime_get(); while (!kthread_should_stop() && !(params->iterations && total_tests >= params->iterations)) { struct dma_async_tx_descriptor *tx = NULL; - dma_addr_t dma_srcs[src_cnt]; - dma_addr_t dma_dsts[dst_cnt]; + struct dmaengine_unmap_data *um; + dma_addr_t srcs[src_cnt]; + dma_addr_t *dsts; u8 align = 0; total_tests++; @@ -626,81 +502,103 @@ static int dmatest_func(void *data) break; } - len = dmatest_random() % params->buf_size + 1; + if (params->noverify) { + len = params->buf_size; + src_off = 0; + dst_off = 0; + } else { + len = dmatest_random() % params->buf_size + 1; + len = (len >> align) << align; + if (!len) + len = 1 << align; + src_off = dmatest_random() % (params->buf_size - len + 1); + dst_off = dmatest_random() % (params->buf_size - len + 1); + + src_off = (src_off >> align) << align; + dst_off = (dst_off >> align) << align; + + dmatest_init_srcs(thread->srcs, src_off, len, + params->buf_size); + dmatest_init_dsts(thread->dsts, dst_off, len, + params->buf_size); + } + len = (len >> align) << align; if (!len) len = 1 << align; - src_off = dmatest_random() % (params->buf_size - len + 1); - dst_off = dmatest_random() % (params->buf_size - len + 1); + total_len += len; - src_off = (src_off >> align) << align; - dst_off = (dst_off >> align) << align; - - dmatest_init_srcs(thread->srcs, src_off, len, params->buf_size); - dmatest_init_dsts(thread->dsts, dst_off, len, params->buf_size); + um = dmaengine_get_unmap_data(dev->dev, src_cnt+dst_cnt, + GFP_KERNEL); + if (!um) { + failed_tests++; + result("unmap data NULL", total_tests, + src_off, dst_off, len, ret); + continue; + } + um->len = params->buf_size; for (i = 0; i < src_cnt; i++) { - u8 *buf = thread->srcs[i] + src_off; - - dma_srcs[i] = dma_map_single(dev->dev, buf, len, - DMA_TO_DEVICE); - ret = dma_mapping_error(dev->dev, dma_srcs[i]); + unsigned long buf = (unsigned long) thread->srcs[i]; + struct page *pg = virt_to_page(buf); + unsigned pg_off = buf & ~PAGE_MASK; + + um->addr[i] = dma_map_page(dev->dev, pg, pg_off, + um->len, DMA_TO_DEVICE); + srcs[i] = um->addr[i] + src_off; + ret = dma_mapping_error(dev->dev, um->addr[i]); if (ret) { - unmap_src(dev->dev, dma_srcs, len, i); - thread_result_add(info, result, - DMATEST_ET_MAP_SRC, - total_tests, src_off, dst_off, - len, ret); + dmaengine_unmap_put(um); + result("src mapping error", total_tests, + src_off, dst_off, len, ret); failed_tests++; continue; } + um->to_cnt++; } /* map with DMA_BIDIRECTIONAL to force writeback/invalidate */ + dsts = &um->addr[src_cnt]; for (i = 0; i < dst_cnt; i++) { - dma_dsts[i] = dma_map_single(dev->dev, thread->dsts[i], - params->buf_size, - DMA_BIDIRECTIONAL); - ret = dma_mapping_error(dev->dev, dma_dsts[i]); + unsigned long buf = (unsigned long) thread->dsts[i]; + struct page *pg = virt_to_page(buf); + unsigned pg_off = buf & ~PAGE_MASK; + + dsts[i] = dma_map_page(dev->dev, pg, pg_off, um->len, + DMA_BIDIRECTIONAL); + ret = dma_mapping_error(dev->dev, dsts[i]); if (ret) { - unmap_src(dev->dev, dma_srcs, len, src_cnt); - unmap_dst(dev->dev, dma_dsts, params->buf_size, - i); - thread_result_add(info, result, - DMATEST_ET_MAP_DST, - total_tests, src_off, dst_off, - len, ret); + dmaengine_unmap_put(um); + result("dst mapping error", total_tests, + src_off, dst_off, len, ret); failed_tests++; continue; } + um->bidi_cnt++; } if (thread->type == DMA_MEMCPY) tx = dev->device_prep_dma_memcpy(chan, - dma_dsts[0] + dst_off, - dma_srcs[0], len, - flags); + dsts[0] + dst_off, + srcs[0], len, flags); else if (thread->type == DMA_XOR) tx = dev->device_prep_dma_xor(chan, - dma_dsts[0] + dst_off, - dma_srcs, src_cnt, + dsts[0] + dst_off, + srcs, src_cnt, len, flags); else if (thread->type == DMA_PQ) { dma_addr_t dma_pq[dst_cnt]; for (i = 0; i < dst_cnt; i++) - dma_pq[i] = dma_dsts[i] + dst_off; - tx = dev->device_prep_dma_pq(chan, dma_pq, dma_srcs, + dma_pq[i] = dsts[i] + dst_off; + tx = dev->device_prep_dma_pq(chan, dma_pq, srcs, src_cnt, pq_coefs, len, flags); } if (!tx) { - unmap_src(dev->dev, dma_srcs, len, src_cnt); - unmap_dst(dev->dev, dma_dsts, params->buf_size, - dst_cnt); - thread_result_add(info, result, DMATEST_ET_PREP, - total_tests, src_off, dst_off, - len, 0); + dmaengine_unmap_put(um); + result("prep error", total_tests, src_off, + dst_off, len, ret); msleep(100); failed_tests++; continue; @@ -712,9 +610,9 @@ static int dmatest_func(void *data) cookie = tx->tx_submit(tx); if (dma_submit_error(cookie)) { - thread_result_add(info, result, DMATEST_ET_SUBMIT, - total_tests, src_off, dst_off, - len, cookie); + dmaengine_unmap_put(um); + result("submit error", total_tests, src_off, + dst_off, len, ret); msleep(100); failed_tests++; continue; @@ -735,59 +633,59 @@ static int dmatest_func(void *data) * free it this time?" dancing. For now, just * leave it dangling. */ - thread_result_add(info, result, DMATEST_ET_TIMEOUT, - total_tests, src_off, dst_off, - len, 0); + dmaengine_unmap_put(um); + result("test timed out", total_tests, src_off, dst_off, + len, 0); failed_tests++; continue; } else if (status != DMA_COMPLETE) { - enum dmatest_error_type type = (status == DMA_ERROR) ? - DMATEST_ET_DMA_ERROR : DMATEST_ET_DMA_IN_PROGRESS; - thread_result_add(info, result, type, - total_tests, src_off, dst_off, - len, status); + dmaengine_unmap_put(um); + result(status == DMA_ERROR ? + "completion error status" : + "completion busy status", total_tests, src_off, + dst_off, len, ret); failed_tests++; continue; } - /* Unmap by myself (see DMA_COMPL_SKIP_DEST_UNMAP above) */ - unmap_dst(dev->dev, dma_dsts, params->buf_size, dst_cnt); + dmaengine_unmap_put(um); - error_count = 0; + if (params->noverify) { + verbose_result("test passed", total_tests, src_off, + dst_off, len, 0); + continue; + } - pr_debug("%s: verifying source buffer...\n", thread_name); - error_count += verify_result_add(info, result, total_tests, - src_off, dst_off, len, thread->srcs, -1, + pr_debug("%s: verifying source buffer...\n", current->comm); + error_count = dmatest_verify(thread->srcs, 0, src_off, 0, PATTERN_SRC, true); - error_count += verify_result_add(info, result, total_tests, - src_off, dst_off, len, thread->srcs, 0, - src_off, PATTERN_SRC | PATTERN_COPY, true); - error_count += verify_result_add(info, result, total_tests, - src_off, dst_off, len, thread->srcs, 1, - src_off + len, PATTERN_SRC, true); - - pr_debug("%s: verifying dest buffer...\n", thread_name); - error_count += verify_result_add(info, result, total_tests, - src_off, dst_off, len, thread->dsts, -1, + error_count += dmatest_verify(thread->srcs, src_off, + src_off + len, src_off, + PATTERN_SRC | PATTERN_COPY, true); + error_count += dmatest_verify(thread->srcs, src_off + len, + params->buf_size, src_off + len, + PATTERN_SRC, true); + + pr_debug("%s: verifying dest buffer...\n", current->comm); + error_count += dmatest_verify(thread->dsts, 0, dst_off, 0, PATTERN_DST, false); - error_count += verify_result_add(info, result, total_tests, - src_off, dst_off, len, thread->dsts, 0, - src_off, PATTERN_SRC | PATTERN_COPY, false); - error_count += verify_result_add(info, result, total_tests, - src_off, dst_off, len, thread->dsts, 1, - dst_off + len, PATTERN_DST, false); + error_count += dmatest_verify(thread->dsts, dst_off, + dst_off + len, src_off, + PATTERN_SRC | PATTERN_COPY, false); + error_count += dmatest_verify(thread->dsts, dst_off + len, + params->buf_size, dst_off + len, + PATTERN_DST, false); if (error_count) { - thread_result_add(info, result, DMATEST_ET_VERIFY, - total_tests, src_off, dst_off, - len, error_count); + result("data error", total_tests, src_off, dst_off, + len, error_count); failed_tests++; } else { - thread_result_add(info, result, DMATEST_ET_OK, - total_tests, src_off, dst_off, - len, 0); + verbose_result("test passed", total_tests, src_off, + dst_off, len, 0); } } + runtime = ktime_us_delta(ktime_get(), ktime); ret = 0; for (i = 0; thread->dsts[i]; i++) @@ -802,20 +700,17 @@ err_srcbuf: err_srcs: kfree(pq_coefs); err_thread_type: - pr_notice("%s: terminating after %u tests, %u failures (status %d)\n", - thread_name, total_tests, failed_tests, ret); + pr_info("%s: summary %u tests, %u failures %llu iops %llu KB/s (%d)\n", + current->comm, total_tests, failed_tests, + dmatest_persec(runtime, total_tests), + dmatest_KBs(runtime, total_len), ret); /* terminate all transfers on specified channels */ if (ret) dmaengine_terminate_all(chan); thread->done = true; - - if (params->iterations > 0) - while (!kthread_should_stop()) { - DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wait_dmatest_exit); - interruptible_sleep_on(&wait_dmatest_exit); - } + wake_up(&thread_wait); return ret; } @@ -828,9 +723,10 @@ static void dmatest_cleanup_channel(struct dmatest_chan *dtc) list_for_each_entry_safe(thread, _thread, &dtc->threads, node) { ret = kthread_stop(thread->task); - pr_debug("dmatest: thread %s exited with status %d\n", - thread->task->comm, ret); + pr_debug("thread %s exited with status %d\n", + thread->task->comm, ret); list_del(&thread->node); + put_task_struct(thread->task); kfree(thread); } @@ -861,27 +757,27 @@ static int dmatest_add_threads(struct dmatest_info *info, for (i = 0; i < params->threads_per_chan; i++) { thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL); if (!thread) { - pr_warning("dmatest: No memory for %s-%s%u\n", - dma_chan_name(chan), op, i); - + pr_warn("No memory for %s-%s%u\n", + dma_chan_name(chan), op, i); break; } thread->info = info; thread->chan = dtc->chan; thread->type = type; smp_wmb(); - thread->task = kthread_run(dmatest_func, thread, "%s-%s%u", + thread->task = kthread_create(dmatest_func, thread, "%s-%s%u", dma_chan_name(chan), op, i); if (IS_ERR(thread->task)) { - pr_warning("dmatest: Failed to run thread %s-%s%u\n", - dma_chan_name(chan), op, i); + pr_warn("Failed to create thread %s-%s%u\n", + dma_chan_name(chan), op, i); kfree(thread); break; } /* srcbuf and dstbuf are allocated by the thread itself */ - + get_task_struct(thread->task); list_add_tail(&thread->node, &dtc->threads); + wake_up_process(thread->task); } return i; @@ -897,7 +793,7 @@ static int dmatest_add_channel(struct dmatest_info *info, dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL); if (!dtc) { - pr_warning("dmatest: No memory for %s\n", dma_chan_name(chan)); + pr_warn("No memory for %s\n", dma_chan_name(chan)); return -ENOMEM; } @@ -917,7 +813,7 @@ static int dmatest_add_channel(struct dmatest_info *info, thread_count += cnt > 0 ? cnt : 0; } - pr_info("dmatest: Started %u threads using %s\n", + pr_info("Started %u threads using %s\n", thread_count, dma_chan_name(chan)); list_add_tail(&dtc->node, &info->channels); @@ -937,20 +833,20 @@ static bool filter(struct dma_chan *chan, void *param) return true; } -static int __run_threaded_test(struct dmatest_info *info) +static void request_channels(struct dmatest_info *info, + enum dma_transaction_type type) { dma_cap_mask_t mask; - struct dma_chan *chan; - struct dmatest_params *params = &info->params; - int err = 0; dma_cap_zero(mask); - dma_cap_set(DMA_MEMCPY, mask); + dma_cap_set(type, mask); for (;;) { + struct dmatest_params *params = &info->params; + struct dma_chan *chan; + chan = dma_request_channel(mask, filter, params); if (chan) { - err = dmatest_add_channel(info, chan); - if (err) { + if (dmatest_add_channel(info, chan)) { dma_release_channel(chan); break; /* add_channel failed, punt */ } @@ -960,22 +856,30 @@ static int __run_threaded_test(struct dmatest_info *info) info->nr_channels >= params->max_channels) break; /* we have all we need */ } - return err; } -#ifndef MODULE -static int run_threaded_test(struct dmatest_info *info) +static void run_threaded_test(struct dmatest_info *info) { - int ret; + struct dmatest_params *params = &info->params; - mutex_lock(&info->lock); - ret = __run_threaded_test(info); - mutex_unlock(&info->lock); - return ret; + /* Copy test parameters */ + params->buf_size = test_buf_size; + strlcpy(params->channel, strim(test_channel), sizeof(params->channel)); + strlcpy(params->device, strim(test_device), sizeof(params->device)); + params->threads_per_chan = threads_per_chan; + params->max_channels = max_channels; + params->iterations = iterations; + params->xor_sources = xor_sources; + params->pq_sources = pq_sources; + params->timeout = timeout; + params->noverify = noverify; + + request_channels(info, DMA_MEMCPY); + request_channels(info, DMA_XOR); + request_channels(info, DMA_PQ); } -#endif -static void __stop_threaded_test(struct dmatest_info *info) +static void stop_threaded_test(struct dmatest_info *info) { struct dmatest_chan *dtc, *_dtc; struct dma_chan *chan; @@ -984,203 +888,86 @@ static void __stop_threaded_test(struct dmatest_info *info) list_del(&dtc->node); chan = dtc->chan; dmatest_cleanup_channel(dtc); - pr_debug("dmatest: dropped channel %s\n", dma_chan_name(chan)); + pr_debug("dropped channel %s\n", dma_chan_name(chan)); dma_release_channel(chan); } info->nr_channels = 0; } -static void stop_threaded_test(struct dmatest_info *info) +static void restart_threaded_test(struct dmatest_info *info, bool run) { - mutex_lock(&info->lock); - __stop_threaded_test(info); - mutex_unlock(&info->lock); -} - -static int __restart_threaded_test(struct dmatest_info *info, bool run) -{ - struct dmatest_params *params = &info->params; + /* we might be called early to set run=, defer running until all + * parameters have been evaluated + */ + if (!info->did_init) + return; /* Stop any running test first */ - __stop_threaded_test(info); - - if (run == false) - return 0; - - /* Clear results from previous run */ - result_free(info, NULL); - - /* Copy test parameters */ - params->buf_size = test_buf_size; - strlcpy(params->channel, strim(test_channel), sizeof(params->channel)); - strlcpy(params->device, strim(test_device), sizeof(params->device)); - params->threads_per_chan = threads_per_chan; - params->max_channels = max_channels; - params->iterations = iterations; - params->xor_sources = xor_sources; - params->pq_sources = pq_sources; - params->timeout = timeout; + stop_threaded_test(info); /* Run test with new parameters */ - return __run_threaded_test(info); -} - -static bool __is_threaded_test_run(struct dmatest_info *info) -{ - struct dmatest_chan *dtc; - - list_for_each_entry(dtc, &info->channels, node) { - struct dmatest_thread *thread; - - list_for_each_entry(thread, &dtc->threads, node) { - if (!thread->done) - return true; - } - } - - return false; + run_threaded_test(info); } -static ssize_t dtf_read_run(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) +static int dmatest_run_get(char *val, const struct kernel_param *kp) { - struct dmatest_info *info = file->private_data; - char buf[3]; + struct dmatest_info *info = &test_info; mutex_lock(&info->lock); - - if (__is_threaded_test_run(info)) { - buf[0] = 'Y'; + if (is_threaded_test_run(info)) { + dmatest_run = true; } else { - __stop_threaded_test(info); - buf[0] = 'N'; + stop_threaded_test(info); + dmatest_run = false; } - mutex_unlock(&info->lock); - buf[1] = '\n'; - buf[2] = 0x00; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t dtf_write_run(struct file *file, const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct dmatest_info *info = file->private_data; - char buf[16]; - bool bv; - int ret = 0; - if (copy_from_user(buf, user_buf, min(count, (sizeof(buf) - 1)))) - return -EFAULT; - - if (strtobool(buf, &bv) == 0) { - mutex_lock(&info->lock); - - if (__is_threaded_test_run(info)) - ret = -EBUSY; - else - ret = __restart_threaded_test(info, bv); - - mutex_unlock(&info->lock); - } - - return ret ? ret : count; + return param_get_bool(val, kp); } -static const struct file_operations dtf_run_fops = { - .read = dtf_read_run, - .write = dtf_write_run, - .open = simple_open, - .llseek = default_llseek, -}; - -static int dtf_results_show(struct seq_file *sf, void *data) +static int dmatest_run_set(const char *val, const struct kernel_param *kp) { - struct dmatest_info *info = sf->private; - struct dmatest_result *result; - struct dmatest_thread_result *tr; - unsigned int i; + struct dmatest_info *info = &test_info; + int ret; - mutex_lock(&info->results_lock); - list_for_each_entry(result, &info->results, node) { - list_for_each_entry(tr, &result->results, node) { - seq_printf(sf, "%s\n", - thread_result_get(result->name, tr)); - if (tr->type == DMATEST_ET_VERIFY_BUF) { - for (i = 0; i < tr->vr->error_count; i++) { - seq_printf(sf, "\t%s\n", - verify_result_get_one(tr->vr, i)); - } - } - } + mutex_lock(&info->lock); + ret = param_set_bool(val, kp); + if (ret) { + mutex_unlock(&info->lock); + return ret; } - mutex_unlock(&info->results_lock); - return 0; -} - -static int dtf_results_open(struct inode *inode, struct file *file) -{ - return single_open(file, dtf_results_show, inode->i_private); -} - -static const struct file_operations dtf_results_fops = { - .open = dtf_results_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int dmatest_register_dbgfs(struct dmatest_info *info) -{ - struct dentry *d; - - d = debugfs_create_dir("dmatest", NULL); - if (IS_ERR(d)) - return PTR_ERR(d); - if (!d) - goto err_root; + if (is_threaded_test_run(info)) + ret = -EBUSY; + else if (dmatest_run) + restart_threaded_test(info, dmatest_run); - info->root = d; - - /* Run or stop threaded test */ - debugfs_create_file("run", S_IWUSR | S_IRUGO, info->root, info, - &dtf_run_fops); - - /* Results of test in progress */ - debugfs_create_file("results", S_IRUGO, info->root, info, - &dtf_results_fops); - - return 0; + mutex_unlock(&info->lock); -err_root: - pr_err("dmatest: Failed to initialize debugfs\n"); - return -ENOMEM; + return ret; } static int __init dmatest_init(void) { struct dmatest_info *info = &test_info; - int ret; - - memset(info, 0, sizeof(*info)); + struct dmatest_params *params = &info->params; - mutex_init(&info->lock); - INIT_LIST_HEAD(&info->channels); + if (dmatest_run) { + mutex_lock(&info->lock); + run_threaded_test(info); + mutex_unlock(&info->lock); + } - mutex_init(&info->results_lock); - INIT_LIST_HEAD(&info->results); + if (params->iterations && wait) + wait_event(thread_wait, !is_threaded_test_run(info)); - ret = dmatest_register_dbgfs(info); - if (ret) - return ret; + /* module parameters are stable, inittime tests are started, + * let userspace take over 'run' control + */ + info->did_init = true; -#ifdef MODULE return 0; -#else - return run_threaded_test(info); -#endif } /* when compiled-in wait for drivers to load first */ late_initcall(dmatest_init); @@ -1189,9 +976,9 @@ static void __exit dmatest_exit(void) { struct dmatest_info *info = &test_info; - debugfs_remove_recursive(info->root); + mutex_lock(&info->lock); stop_threaded_test(info); - result_free(info, NULL); + mutex_unlock(&info->lock); } module_exit(dmatest_exit); diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 2c29331..7516be4 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -85,10 +85,6 @@ static struct device *chan2dev(struct dma_chan *chan) { return &chan->dev->device; } -static struct device *chan2parent(struct dma_chan *chan) -{ - return chan->dev->device.parent; -} static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc) { @@ -311,26 +307,7 @@ dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc, list_splice_init(&desc->tx_list, &dwc->free_list); list_move(&desc->desc_node, &dwc->free_list); - if (!is_slave_direction(dwc->direction)) { - struct device *parent = chan2parent(&dwc->chan); - if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) - dma_unmap_single(parent, desc->lli.dar, - desc->total_len, DMA_FROM_DEVICE); - else - dma_unmap_page(parent, desc->lli.dar, - desc->total_len, DMA_FROM_DEVICE); - } - if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) - dma_unmap_single(parent, desc->lli.sar, - desc->total_len, DMA_TO_DEVICE); - else - dma_unmap_page(parent, desc->lli.sar, - desc->total_len, DMA_TO_DEVICE); - } - } - + dma_descriptor_unmap(txd); spin_unlock_irqrestore(&dwc->lock, flags); if (callback) diff --git a/drivers/dma/ep93xx_dma.c b/drivers/dma/ep93xx_dma.c index 591cd8c..cb4bf68 100644 --- a/drivers/dma/ep93xx_dma.c +++ b/drivers/dma/ep93xx_dma.c @@ -733,28 +733,6 @@ static void ep93xx_dma_advance_work(struct ep93xx_dma_chan *edmac) spin_unlock_irqrestore(&edmac->lock, flags); } -static void ep93xx_dma_unmap_buffers(struct ep93xx_dma_desc *desc) -{ - struct device *dev = desc->txd.chan->device->dev; - - if (!(desc->txd.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - if (desc->txd.flags & DMA_COMPL_SRC_UNMAP_SINGLE) - dma_unmap_single(dev, desc->src_addr, desc->size, - DMA_TO_DEVICE); - else - dma_unmap_page(dev, desc->src_addr, desc->size, - DMA_TO_DEVICE); - } - if (!(desc->txd.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - if (desc->txd.flags & DMA_COMPL_DEST_UNMAP_SINGLE) - dma_unmap_single(dev, desc->dst_addr, desc->size, - DMA_FROM_DEVICE); - else - dma_unmap_page(dev, desc->dst_addr, desc->size, - DMA_FROM_DEVICE); - } -} - static void ep93xx_dma_tasklet(unsigned long data) { struct ep93xx_dma_chan *edmac = (struct ep93xx_dma_chan *)data; @@ -787,13 +765,7 @@ static void ep93xx_dma_tasklet(unsigned long data) /* Now we can release all the chained descriptors */ list_for_each_entry_safe(desc, d, &list, node) { - /* - * For the memcpy channels the API requires us to unmap the - * buffers unless requested otherwise. - */ - if (!edmac->chan.private) - ep93xx_dma_unmap_buffers(desc); - + dma_descriptor_unmap(&desc->txd); ep93xx_dma_desc_put(edmac, desc); } diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 4e89812..bd7f888 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -868,22 +868,7 @@ static void fsldma_cleanup_descriptor(struct fsldma_chan *chan, /* Run any dependencies */ dma_run_dependencies(txd); - /* Unmap the dst buffer, if requested */ - if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) - dma_unmap_single(dev, dst, len, DMA_FROM_DEVICE); - else - dma_unmap_page(dev, dst, len, DMA_FROM_DEVICE); - } - - /* Unmap the src buffer, if requested */ - if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) - dma_unmap_single(dev, src, len, DMA_TO_DEVICE); - else - dma_unmap_page(dev, src, len, DMA_TO_DEVICE); - } - + dma_descriptor_unmap(txd); #ifdef FSL_DMA_LD_DEBUG chan_dbg(chan, "LD %p free\n", desc); #endif diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index a0f0fce..1a49c7776 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -531,21 +531,6 @@ static void ioat1_cleanup_event(unsigned long data) writew(IOAT_CHANCTRL_RUN, ioat->base.reg_base + IOAT_CHANCTRL_OFFSET); } -void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, - size_t len, struct ioat_dma_descriptor *hw) -{ - struct pci_dev *pdev = chan->device->pdev; - size_t offset = len - hw->size; - - if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) - ioat_unmap(pdev, hw->dst_addr - offset, len, - PCI_DMA_FROMDEVICE, flags, 1); - - if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) - ioat_unmap(pdev, hw->src_addr - offset, len, - PCI_DMA_TODEVICE, flags, 0); -} - dma_addr_t ioat_get_current_completion(struct ioat_chan_common *chan) { dma_addr_t phys_complete; @@ -602,7 +587,7 @@ static void __cleanup(struct ioat_dma_chan *ioat, dma_addr_t phys_complete) dump_desc_dbg(ioat, desc); if (tx->cookie) { dma_cookie_complete(tx); - ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); + dma_descriptor_unmap(tx); ioat->active -= desc->hw->tx_cnt; if (tx->callback) { tx->callback(tx->callback_param); @@ -833,8 +818,7 @@ int ioat_dma_self_test(struct ioatdma_device *device) dma_src = dma_map_single(dev, src, IOAT_TEST_SIZE, DMA_TO_DEVICE); dma_dest = dma_map_single(dev, dest, IOAT_TEST_SIZE, DMA_FROM_DEVICE); - flags = DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP | - DMA_PREP_INTERRUPT; + flags = DMA_PREP_INTERRUPT; tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, IOAT_TEST_SIZE, flags); if (!tx) { @@ -885,8 +869,7 @@ static char ioat_interrupt_style[32] = "msix"; module_param_string(ioat_interrupt_style, ioat_interrupt_style, sizeof(ioat_interrupt_style), 0644); MODULE_PARM_DESC(ioat_interrupt_style, - "set ioat interrupt style: msix (default), " - "msix-single-vector, msi, intx)"); + "set ioat interrupt style: msix (default), msi, intx"); /** * ioat_dma_setup_interrupts - setup interrupt handler @@ -904,8 +887,6 @@ int ioat_dma_setup_interrupts(struct ioatdma_device *device) if (!strcmp(ioat_interrupt_style, "msix")) goto msix; - if (!strcmp(ioat_interrupt_style, "msix-single-vector")) - goto msix_single_vector; if (!strcmp(ioat_interrupt_style, "msi")) goto msi; if (!strcmp(ioat_interrupt_style, "intx")) @@ -920,10 +901,8 @@ msix: device->msix_entries[i].entry = i; err = pci_enable_msix(pdev, device->msix_entries, msixcnt); - if (err < 0) + if (err) goto msi; - if (err > 0) - goto msix_single_vector; for (i = 0; i < msixcnt; i++) { msix = &device->msix_entries[i]; @@ -937,29 +916,13 @@ msix: chan = ioat_chan_by_index(device, j); devm_free_irq(dev, msix->vector, chan); } - goto msix_single_vector; + goto msi; } } intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; device->irq_mode = IOAT_MSIX; goto done; -msix_single_vector: - msix = &device->msix_entries[0]; - msix->entry = 0; - err = pci_enable_msix(pdev, device->msix_entries, 1); - if (err) - goto msi; - - err = devm_request_irq(dev, msix->vector, ioat_dma_do_interrupt, 0, - "ioat-msix", device); - if (err) { - pci_disable_msix(pdev); - goto msi; - } - device->irq_mode = IOAT_MSIX_SINGLE; - goto done; - msi: err = pci_enable_msi(pdev); if (err) @@ -971,7 +934,7 @@ msi: pci_disable_msi(pdev); goto intx; } - device->irq_mode = IOAT_MSIX; + device->irq_mode = IOAT_MSI; goto done; intx: diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 54fb7b9..11fb877 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -52,7 +52,6 @@ enum ioat_irq_mode { IOAT_NOIRQ = 0, IOAT_MSIX, - IOAT_MSIX_SINGLE, IOAT_MSI, IOAT_INTX }; @@ -83,7 +82,6 @@ struct ioatdma_device { struct pci_pool *completion_pool; #define MAX_SED_POOLS 5 struct dma_pool *sed_hw_pool[MAX_SED_POOLS]; - struct kmem_cache *sed_pool; struct dma_device common; u8 version; struct msix_entry msix_entries[4]; @@ -342,16 +340,6 @@ static inline bool is_ioat_bug(unsigned long err) return !!err; } -static inline void ioat_unmap(struct pci_dev *pdev, dma_addr_t addr, size_t len, - int direction, enum dma_ctrl_flags flags, bool dst) -{ - if ((dst && (flags & DMA_COMPL_DEST_UNMAP_SINGLE)) || - (!dst && (flags & DMA_COMPL_SRC_UNMAP_SINGLE))) - pci_unmap_single(pdev, addr, len, direction); - else - pci_unmap_page(pdev, addr, len, direction); -} - int ioat_probe(struct ioatdma_device *device); int ioat_register(struct ioatdma_device *device); int ioat1_dma_probe(struct ioatdma_device *dev, int dca); @@ -363,8 +351,6 @@ void ioat_init_channel(struct ioatdma_device *device, struct ioat_chan_common *chan, int idx); enum dma_status ioat_dma_tx_status(struct dma_chan *c, dma_cookie_t cookie, struct dma_tx_state *txstate); -void ioat_dma_unmap(struct ioat_chan_common *chan, enum dma_ctrl_flags flags, - size_t len, struct ioat_dma_descriptor *hw); bool ioat_cleanup_preamble(struct ioat_chan_common *chan, dma_addr_t *phys_complete); void ioat_kobject_add(struct ioatdma_device *device, struct kobj_type *type); diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index b925e1b..5d3affe 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -148,7 +148,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) tx = &desc->txd; dump_desc_dbg(ioat, desc); if (tx->cookie) { - ioat_dma_unmap(chan, tx->flags, desc->len, desc->hw); + dma_descriptor_unmap(tx); dma_cookie_complete(tx); if (tx->callback) { tx->callback(tx->callback_param); diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index 212d584..4702927 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -157,7 +157,6 @@ static inline void ioat2_set_chainaddr(struct ioat2_dma_chan *ioat, u64 addr) int ioat2_dma_probe(struct ioatdma_device *dev, int dca); int ioat3_dma_probe(struct ioatdma_device *dev, int dca); -void ioat3_dma_remove(struct ioatdma_device *dev); struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); int ioat2_check_space_lock(struct ioat2_dma_chan *ioat, int num_descs); diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 806b4ce..820817e9 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -67,6 +67,8 @@ #include "dma.h" #include "dma_v2.h" +extern struct kmem_cache *ioat3_sed_cache; + /* ioat hardware assumes at least two sources for raid operations */ #define src_cnt_to_sw(x) ((x) + 2) #define src_cnt_to_hw(x) ((x) - 2) @@ -87,22 +89,8 @@ static const u8 pq_idx_to_field[] = { 1, 4, 5, 0, 1, 2, 4, 5 }; static const u8 pq16_idx_to_field[] = { 1, 4, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6 }; -/* - * technically sources 1 and 2 do not require SED, but the op will have - * at least 9 descriptors so that's irrelevant. - */ -static const u8 pq16_idx_to_sed[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, - 1, 1, 1, 1, 1, 1, 1 }; - static void ioat3_eh(struct ioat2_dma_chan *ioat); -static dma_addr_t xor_get_src(struct ioat_raw_descriptor *descs[2], int idx) -{ - struct ioat_raw_descriptor *raw = descs[xor_idx_to_desc >> idx & 1]; - - return raw->field[xor_idx_to_field[idx]]; -} - static void xor_set_src(struct ioat_raw_descriptor *descs[2], dma_addr_t addr, u32 offset, int idx) { @@ -135,12 +123,6 @@ static void pq_set_src(struct ioat_raw_descriptor *descs[2], pq->coef[idx] = coef; } -static int sed_get_pq16_pool_idx(int src_cnt) -{ - - return pq16_idx_to_sed[src_cnt]; -} - static bool is_jf_ioat(struct pci_dev *pdev) { switch (pdev->device) { @@ -272,7 +254,7 @@ ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool) struct ioat_sed_ent *sed; gfp_t flags = __GFP_ZERO | GFP_ATOMIC; - sed = kmem_cache_alloc(device->sed_pool, flags); + sed = kmem_cache_alloc(ioat3_sed_cache, flags); if (!sed) return NULL; @@ -280,7 +262,7 @@ ioat3_alloc_sed(struct ioatdma_device *device, unsigned int hw_pool) sed->hw = dma_pool_alloc(device->sed_hw_pool[hw_pool], flags, &sed->dma); if (!sed->hw) { - kmem_cache_free(device->sed_pool, sed); + kmem_cache_free(ioat3_sed_cache, sed); return NULL; } @@ -293,165 +275,7 @@ static void ioat3_free_sed(struct ioatdma_device *device, struct ioat_sed_ent *s return; dma_pool_free(device->sed_hw_pool[sed->hw_pool], sed->hw, sed->dma); - kmem_cache_free(device->sed_pool, sed); -} - -static void ioat3_dma_unmap(struct ioat2_dma_chan *ioat, - struct ioat_ring_ent *desc, int idx) -{ - struct ioat_chan_common *chan = &ioat->base; - struct pci_dev *pdev = chan->device->pdev; - size_t len = desc->len; - size_t offset = len - desc->hw->size; - struct dma_async_tx_descriptor *tx = &desc->txd; - enum dma_ctrl_flags flags = tx->flags; - - switch (desc->hw->ctl_f.op) { - case IOAT_OP_COPY: - if (!desc->hw->ctl_f.null) /* skip 'interrupt' ops */ - ioat_dma_unmap(chan, flags, len, desc->hw); - break; - case IOAT_OP_XOR_VAL: - case IOAT_OP_XOR: { - struct ioat_xor_descriptor *xor = desc->xor; - struct ioat_ring_ent *ext; - struct ioat_xor_ext_descriptor *xor_ex = NULL; - int src_cnt = src_cnt_to_sw(xor->ctl_f.src_cnt); - struct ioat_raw_descriptor *descs[2]; - int i; - - if (src_cnt > 5) { - ext = ioat2_get_ring_ent(ioat, idx + 1); - xor_ex = ext->xor_ex; - } - - if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - descs[0] = (struct ioat_raw_descriptor *) xor; - descs[1] = (struct ioat_raw_descriptor *) xor_ex; - for (i = 0; i < src_cnt; i++) { - dma_addr_t src = xor_get_src(descs, i); - - ioat_unmap(pdev, src - offset, len, - PCI_DMA_TODEVICE, flags, 0); - } - - /* dest is a source in xor validate operations */ - if (xor->ctl_f.op == IOAT_OP_XOR_VAL) { - ioat_unmap(pdev, xor->dst_addr - offset, len, - PCI_DMA_TODEVICE, flags, 1); - break; - } - } - - if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) - ioat_unmap(pdev, xor->dst_addr - offset, len, - PCI_DMA_FROMDEVICE, flags, 1); - break; - } - case IOAT_OP_PQ_VAL: - case IOAT_OP_PQ: { - struct ioat_pq_descriptor *pq = desc->pq; - struct ioat_ring_ent *ext; - struct ioat_pq_ext_descriptor *pq_ex = NULL; - int src_cnt = src_cnt_to_sw(pq->ctl_f.src_cnt); - struct ioat_raw_descriptor *descs[2]; - int i; - - if (src_cnt > 3) { - ext = ioat2_get_ring_ent(ioat, idx + 1); - pq_ex = ext->pq_ex; - } - - /* in the 'continue' case don't unmap the dests as sources */ - if (dmaf_p_disabled_continue(flags)) - src_cnt--; - else if (dmaf_continue(flags)) - src_cnt -= 3; - - if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - descs[0] = (struct ioat_raw_descriptor *) pq; - descs[1] = (struct ioat_raw_descriptor *) pq_ex; - for (i = 0; i < src_cnt; i++) { - dma_addr_t src = pq_get_src(descs, i); - - ioat_unmap(pdev, src - offset, len, - PCI_DMA_TODEVICE, flags, 0); - } - - /* the dests are sources in pq validate operations */ - if (pq->ctl_f.op == IOAT_OP_XOR_VAL) { - if (!(flags & DMA_PREP_PQ_DISABLE_P)) - ioat_unmap(pdev, pq->p_addr - offset, - len, PCI_DMA_TODEVICE, flags, 0); - if (!(flags & DMA_PREP_PQ_DISABLE_Q)) - ioat_unmap(pdev, pq->q_addr - offset, - len, PCI_DMA_TODEVICE, flags, 0); - break; - } - } - - if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - if (!(flags & DMA_PREP_PQ_DISABLE_P)) - ioat_unmap(pdev, pq->p_addr - offset, len, - PCI_DMA_BIDIRECTIONAL, flags, 1); - if (!(flags & DMA_PREP_PQ_DISABLE_Q)) - ioat_unmap(pdev, pq->q_addr - offset, len, - PCI_DMA_BIDIRECTIONAL, flags, 1); - } - break; - } - case IOAT_OP_PQ_16S: - case IOAT_OP_PQ_VAL_16S: { - struct ioat_pq_descriptor *pq = desc->pq; - int src_cnt = src16_cnt_to_sw(pq->ctl_f.src_cnt); - struct ioat_raw_descriptor *descs[4]; - int i; - - /* in the 'continue' case don't unmap the dests as sources */ - if (dmaf_p_disabled_continue(flags)) - src_cnt--; - else if (dmaf_continue(flags)) - src_cnt -= 3; - - if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - descs[0] = (struct ioat_raw_descriptor *)pq; - descs[1] = (struct ioat_raw_descriptor *)(desc->sed->hw); - descs[2] = (struct ioat_raw_descriptor *)(&desc->sed->hw->b[0]); - for (i = 0; i < src_cnt; i++) { - dma_addr_t src = pq16_get_src(descs, i); - - ioat_unmap(pdev, src - offset, len, - PCI_DMA_TODEVICE, flags, 0); - } - - /* the dests are sources in pq validate operations */ - if (pq->ctl_f.op == IOAT_OP_XOR_VAL) { - if (!(flags & DMA_PREP_PQ_DISABLE_P)) - ioat_unmap(pdev, pq->p_addr - offset, - len, PCI_DMA_TODEVICE, - flags, 0); - if (!(flags & DMA_PREP_PQ_DISABLE_Q)) - ioat_unmap(pdev, pq->q_addr - offset, - len, PCI_DMA_TODEVICE, - flags, 0); - break; - } - } - - if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - if (!(flags & DMA_PREP_PQ_DISABLE_P)) - ioat_unmap(pdev, pq->p_addr - offset, len, - PCI_DMA_BIDIRECTIONAL, flags, 1); - if (!(flags & DMA_PREP_PQ_DISABLE_Q)) - ioat_unmap(pdev, pq->q_addr - offset, len, - PCI_DMA_BIDIRECTIONAL, flags, 1); - } - break; - } - default: - dev_err(&pdev->dev, "%s: unknown op type: %#x\n", - __func__, desc->hw->ctl_f.op); - } + kmem_cache_free(ioat3_sed_cache, sed); } static bool desc_has_ext(struct ioat_ring_ent *desc) @@ -577,7 +401,7 @@ static void __cleanup(struct ioat2_dma_chan *ioat, dma_addr_t phys_complete) tx = &desc->txd; if (tx->cookie) { dma_cookie_complete(tx); - ioat3_dma_unmap(ioat, desc, idx + i); + dma_descriptor_unmap(tx); if (tx->callback) { tx->callback(tx->callback_param); tx->callback = NULL; @@ -1129,9 +953,6 @@ __ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result, u8 op; int i, s, idx, num_descs; - /* this function only handles src_cnt 9 - 16 */ - BUG_ON(src_cnt < 9); - /* this function is only called with 9-16 sources */ op = result ? IOAT_OP_PQ_VAL_16S : IOAT_OP_PQ_16S; @@ -1159,8 +980,7 @@ __ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result, descs[0] = (struct ioat_raw_descriptor *) pq; - desc->sed = ioat3_alloc_sed(device, - sed_get_pq16_pool_idx(src_cnt)); + desc->sed = ioat3_alloc_sed(device, (src_cnt-2) >> 3); if (!desc->sed) { dev_err(to_dev(chan), "%s: no free sed entries\n", __func__); @@ -1218,13 +1038,21 @@ __ioat3_prep_pq16_lock(struct dma_chan *c, enum sum_check_flags *result, return &desc->txd; } +static int src_cnt_flags(unsigned int src_cnt, unsigned long flags) +{ + if (dmaf_p_disabled_continue(flags)) + return src_cnt + 1; + else if (dmaf_continue(flags)) + return src_cnt + 3; + else + return src_cnt; +} + static struct dma_async_tx_descriptor * ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, unsigned int src_cnt, const unsigned char *scf, size_t len, unsigned long flags) { - struct dma_device *dma = chan->device; - /* specify valid address for disabled result */ if (flags & DMA_PREP_PQ_DISABLE_P) dst[0] = dst[1]; @@ -1244,7 +1072,7 @@ ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, single_source_coef[0] = scf[0]; single_source_coef[1] = 0; - return (src_cnt > 8) && (dma->max_pq > 8) ? + return src_cnt_flags(src_cnt, flags) > 8 ? __ioat3_prep_pq16_lock(chan, NULL, dst, single_source, 2, single_source_coef, len, flags) : @@ -1252,7 +1080,7 @@ ioat3_prep_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, single_source_coef, len, flags); } else { - return (src_cnt > 8) && (dma->max_pq > 8) ? + return src_cnt_flags(src_cnt, flags) > 8 ? __ioat3_prep_pq16_lock(chan, NULL, dst, src, src_cnt, scf, len, flags) : __ioat3_prep_pq_lock(chan, NULL, dst, src, src_cnt, @@ -1265,8 +1093,6 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, unsigned int src_cnt, const unsigned char *scf, size_t len, enum sum_check_flags *pqres, unsigned long flags) { - struct dma_device *dma = chan->device; - /* specify valid address for disabled result */ if (flags & DMA_PREP_PQ_DISABLE_P) pq[0] = pq[1]; @@ -1278,7 +1104,7 @@ ioat3_prep_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, */ *pqres = 0; - return (src_cnt > 8) && (dma->max_pq > 8) ? + return src_cnt_flags(src_cnt, flags) > 8 ? __ioat3_prep_pq16_lock(chan, pqres, pq, src, src_cnt, scf, len, flags) : __ioat3_prep_pq_lock(chan, pqres, pq, src, src_cnt, scf, len, @@ -1289,7 +1115,6 @@ static struct dma_async_tx_descriptor * ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, unsigned int src_cnt, size_t len, unsigned long flags) { - struct dma_device *dma = chan->device; unsigned char scf[src_cnt]; dma_addr_t pq[2]; @@ -1298,7 +1123,7 @@ ioat3_prep_pqxor(struct dma_chan *chan, dma_addr_t dst, dma_addr_t *src, flags |= DMA_PREP_PQ_DISABLE_Q; pq[1] = dst; /* specify valid address for disabled result */ - return (src_cnt > 8) && (dma->max_pq > 8) ? + return src_cnt_flags(src_cnt, flags) > 8 ? __ioat3_prep_pq16_lock(chan, NULL, pq, src, src_cnt, scf, len, flags) : __ioat3_prep_pq_lock(chan, NULL, pq, src, src_cnt, scf, len, @@ -1310,7 +1135,6 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, unsigned int src_cnt, size_t len, enum sum_check_flags *result, unsigned long flags) { - struct dma_device *dma = chan->device; unsigned char scf[src_cnt]; dma_addr_t pq[2]; @@ -1324,8 +1148,7 @@ ioat3_prep_pqxor_val(struct dma_chan *chan, dma_addr_t *src, flags |= DMA_PREP_PQ_DISABLE_Q; pq[1] = pq[0]; /* specify valid address for disabled result */ - - return (src_cnt > 8) && (dma->max_pq > 8) ? + return src_cnt_flags(src_cnt, flags) > 8 ? __ioat3_prep_pq16_lock(chan, result, pq, &src[1], src_cnt - 1, scf, len, flags) : __ioat3_prep_pq_lock(chan, result, pq, &src[1], src_cnt - 1, @@ -1444,9 +1267,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device) DMA_TO_DEVICE); tx = dma->device_prep_dma_xor(dma_chan, dest_dma, dma_srcs, IOAT_NUM_SRC_TEST, PAGE_SIZE, - DMA_PREP_INTERRUPT | - DMA_COMPL_SKIP_SRC_UNMAP | - DMA_COMPL_SKIP_DEST_UNMAP); + DMA_PREP_INTERRUPT); if (!tx) { dev_err(dev, "Self-test xor prep failed\n"); @@ -1507,9 +1328,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device) DMA_TO_DEVICE); tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, - &xor_val_result, DMA_PREP_INTERRUPT | - DMA_COMPL_SKIP_SRC_UNMAP | - DMA_COMPL_SKIP_DEST_UNMAP); + &xor_val_result, DMA_PREP_INTERRUPT); if (!tx) { dev_err(dev, "Self-test zero prep failed\n"); err = -ENODEV; @@ -1545,6 +1364,8 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device) goto free_resources; } + memset(page_address(dest), 0, PAGE_SIZE); + /* test for non-zero parity sum */ op = IOAT_OP_XOR_VAL; @@ -1554,9 +1375,7 @@ static int ioat_xor_val_self_test(struct ioatdma_device *device) DMA_TO_DEVICE); tx = dma->device_prep_dma_xor_val(dma_chan, dma_srcs, IOAT_NUM_SRC_TEST + 1, PAGE_SIZE, - &xor_val_result, DMA_PREP_INTERRUPT | - DMA_COMPL_SKIP_SRC_UNMAP | - DMA_COMPL_SKIP_DEST_UNMAP); + &xor_val_result, DMA_PREP_INTERRUPT); if (!tx) { dev_err(dev, "Self-test 2nd zero prep failed\n"); err = -ENODEV; @@ -1630,52 +1449,36 @@ static int ioat3_dma_self_test(struct ioatdma_device *device) static int ioat3_irq_reinit(struct ioatdma_device *device) { - int msixcnt = device->common.chancnt; struct pci_dev *pdev = device->pdev; - int i; - struct msix_entry *msix; - struct ioat_chan_common *chan; - int err = 0; + int irq = pdev->irq, i; + + if (!is_bwd_ioat(pdev)) + return 0; switch (device->irq_mode) { case IOAT_MSIX: + for (i = 0; i < device->common.chancnt; i++) { + struct msix_entry *msix = &device->msix_entries[i]; + struct ioat_chan_common *chan; - for (i = 0; i < msixcnt; i++) { - msix = &device->msix_entries[i]; chan = ioat_chan_by_index(device, i); devm_free_irq(&pdev->dev, msix->vector, chan); } pci_disable_msix(pdev); break; - - case IOAT_MSIX_SINGLE: - msix = &device->msix_entries[0]; - chan = ioat_chan_by_index(device, 0); - devm_free_irq(&pdev->dev, msix->vector, chan); - pci_disable_msix(pdev); - break; - case IOAT_MSI: - chan = ioat_chan_by_index(device, 0); - devm_free_irq(&pdev->dev, pdev->irq, chan); pci_disable_msi(pdev); - break; - + /* fall through */ case IOAT_INTX: - chan = ioat_chan_by_index(device, 0); - devm_free_irq(&pdev->dev, pdev->irq, chan); + devm_free_irq(&pdev->dev, irq, device); break; - default: return 0; } - device->irq_mode = IOAT_NOIRQ; - err = ioat_dma_setup_interrupts(device); - - return err; + return ioat_dma_setup_interrupts(device); } static int ioat3_reset_hw(struct ioat_chan_common *chan) @@ -1718,14 +1521,12 @@ static int ioat3_reset_hw(struct ioat_chan_common *chan) } err = ioat2_reset_sync(chan, msecs_to_jiffies(200)); - if (err) { - dev_err(&pdev->dev, "Failed to reset!\n"); - return err; - } - - if (device->irq_mode != IOAT_NOIRQ && is_bwd_ioat(pdev)) + if (!err) err = ioat3_irq_reinit(device); + if (err) + dev_err(&pdev->dev, "Failed to reset: %d\n", err); + return err; } @@ -1835,21 +1636,15 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) char pool_name[14]; int i; - /* allocate sw descriptor pool for SED */ - device->sed_pool = kmem_cache_create("ioat_sed", - sizeof(struct ioat_sed_ent), 0, 0, NULL); - if (!device->sed_pool) - return -ENOMEM; - for (i = 0; i < MAX_SED_POOLS; i++) { snprintf(pool_name, 14, "ioat_hw%d_sed", i); /* allocate SED DMA pool */ - device->sed_hw_pool[i] = dma_pool_create(pool_name, + device->sed_hw_pool[i] = dmam_pool_create(pool_name, &pdev->dev, SED_SIZE * (i + 1), 64, 0); if (!device->sed_hw_pool[i]) - goto sed_pool_cleanup; + return -ENOMEM; } } @@ -1875,28 +1670,4 @@ int ioat3_dma_probe(struct ioatdma_device *device, int dca) device->dca = ioat3_dca_init(pdev, device->reg_base); return 0; - -sed_pool_cleanup: - if (device->sed_pool) { - int i; - kmem_cache_destroy(device->sed_pool); - - for (i = 0; i < MAX_SED_POOLS; i++) - if (device->sed_hw_pool[i]) - dma_pool_destroy(device->sed_hw_pool[i]); - } - - return -ENOMEM; -} - -void ioat3_dma_remove(struct ioatdma_device *device) -{ - if (device->sed_pool) { - int i; - kmem_cache_destroy(device->sed_pool); - - for (i = 0; i < MAX_SED_POOLS; i++) - if (device->sed_hw_pool[i]) - dma_pool_destroy(device->sed_hw_pool[i]); - } } diff --git a/drivers/dma/ioat/pci.c b/drivers/dma/ioat/pci.c index 2c8d560..1d051cd 100644 --- a/drivers/dma/ioat/pci.c +++ b/drivers/dma/ioat/pci.c @@ -123,6 +123,7 @@ module_param(ioat_dca_enabled, int, 0644); MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); struct kmem_cache *ioat2_cache; +struct kmem_cache *ioat3_sed_cache; #define DRV_NAME "ioatdma" @@ -207,9 +208,6 @@ static void ioat_remove(struct pci_dev *pdev) if (!device) return; - if (device->version >= IOAT_VER_3_0) - ioat3_dma_remove(device); - dev_err(&pdev->dev, "Removing dma and dca services\n"); if (device->dca) { unregister_dca_provider(device->dca, &pdev->dev); @@ -221,7 +219,7 @@ static void ioat_remove(struct pci_dev *pdev) static int __init ioat_init_module(void) { - int err; + int err = -ENOMEM; pr_info("%s: Intel(R) QuickData Technology Driver %s\n", DRV_NAME, IOAT_DMA_VERSION); @@ -231,9 +229,21 @@ static int __init ioat_init_module(void) if (!ioat2_cache) return -ENOMEM; + ioat3_sed_cache = KMEM_CACHE(ioat_sed_ent, 0); + if (!ioat3_sed_cache) + goto err_ioat2_cache; + err = pci_register_driver(&ioat_pci_driver); if (err) - kmem_cache_destroy(ioat2_cache); + goto err_ioat3_cache; + + return 0; + + err_ioat3_cache: + kmem_cache_destroy(ioat3_sed_cache); + + err_ioat2_cache: + kmem_cache_destroy(ioat2_cache); return err; } diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 408fe6b..c56137b 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -61,80 +61,6 @@ static void iop_adma_free_slots(struct iop_adma_desc_slot *slot) } } -static void -iop_desc_unmap(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc) -{ - struct dma_async_tx_descriptor *tx = &desc->async_tx; - struct iop_adma_desc_slot *unmap = desc->group_head; - struct device *dev = &iop_chan->device->pdev->dev; - u32 len = unmap->unmap_len; - enum dma_ctrl_flags flags = tx->flags; - u32 src_cnt; - dma_addr_t addr; - dma_addr_t dest; - - src_cnt = unmap->unmap_src_cnt; - dest = iop_desc_get_dest_addr(unmap, iop_chan); - if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - enum dma_data_direction dir; - - if (src_cnt > 1) /* is xor? */ - dir = DMA_BIDIRECTIONAL; - else - dir = DMA_FROM_DEVICE; - - dma_unmap_page(dev, dest, len, dir); - } - - if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - while (src_cnt--) { - addr = iop_desc_get_src_addr(unmap, iop_chan, src_cnt); - if (addr == dest) - continue; - dma_unmap_page(dev, addr, len, DMA_TO_DEVICE); - } - } - desc->group_head = NULL; -} - -static void -iop_desc_unmap_pq(struct iop_adma_chan *iop_chan, struct iop_adma_desc_slot *desc) -{ - struct dma_async_tx_descriptor *tx = &desc->async_tx; - struct iop_adma_desc_slot *unmap = desc->group_head; - struct device *dev = &iop_chan->device->pdev->dev; - u32 len = unmap->unmap_len; - enum dma_ctrl_flags flags = tx->flags; - u32 src_cnt = unmap->unmap_src_cnt; - dma_addr_t pdest = iop_desc_get_dest_addr(unmap, iop_chan); - dma_addr_t qdest = iop_desc_get_qdest_addr(unmap, iop_chan); - int i; - - if (tx->flags & DMA_PREP_CONTINUE) - src_cnt -= 3; - - if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP) && !desc->pq_check_result) { - dma_unmap_page(dev, pdest, len, DMA_BIDIRECTIONAL); - dma_unmap_page(dev, qdest, len, DMA_BIDIRECTIONAL); - } - - if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - dma_addr_t addr; - - for (i = 0; i < src_cnt; i++) { - addr = iop_desc_get_src_addr(unmap, iop_chan, i); - dma_unmap_page(dev, addr, len, DMA_TO_DEVICE); - } - if (desc->pq_check_result) { - dma_unmap_page(dev, pdest, len, DMA_TO_DEVICE); - dma_unmap_page(dev, qdest, len, DMA_TO_DEVICE); - } - } - - desc->group_head = NULL; -} - - static dma_cookie_t iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, struct iop_adma_chan *iop_chan, dma_cookie_t cookie) @@ -152,15 +78,9 @@ iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, if (tx->callback) tx->callback(tx->callback_param); - /* unmap dma addresses - * (unmap_single vs unmap_page?) - */ - if (desc->group_head && desc->unmap_len) { - if (iop_desc_is_pq(desc)) - iop_desc_unmap_pq(iop_chan, desc); - else - iop_desc_unmap(iop_chan, desc); - } + dma_descriptor_unmap(tx); + if (desc->group_head) + desc->group_head = NULL; } /* run dependent operations */ @@ -591,7 +511,6 @@ iop_adma_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags) if (sw_desc) { grp_start = sw_desc->group_head; iop_desc_init_interrupt(grp_start, iop_chan); - grp_start->unmap_len = 0; sw_desc->async_tx.flags = flags; } spin_unlock_bh(&iop_chan->lock); @@ -623,8 +542,6 @@ iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, iop_desc_set_byte_count(grp_start, iop_chan, len); iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest); iop_desc_set_memcpy_src_addr(grp_start, dma_src); - sw_desc->unmap_src_cnt = 1; - sw_desc->unmap_len = len; sw_desc->async_tx.flags = flags; } spin_unlock_bh(&iop_chan->lock); @@ -657,8 +574,6 @@ iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest, iop_desc_init_xor(grp_start, src_cnt, flags); iop_desc_set_byte_count(grp_start, iop_chan, len); iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest); - sw_desc->unmap_src_cnt = src_cnt; - sw_desc->unmap_len = len; sw_desc->async_tx.flags = flags; while (src_cnt--) iop_desc_set_xor_src_addr(grp_start, src_cnt, @@ -694,8 +609,6 @@ iop_adma_prep_dma_xor_val(struct dma_chan *chan, dma_addr_t *dma_src, grp_start->xor_check_result = result; pr_debug("\t%s: grp_start->xor_check_result: %p\n", __func__, grp_start->xor_check_result); - sw_desc->unmap_src_cnt = src_cnt; - sw_desc->unmap_len = len; sw_desc->async_tx.flags = flags; while (src_cnt--) iop_desc_set_zero_sum_src_addr(grp_start, src_cnt, @@ -748,8 +661,6 @@ iop_adma_prep_dma_pq(struct dma_chan *chan, dma_addr_t *dst, dma_addr_t *src, dst[0] = dst[1] & 0x7; iop_desc_set_pq_addr(g, dst); - sw_desc->unmap_src_cnt = src_cnt; - sw_desc->unmap_len = len; sw_desc->async_tx.flags = flags; for (i = 0; i < src_cnt; i++) iop_desc_set_pq_src_addr(g, i, src[i], scf[i]); @@ -804,8 +715,6 @@ iop_adma_prep_dma_pq_val(struct dma_chan *chan, dma_addr_t *pq, dma_addr_t *src, g->pq_check_result = pqres; pr_debug("\t%s: g->pq_check_result: %p\n", __func__, g->pq_check_result); - sw_desc->unmap_src_cnt = src_cnt+2; - sw_desc->unmap_len = len; sw_desc->async_tx.flags = flags; while (src_cnt--) iop_desc_set_pq_zero_sum_src_addr(g, src_cnt, diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c index 8d5bce9..7807f0e 100644 --- a/drivers/dma/mv_xor.c +++ b/drivers/dma/mv_xor.c @@ -60,14 +60,6 @@ static u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc) return hw_desc->phy_dest_addr; } -static u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc, - int src_idx) -{ - struct mv_xor_desc *hw_desc = desc->hw_desc; - return hw_desc->phy_src_addr[mv_phy_src_idx(src_idx)]; -} - - static void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc, u32 byte_count) { @@ -278,42 +270,9 @@ mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc, desc->async_tx.callback( desc->async_tx.callback_param); - /* unmap dma addresses - * (unmap_single vs unmap_page?) - */ - if (desc->group_head && desc->unmap_len) { - struct mv_xor_desc_slot *unmap = desc->group_head; - struct device *dev = mv_chan_to_devp(mv_chan); - u32 len = unmap->unmap_len; - enum dma_ctrl_flags flags = desc->async_tx.flags; - u32 src_cnt; - dma_addr_t addr; - dma_addr_t dest; - - src_cnt = unmap->unmap_src_cnt; - dest = mv_desc_get_dest_addr(unmap); - if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - enum dma_data_direction dir; - - if (src_cnt > 1) /* is xor ? */ - dir = DMA_BIDIRECTIONAL; - else - dir = DMA_FROM_DEVICE; - dma_unmap_page(dev, dest, len, dir); - } - - if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - while (src_cnt--) { - addr = mv_desc_get_src_addr(unmap, - src_cnt); - if (addr == dest) - continue; - dma_unmap_page(dev, addr, len, - DMA_TO_DEVICE); - } - } + dma_descriptor_unmap(&desc->async_tx); + if (desc->group_head) desc->group_head = NULL; - } } /* run dependent operations */ @@ -1076,10 +1035,7 @@ mv_xor_channel_add(struct mv_xor_device *xordev, } mv_chan->mmr_base = xordev->xor_base; - if (!mv_chan->mmr_base) { - ret = -ENOMEM; - goto err_free_dma; - } + mv_chan->mmr_high_base = xordev->xor_high_base; tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long) mv_chan); @@ -1138,7 +1094,7 @@ static void mv_xor_conf_mbus_windows(struct mv_xor_device *xordev, const struct mbus_dram_target_info *dram) { - void __iomem *base = xordev->xor_base; + void __iomem *base = xordev->xor_high_base; u32 win_enable = 0; int i; diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h index 06b067f..d074922 100644 --- a/drivers/dma/mv_xor.h +++ b/drivers/dma/mv_xor.h @@ -34,13 +34,13 @@ #define XOR_OPERATION_MODE_MEMCPY 2 #define XOR_DESCRIPTOR_SWAP BIT(14) -#define XOR_CURR_DESC(chan) (chan->mmr_base + 0x210 + (chan->idx * 4)) -#define XOR_NEXT_DESC(chan) (chan->mmr_base + 0x200 + (chan->idx * 4)) -#define XOR_BYTE_COUNT(chan) (chan->mmr_base + 0x220 + (chan->idx * 4)) -#define XOR_DEST_POINTER(chan) (chan->mmr_base + 0x2B0 + (chan->idx * 4)) -#define XOR_BLOCK_SIZE(chan) (chan->mmr_base + 0x2C0 + (chan->idx * 4)) -#define XOR_INIT_VALUE_LOW(chan) (chan->mmr_base + 0x2E0) -#define XOR_INIT_VALUE_HIGH(chan) (chan->mmr_base + 0x2E4) +#define XOR_CURR_DESC(chan) (chan->mmr_high_base + 0x10 + (chan->idx * 4)) +#define XOR_NEXT_DESC(chan) (chan->mmr_high_base + 0x00 + (chan->idx * 4)) +#define XOR_BYTE_COUNT(chan) (chan->mmr_high_base + 0x20 + (chan->idx * 4)) +#define XOR_DEST_POINTER(chan) (chan->mmr_high_base + 0xB0 + (chan->idx * 4)) +#define XOR_BLOCK_SIZE(chan) (chan->mmr_high_base + 0xC0 + (chan->idx * 4)) +#define XOR_INIT_VALUE_LOW(chan) (chan->mmr_high_base + 0xE0) +#define XOR_INIT_VALUE_HIGH(chan) (chan->mmr_high_base + 0xE4) #define XOR_CONFIG(chan) (chan->mmr_base + 0x10 + (chan->idx * 4)) #define XOR_ACTIVATION(chan) (chan->mmr_base + 0x20 + (chan->idx * 4)) @@ -50,11 +50,11 @@ #define XOR_ERROR_ADDR(chan) (chan->mmr_base + 0x60) #define XOR_INTR_MASK_VALUE 0x3F5 -#define WINDOW_BASE(w) (0x250 + ((w) << 2)) -#define WINDOW_SIZE(w) (0x270 + ((w) << 2)) -#define WINDOW_REMAP_HIGH(w) (0x290 + ((w) << 2)) -#define WINDOW_BAR_ENABLE(chan) (0x240 + ((chan) << 2)) -#define WINDOW_OVERRIDE_CTRL(chan) (0x2A0 + ((chan) << 2)) +#define WINDOW_BASE(w) (0x50 + ((w) << 2)) +#define WINDOW_SIZE(w) (0x70 + ((w) << 2)) +#define WINDOW_REMAP_HIGH(w) (0x90 + ((w) << 2)) +#define WINDOW_BAR_ENABLE(chan) (0x40 + ((chan) << 2)) +#define WINDOW_OVERRIDE_CTRL(chan) (0xA0 + ((chan) << 2)) struct mv_xor_device { void __iomem *xor_base; @@ -82,6 +82,7 @@ struct mv_xor_chan { int pending; spinlock_t lock; /* protects the descriptor slot pool */ void __iomem *mmr_base; + void __iomem *mmr_high_base; unsigned int idx; int irq; enum dma_transaction_type current_type; diff --git a/drivers/dma/pl330.c b/drivers/dma/pl330.c index 0ee2c54..877d7bb 100644 --- a/drivers/dma/pl330.c +++ b/drivers/dma/pl330.c @@ -2268,6 +2268,8 @@ static void pl330_tasklet(unsigned long data) list_move_tail(&desc->node, &pch->dmac->desc_pool); } + dma_descriptor_unmap(&desc->txd); + if (callback) { spin_unlock_irqrestore(&pch->lock, flags); callback(callback_param); diff --git a/drivers/dma/ppc4xx/adma.c b/drivers/dma/ppc4xx/adma.c index 60e02ae..3005938 100644 --- a/drivers/dma/ppc4xx/adma.c +++ b/drivers/dma/ppc4xx/adma.c @@ -802,218 +802,6 @@ static void ppc440spe_desc_set_link(struct ppc440spe_adma_chan *chan, } /** - * ppc440spe_desc_get_src_addr - extract the source address from the descriptor - */ -static u32 ppc440spe_desc_get_src_addr(struct ppc440spe_adma_desc_slot *desc, - struct ppc440spe_adma_chan *chan, int src_idx) -{ - struct dma_cdb *dma_hw_desc; - struct xor_cb *xor_hw_desc; - - switch (chan->device->id) { - case PPC440SPE_DMA0_ID: - case PPC440SPE_DMA1_ID: - dma_hw_desc = desc->hw_desc; - /* May have 0, 1, 2, or 3 sources */ - switch (dma_hw_desc->opc) { - case DMA_CDB_OPC_NO_OP: - case DMA_CDB_OPC_DFILL128: - return 0; - case DMA_CDB_OPC_DCHECK128: - if (unlikely(src_idx)) { - printk(KERN_ERR "%s: try to get %d source for" - " DCHECK128\n", __func__, src_idx); - BUG(); - } - return le32_to_cpu(dma_hw_desc->sg1l); - case DMA_CDB_OPC_MULTICAST: - case DMA_CDB_OPC_MV_SG1_SG2: - if (unlikely(src_idx > 2)) { - printk(KERN_ERR "%s: try to get %d source from" - " DMA descr\n", __func__, src_idx); - BUG(); - } - if (src_idx) { - if (le32_to_cpu(dma_hw_desc->sg1u) & - DMA_CUED_XOR_WIN_MSK) { - u8 region; - - if (src_idx == 1) - return le32_to_cpu( - dma_hw_desc->sg1l) + - desc->unmap_len; - - region = (le32_to_cpu( - dma_hw_desc->sg1u)) >> - DMA_CUED_REGION_OFF; - - region &= DMA_CUED_REGION_MSK; - switch (region) { - case DMA_RXOR123: - return le32_to_cpu( - dma_hw_desc->sg1l) + - (desc->unmap_len << 1); - case DMA_RXOR124: - return le32_to_cpu( - dma_hw_desc->sg1l) + - (desc->unmap_len * 3); - case DMA_RXOR125: - return le32_to_cpu( - dma_hw_desc->sg1l) + - (desc->unmap_len << 2); - default: - printk(KERN_ERR - "%s: try to" - " get src3 for region %02x" - "PPC440SPE_DESC_RXOR12?\n", - __func__, region); - BUG(); - } - } else { - printk(KERN_ERR - "%s: try to get %d" - " source for non-cued descr\n", - __func__, src_idx); - BUG(); - } - } - return le32_to_cpu(dma_hw_desc->sg1l); - default: - printk(KERN_ERR "%s: unknown OPC 0x%02x\n", - __func__, dma_hw_desc->opc); - BUG(); - } - return le32_to_cpu(dma_hw_desc->sg1l); - case PPC440SPE_XOR_ID: - /* May have up to 16 sources */ - xor_hw_desc = desc->hw_desc; - return xor_hw_desc->ops[src_idx].l; - } - return 0; -} - -/** - * ppc440spe_desc_get_dest_addr - extract the destination address from the - * descriptor - */ -static u32 ppc440spe_desc_get_dest_addr(struct ppc440spe_adma_desc_slot *desc, - struct ppc440spe_adma_chan *chan, int idx) -{ - struct dma_cdb *dma_hw_desc; - struct xor_cb *xor_hw_desc; - - switch (chan->device->id) { - case PPC440SPE_DMA0_ID: - case PPC440SPE_DMA1_ID: - dma_hw_desc = desc->hw_desc; - - if (likely(!idx)) - return le32_to_cpu(dma_hw_desc->sg2l); - return le32_to_cpu(dma_hw_desc->sg3l); - case PPC440SPE_XOR_ID: - xor_hw_desc = desc->hw_desc; - return xor_hw_desc->cbtal; - } - return 0; -} - -/** - * ppc440spe_desc_get_src_num - extract the number of source addresses from - * the descriptor - */ -static u32 ppc440spe_desc_get_src_num(struct ppc440spe_adma_desc_slot *desc, - struct ppc440spe_adma_chan *chan) -{ - struct dma_cdb *dma_hw_desc; - struct xor_cb *xor_hw_desc; - - switch (chan->device->id) { - case PPC440SPE_DMA0_ID: - case PPC440SPE_DMA1_ID: - dma_hw_desc = desc->hw_desc; - - switch (dma_hw_desc->opc) { - case DMA_CDB_OPC_NO_OP: - case DMA_CDB_OPC_DFILL128: - return 0; - case DMA_CDB_OPC_DCHECK128: - return 1; - case DMA_CDB_OPC_MV_SG1_SG2: - case DMA_CDB_OPC_MULTICAST: - /* - * Only for RXOR operations we have more than - * one source - */ - if (le32_to_cpu(dma_hw_desc->sg1u) & - DMA_CUED_XOR_WIN_MSK) { - /* RXOR op, there are 2 or 3 sources */ - if (((le32_to_cpu(dma_hw_desc->sg1u) >> - DMA_CUED_REGION_OFF) & - DMA_CUED_REGION_MSK) == DMA_RXOR12) { - /* RXOR 1-2 */ - return 2; - } else { - /* RXOR 1-2-3/1-2-4/1-2-5 */ - return 3; - } - } - return 1; - default: - printk(KERN_ERR "%s: unknown OPC 0x%02x\n", - __func__, dma_hw_desc->opc); - BUG(); - } - case PPC440SPE_XOR_ID: - /* up to 16 sources */ - xor_hw_desc = desc->hw_desc; - return xor_hw_desc->cbc & XOR_CDCR_OAC_MSK; - default: - BUG(); - } - return 0; -} - -/** - * ppc440spe_desc_get_dst_num - get the number of destination addresses in - * this descriptor - */ -static u32 ppc440spe_desc_get_dst_num(struct ppc440spe_adma_desc_slot *desc, - struct ppc440spe_adma_chan *chan) -{ - struct dma_cdb *dma_hw_desc; - - switch (chan->device->id) { - case PPC440SPE_DMA0_ID: - case PPC440SPE_DMA1_ID: - /* May be 1 or 2 destinations */ - dma_hw_desc = desc->hw_desc; - switch (dma_hw_desc->opc) { - case DMA_CDB_OPC_NO_OP: - case DMA_CDB_OPC_DCHECK128: - return 0; - case DMA_CDB_OPC_MV_SG1_SG2: - case DMA_CDB_OPC_DFILL128: - return 1; - case DMA_CDB_OPC_MULTICAST: - if (desc->dst_cnt == 2) - return 2; - else - return 1; - default: - printk(KERN_ERR "%s: unknown OPC 0x%02x\n", - __func__, dma_hw_desc->opc); - BUG(); - } - case PPC440SPE_XOR_ID: - /* Always only 1 destination */ - return 1; - default: - BUG(); - } - return 0; -} - -/** * ppc440spe_desc_get_link - get the address of the descriptor that * follows this one */ @@ -1705,43 +1493,6 @@ static void ppc440spe_adma_free_slots(struct ppc440spe_adma_desc_slot *slot, } } -static void ppc440spe_adma_unmap(struct ppc440spe_adma_chan *chan, - struct ppc440spe_adma_desc_slot *desc) -{ - u32 src_cnt, dst_cnt; - dma_addr_t addr; - - /* - * get the number of sources & destination - * included in this descriptor and unmap - * them all - */ - src_cnt = ppc440spe_desc_get_src_num(desc, chan); - dst_cnt = ppc440spe_desc_get_dst_num(desc, chan); - - /* unmap destinations */ - if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - while (dst_cnt--) { - addr = ppc440spe_desc_get_dest_addr( - desc, chan, dst_cnt); - dma_unmap_page(chan->device->dev, - addr, desc->unmap_len, - DMA_FROM_DEVICE); - } - } - - /* unmap sources */ - if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - while (src_cnt--) { - addr = ppc440spe_desc_get_src_addr( - desc, chan, src_cnt); - dma_unmap_page(chan->device->dev, - addr, desc->unmap_len, - DMA_TO_DEVICE); - } - } -} - /** * ppc440spe_adma_run_tx_complete_actions - call functions to be called * upon completion @@ -1765,26 +1516,7 @@ static dma_cookie_t ppc440spe_adma_run_tx_complete_actions( desc->async_tx.callback( desc->async_tx.callback_param); - /* unmap dma addresses - * (unmap_single vs unmap_page?) - * - * actually, ppc's dma_unmap_page() functions are empty, so - * the following code is just for the sake of completeness - */ - if (chan && chan->needs_unmap && desc->group_head && - desc->unmap_len) { - struct ppc440spe_adma_desc_slot *unmap = - desc->group_head; - /* assume 1 slot per op always */ - u32 slot_count = unmap->slot_cnt; - - /* Run through the group list and unmap addresses */ - for (i = 0; i < slot_count; i++) { - BUG_ON(!unmap); - ppc440spe_adma_unmap(chan, unmap); - unmap = unmap->hw_next; - } - } + dma_descriptor_unmap(&desc->async_tx); } /* run dependent operations */ diff --git a/drivers/dma/timb_dma.c b/drivers/dma/timb_dma.c index 28af214..4506a7b 100644 --- a/drivers/dma/timb_dma.c +++ b/drivers/dma/timb_dma.c @@ -154,38 +154,6 @@ static bool __td_dma_done_ack(struct timb_dma_chan *td_chan) return done; } -static void __td_unmap_desc(struct timb_dma_chan *td_chan, const u8 *dma_desc, - bool single) -{ - dma_addr_t addr; - int len; - - addr = (dma_desc[7] << 24) | (dma_desc[6] << 16) | (dma_desc[5] << 8) | - dma_desc[4]; - - len = (dma_desc[3] << 8) | dma_desc[2]; - - if (single) - dma_unmap_single(chan2dev(&td_chan->chan), addr, len, - DMA_TO_DEVICE); - else - dma_unmap_page(chan2dev(&td_chan->chan), addr, len, - DMA_TO_DEVICE); -} - -static void __td_unmap_descs(struct timb_dma_desc *td_desc, bool single) -{ - struct timb_dma_chan *td_chan = container_of(td_desc->txd.chan, - struct timb_dma_chan, chan); - u8 *descs; - - for (descs = td_desc->desc_list; ; descs += TIMB_DMA_DESC_SIZE) { - __td_unmap_desc(td_chan, descs, single); - if (descs[0] & 0x02) - break; - } -} - static int td_fill_desc(struct timb_dma_chan *td_chan, u8 *dma_desc, struct scatterlist *sg, bool last) { @@ -293,10 +261,7 @@ static void __td_finish(struct timb_dma_chan *td_chan) list_move(&td_desc->desc_node, &td_chan->free_list); - if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) - __td_unmap_descs(td_desc, - txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE); - + dma_descriptor_unmap(txd); /* * The API requires that no submissions are done from a * callback, so we don't need to drop the lock here diff --git a/drivers/dma/txx9dmac.c b/drivers/dma/txx9dmac.c index c2829b4..bae6c29 100644 --- a/drivers/dma/txx9dmac.c +++ b/drivers/dma/txx9dmac.c @@ -419,30 +419,7 @@ txx9dmac_descriptor_complete(struct txx9dmac_chan *dc, list_splice_init(&desc->tx_list, &dc->free_list); list_move(&desc->desc_node, &dc->free_list); - if (!ds) { - dma_addr_t dmaaddr; - if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) { - dmaaddr = is_dmac64(dc) ? - desc->hwdesc.DAR : desc->hwdesc32.DAR; - if (txd->flags & DMA_COMPL_DEST_UNMAP_SINGLE) - dma_unmap_single(chan2parent(&dc->chan), - dmaaddr, desc->len, DMA_FROM_DEVICE); - else - dma_unmap_page(chan2parent(&dc->chan), - dmaaddr, desc->len, DMA_FROM_DEVICE); - } - if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) { - dmaaddr = is_dmac64(dc) ? - desc->hwdesc.SAR : desc->hwdesc32.SAR; - if (txd->flags & DMA_COMPL_SRC_UNMAP_SINGLE) - dma_unmap_single(chan2parent(&dc->chan), - dmaaddr, desc->len, DMA_TO_DEVICE); - else - dma_unmap_page(chan2parent(&dc->chan), - dmaaddr, desc->len, DMA_TO_DEVICE); - } - } - + dma_descriptor_unmap(txd); /* * The API requires that no submissions are done from a * callback, so we don't need to drop the lock here diff --git a/drivers/media/platform/m2m-deinterlace.c b/drivers/media/platform/m2m-deinterlace.c index 540516c..879ea6f 100644 --- a/drivers/media/platform/m2m-deinterlace.c +++ b/drivers/media/platform/m2m-deinterlace.c @@ -341,8 +341,7 @@ static void deinterlace_issue_dma(struct deinterlace_ctx *ctx, int op, ctx->xt->dir = DMA_MEM_TO_MEM; ctx->xt->src_sgl = false; ctx->xt->dst_sgl = true; - flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT | - DMA_COMPL_SKIP_DEST_UNMAP | DMA_COMPL_SKIP_SRC_UNMAP; + flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT; tx = dmadev->device_prep_interleaved_dma(chan, ctx->xt, flags); if (tx == NULL) { diff --git a/drivers/media/platform/timblogiw.c b/drivers/media/platform/timblogiw.c index b557caf..59a95e3 100644 --- a/drivers/media/platform/timblogiw.c +++ b/drivers/media/platform/timblogiw.c @@ -565,7 +565,7 @@ static void buffer_queue(struct videobuf_queue *vq, struct videobuf_buffer *vb) desc = dmaengine_prep_slave_sg(fh->chan, buf->sg, sg_elems, DMA_DEV_TO_MEM, - DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP); + DMA_PREP_INTERRUPT); if (!desc) { spin_lock_irq(&fh->queue_lock); list_del_init(&vb->queue); diff --git a/drivers/misc/carma/carma-fpga.c b/drivers/misc/carma/carma-fpga.c index 7b56563f..5335104 100644 --- a/drivers/misc/carma/carma-fpga.c +++ b/drivers/misc/carma/carma-fpga.c @@ -631,8 +631,7 @@ static int data_submit_dma(struct fpga_device *priv, struct data_buf *buf) struct dma_async_tx_descriptor *tx; dma_cookie_t cookie; dma_addr_t dst, src; - unsigned long dma_flags = DMA_COMPL_SKIP_DEST_UNMAP | - DMA_COMPL_SKIP_SRC_UNMAP; + unsigned long dma_flags = 0; dst_sg = buf->vb.sglist; dst_nents = buf->vb.sglen; diff --git a/drivers/mtd/nand/atmel_nand.c b/drivers/mtd/nand/atmel_nand.c index 060feea..2a837cb 100644 --- a/drivers/mtd/nand/atmel_nand.c +++ b/drivers/mtd/nand/atmel_nand.c @@ -375,8 +375,7 @@ static int atmel_nand_dma_op(struct mtd_info *mtd, void *buf, int len, dma_dev = host->dma_chan->device; - flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP | - DMA_COMPL_SKIP_DEST_UNMAP; + flags = DMA_CTRL_ACK | DMA_PREP_INTERRUPT; phys_addr = dma_map_single(dma_dev->dev, p, len, dir); if (dma_mapping_error(dma_dev->dev, phys_addr)) { diff --git a/drivers/mtd/nand/fsmc_nand.c b/drivers/mtd/nand/fsmc_nand.c index 3dc1a75..8b27522 100644 --- a/drivers/mtd/nand/fsmc_nand.c +++ b/drivers/mtd/nand/fsmc_nand.c @@ -573,8 +573,6 @@ static int dma_xfer(struct fsmc_nand_data *host, void *buffer, int len, dma_dev = chan->device; dma_addr = dma_map_single(dma_dev->dev, buffer, len, direction); - flags |= DMA_COMPL_SKIP_SRC_UNMAP | DMA_COMPL_SKIP_DEST_UNMAP; - if (direction == DMA_TO_DEVICE) { dma_src = dma_addr; dma_dst = host->data_pa; diff --git a/drivers/net/ethernet/micrel/ks8842.c b/drivers/net/ethernet/micrel/ks8842.c index 0951f7a..822616e 100644 --- a/drivers/net/ethernet/micrel/ks8842.c +++ b/drivers/net/ethernet/micrel/ks8842.c @@ -459,8 +459,7 @@ static int ks8842_tx_frame_dma(struct sk_buff *skb, struct net_device *netdev) sg_dma_len(&ctl->sg) += 4 - sg_dma_len(&ctl->sg) % 4; ctl->adesc = dmaengine_prep_slave_sg(ctl->chan, - &ctl->sg, 1, DMA_MEM_TO_DEV, - DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP); + &ctl->sg, 1, DMA_MEM_TO_DEV, DMA_PREP_INTERRUPT); if (!ctl->adesc) return NETDEV_TX_BUSY; @@ -571,8 +570,7 @@ static int __ks8842_start_new_rx_dma(struct net_device *netdev) sg_dma_len(sg) = DMA_BUFFER_SIZE; ctl->adesc = dmaengine_prep_slave_sg(ctl->chan, - sg, 1, DMA_DEV_TO_MEM, - DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_SRC_UNMAP); + sg, 1, DMA_DEV_TO_MEM, DMA_PREP_INTERRUPT); if (!ctl->adesc) goto out; diff --git a/drivers/ntb/ntb_transport.c b/drivers/ntb/ntb_transport.c index 12a9e83..d0222f1 100644 --- a/drivers/ntb/ntb_transport.c +++ b/drivers/ntb/ntb_transport.c @@ -1034,10 +1034,9 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset, struct dma_chan *chan = qp->dma_chan; struct dma_device *device; size_t pay_off, buff_off; - dma_addr_t src, dest; + struct dmaengine_unmap_data *unmap; dma_cookie_t cookie; void *buf = entry->buf; - unsigned long flags; entry->len = len; @@ -1045,35 +1044,49 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset, goto err; if (len < copy_bytes) - goto err1; + goto err_wait; device = chan->device; pay_off = (size_t) offset & ~PAGE_MASK; buff_off = (size_t) buf & ~PAGE_MASK; if (!is_dma_copy_aligned(device, pay_off, buff_off, len)) - goto err1; + goto err_wait; - dest = dma_map_single(device->dev, buf, len, DMA_FROM_DEVICE); - if (dma_mapping_error(device->dev, dest)) - goto err1; + unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT); + if (!unmap) + goto err_wait; - src = dma_map_single(device->dev, offset, len, DMA_TO_DEVICE); - if (dma_mapping_error(device->dev, src)) - goto err2; + unmap->len = len; + unmap->addr[0] = dma_map_page(device->dev, virt_to_page(offset), + pay_off, len, DMA_TO_DEVICE); + if (dma_mapping_error(device->dev, unmap->addr[0])) + goto err_get_unmap; + + unmap->to_cnt = 1; - flags = DMA_COMPL_DEST_UNMAP_SINGLE | DMA_COMPL_SRC_UNMAP_SINGLE | - DMA_PREP_INTERRUPT; - txd = device->device_prep_dma_memcpy(chan, dest, src, len, flags); + unmap->addr[1] = dma_map_page(device->dev, virt_to_page(buf), + buff_off, len, DMA_FROM_DEVICE); + if (dma_mapping_error(device->dev, unmap->addr[1])) + goto err_get_unmap; + + unmap->from_cnt = 1; + + txd = device->device_prep_dma_memcpy(chan, unmap->addr[1], + unmap->addr[0], len, + DMA_PREP_INTERRUPT); if (!txd) - goto err3; + goto err_get_unmap; txd->callback = ntb_rx_copy_callback; txd->callback_param = entry; + dma_set_unmap(txd, unmap); cookie = dmaengine_submit(txd); if (dma_submit_error(cookie)) - goto err3; + goto err_set_unmap; + + dmaengine_unmap_put(unmap); qp->last_cookie = cookie; @@ -1081,11 +1094,11 @@ static void ntb_async_rx(struct ntb_queue_entry *entry, void *offset, return; -err3: - dma_unmap_single(device->dev, src, len, DMA_TO_DEVICE); -err2: - dma_unmap_single(device->dev, dest, len, DMA_FROM_DEVICE); -err1: +err_set_unmap: + dmaengine_unmap_put(unmap); +err_get_unmap: + dmaengine_unmap_put(unmap); +err_wait: /* If the callbacks come out of order, the writing of the index to the * last completed will be out of order. This may result in the * receive stalling forever. @@ -1245,12 +1258,12 @@ static void ntb_async_tx(struct ntb_transport_qp *qp, struct dma_chan *chan = qp->dma_chan; struct dma_device *device; size_t dest_off, buff_off; - dma_addr_t src, dest; + struct dmaengine_unmap_data *unmap; + dma_addr_t dest; dma_cookie_t cookie; void __iomem *offset; size_t len = entry->len; void *buf = entry->buf; - unsigned long flags; offset = qp->tx_mw + qp->tx_max_frame * qp->tx_index; hdr = offset + qp->tx_max_frame - sizeof(struct ntb_payload_header); @@ -1273,28 +1286,41 @@ static void ntb_async_tx(struct ntb_transport_qp *qp, if (!is_dma_copy_aligned(device, buff_off, dest_off, len)) goto err; - src = dma_map_single(device->dev, buf, len, DMA_TO_DEVICE); - if (dma_mapping_error(device->dev, src)) + unmap = dmaengine_get_unmap_data(device->dev, 1, GFP_NOWAIT); + if (!unmap) goto err; - flags = DMA_COMPL_SRC_UNMAP_SINGLE | DMA_PREP_INTERRUPT; - txd = device->device_prep_dma_memcpy(chan, dest, src, len, flags); + unmap->len = len; + unmap->addr[0] = dma_map_page(device->dev, virt_to_page(buf), + buff_off, len, DMA_TO_DEVICE); + if (dma_mapping_error(device->dev, unmap->addr[0])) + goto err_get_unmap; + + unmap->to_cnt = 1; + + txd = device->device_prep_dma_memcpy(chan, dest, unmap->addr[0], len, + DMA_PREP_INTERRUPT); if (!txd) - goto err1; + goto err_get_unmap; txd->callback = ntb_tx_copy_callback; txd->callback_param = entry; + dma_set_unmap(txd, unmap); cookie = dmaengine_submit(txd); if (dma_submit_error(cookie)) - goto err1; + goto err_set_unmap; + + dmaengine_unmap_put(unmap); dma_async_issue_pending(chan); qp->tx_async++; return; -err1: - dma_unmap_single(device->dev, src, len, DMA_TO_DEVICE); +err_set_unmap: + dmaengine_unmap_put(unmap); +err_get_unmap: + dmaengine_unmap_put(unmap); err: ntb_memcpy_tx(entry, offset); qp->tx_memcpy++; diff --git a/drivers/spi/spi-dw-mid.c b/drivers/spi/spi-dw-mid.c index b9f0192..6d207af 100644 --- a/drivers/spi/spi-dw-mid.c +++ b/drivers/spi/spi-dw-mid.c @@ -150,7 +150,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change) &dws->tx_sgl, 1, DMA_MEM_TO_DEV, - DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_DEST_UNMAP); + DMA_PREP_INTERRUPT); txdesc->callback = dw_spi_dma_done; txdesc->callback_param = dws; @@ -173,7 +173,7 @@ static int mid_spi_dma_transfer(struct dw_spi *dws, int cs_change) &dws->rx_sgl, 1, DMA_DEV_TO_MEM, - DMA_PREP_INTERRUPT | DMA_COMPL_SKIP_DEST_UNMAP); + DMA_PREP_INTERRUPT); rxdesc->callback = dw_spi_dma_done; rxdesc->callback_param = dws; diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 4b460a6..41cf0c3 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -171,12 +171,6 @@ struct dma_interleaved_template { * @DMA_CTRL_ACK - if clear, the descriptor cannot be reused until the client * acknowledges receipt, i.e. has has a chance to establish any dependency * chains - * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) - * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) - * @DMA_COMPL_SRC_UNMAP_SINGLE - set to do the source dma-unmapping as single - * (if not set, do the source dma-unmapping as page) - * @DMA_COMPL_DEST_UNMAP_SINGLE - set to do the destination dma-unmapping as single - * (if not set, do the destination dma-unmapping as page) * @DMA_PREP_PQ_DISABLE_P - prevent generation of P while generating Q * @DMA_PREP_PQ_DISABLE_Q - prevent generation of Q while generating P * @DMA_PREP_CONTINUE - indicate to a driver that it is reusing buffers as @@ -188,14 +182,10 @@ struct dma_interleaved_template { enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), DMA_CTRL_ACK = (1 << 1), - DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2), - DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), - DMA_COMPL_SRC_UNMAP_SINGLE = (1 << 4), - DMA_COMPL_DEST_UNMAP_SINGLE = (1 << 5), - DMA_PREP_PQ_DISABLE_P = (1 << 6), - DMA_PREP_PQ_DISABLE_Q = (1 << 7), - DMA_PREP_CONTINUE = (1 << 8), - DMA_PREP_FENCE = (1 << 9), + DMA_PREP_PQ_DISABLE_P = (1 << 2), + DMA_PREP_PQ_DISABLE_Q = (1 << 3), + DMA_PREP_CONTINUE = (1 << 4), + DMA_PREP_FENCE = (1 << 5), }; /** @@ -413,6 +403,17 @@ void dma_chan_cleanup(struct kref *kref); typedef bool (*dma_filter_fn)(struct dma_chan *chan, void *filter_param); typedef void (*dma_async_tx_callback)(void *dma_async_param); + +struct dmaengine_unmap_data { + u8 to_cnt; + u8 from_cnt; + u8 bidi_cnt; + struct device *dev; + struct kref kref; + size_t len; + dma_addr_t addr[0]; +}; + /** * struct dma_async_tx_descriptor - async transaction descriptor * ---dma generic offload fields--- @@ -438,6 +439,7 @@ struct dma_async_tx_descriptor { dma_cookie_t (*tx_submit)(struct dma_async_tx_descriptor *tx); dma_async_tx_callback callback; void *callback_param; + struct dmaengine_unmap_data *unmap; #ifdef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH struct dma_async_tx_descriptor *next; struct dma_async_tx_descriptor *parent; @@ -445,6 +447,40 @@ struct dma_async_tx_descriptor { #endif }; +#ifdef CONFIG_DMA_ENGINE +static inline void dma_set_unmap(struct dma_async_tx_descriptor *tx, + struct dmaengine_unmap_data *unmap) +{ + kref_get(&unmap->kref); + tx->unmap = unmap; +} + +struct dmaengine_unmap_data * +dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags); +void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap); +#else +static inline void dma_set_unmap(struct dma_async_tx_descriptor *tx, + struct dmaengine_unmap_data *unmap) +{ +} +static inline struct dmaengine_unmap_data * +dmaengine_get_unmap_data(struct device *dev, int nr, gfp_t flags) +{ + return NULL; +} +static inline void dmaengine_unmap_put(struct dmaengine_unmap_data *unmap) +{ +} +#endif + +static inline void dma_descriptor_unmap(struct dma_async_tx_descriptor *tx) +{ + if (tx->unmap) { + dmaengine_unmap_put(tx->unmap); + tx->unmap = NULL; + } +} + #ifndef CONFIG_ASYNC_TX_ENABLE_CHANNEL_SWITCH static inline void txd_lock(struct dma_async_tx_descriptor *txd) { |