summaryrefslogtreecommitdiffstats
path: root/crypto/async_tx
diff options
context:
space:
mode:
authorDan Williams <dan.j.williams@intel.com>2009-09-08 17:42:50 -0700
committerDan Williams <dan.j.williams@intel.com>2009-09-08 17:42:50 -0700
commit0403e3827788d878163f9ef0541b748b0f88ca5d (patch)
tree2dc73744bd92c268a1310f24668167f130877278 /crypto/async_tx
parentf9dd2134374c8de6b911e2b8652c6c9622eaa658 (diff)
downloadop-kernel-dev-0403e3827788d878163f9ef0541b748b0f88ca5d.zip
op-kernel-dev-0403e3827788d878163f9ef0541b748b0f88ca5d.tar.gz
dmaengine: add fence support
Some engines optimize operation by reading ahead in the descriptor chain such that descriptor2 may start execution before descriptor1 completes. If descriptor2 depends on the result from descriptor1 then a fence is required (on descriptor2) to disable this optimization. The async_tx api could implicitly identify dependencies via the 'depend_tx' parameter, but that would constrain cases where the dependency chain only specifies a completion order rather than a data dependency. So, provide an ASYNC_TX_FENCE to explicitly identify data dependencies. Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Diffstat (limited to 'crypto/async_tx')
-rw-r--r--crypto/async_tx/async_memcpy.c7
-rw-r--r--crypto/async_tx/async_memset.c7
-rw-r--r--crypto/async_tx/async_pq.c5
-rw-r--r--crypto/async_tx/async_raid6_recov.c47
-rw-r--r--crypto/async_tx/async_xor.c11
5 files changed, 50 insertions, 27 deletions
diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c
index 98e15bd..b38cbb3 100644
--- a/crypto/async_tx/async_memcpy.c
+++ b/crypto/async_tx/async_memcpy.c
@@ -52,9 +52,12 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset,
if (device) {
dma_addr_t dma_dest, dma_src;
- unsigned long dma_prep_flags;
+ unsigned long dma_prep_flags = 0;
- dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+ if (submit->cb_fn)
+ dma_prep_flags |= DMA_PREP_INTERRUPT;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_prep_flags |= DMA_PREP_FENCE;
dma_dest = dma_map_page(device->dev, dest, dest_offset, len,
DMA_FROM_DEVICE);
diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c
index b896a6e..a374784 100644
--- a/crypto/async_tx/async_memset.c
+++ b/crypto/async_tx/async_memset.c
@@ -49,9 +49,12 @@ async_memset(struct page *dest, int val, unsigned int offset, size_t len,
if (device) {
dma_addr_t dma_dest;
- unsigned long dma_prep_flags;
+ unsigned long dma_prep_flags = 0;
- dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+ if (submit->cb_fn)
+ dma_prep_flags |= DMA_PREP_INTERRUPT;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_prep_flags |= DMA_PREP_FENCE;
dma_dest = dma_map_page(device->dev, dest, offset, len,
DMA_FROM_DEVICE);
diff --git a/crypto/async_tx/async_pq.c b/crypto/async_tx/async_pq.c
index 108b21e..a25e290 100644
--- a/crypto/async_tx/async_pq.c
+++ b/crypto/async_tx/async_pq.c
@@ -101,6 +101,7 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
*/
if (src_cnt > pq_src_cnt) {
submit->flags &= ~ASYNC_TX_ACK;
+ submit->flags |= ASYNC_TX_FENCE;
dma_flags |= DMA_COMPL_SKIP_DEST_UNMAP;
submit->cb_fn = NULL;
submit->cb_param = NULL;
@@ -111,6 +112,8 @@ do_async_gen_syndrome(struct dma_chan *chan, struct page **blocks,
if (cb_fn_orig)
dma_flags |= DMA_PREP_INTERRUPT;
}
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
/* Since we have clobbered the src_list we are committed
* to doing this asynchronously. Drivers force forward
@@ -282,6 +285,8 @@ async_syndrome_val(struct page **blocks, unsigned int offset, int disks,
dma_flags |= DMA_PREP_PQ_DISABLE_P;
if (!Q(blocks, disks))
dma_flags |= DMA_PREP_PQ_DISABLE_Q;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
for (i = 0; i < disks; i++)
if (likely(blocks[i])) {
BUG_ON(is_raid6_zero_block(blocks[i]));
diff --git a/crypto/async_tx/async_raid6_recov.c b/crypto/async_tx/async_raid6_recov.c
index 0c14d48..822a42d 100644
--- a/crypto/async_tx/async_raid6_recov.c
+++ b/crypto/async_tx/async_raid6_recov.c
@@ -44,6 +44,8 @@ async_sum_product(struct page *dest, struct page **srcs, unsigned char *coef,
struct dma_async_tx_descriptor *tx;
enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
dma_src[0] = dma_map_page(dev, srcs[0], 0, len, DMA_TO_DEVICE);
dma_src[1] = dma_map_page(dev, srcs[1], 0, len, DMA_TO_DEVICE);
@@ -89,6 +91,8 @@ async_mult(struct page *dest, struct page *src, u8 coef, size_t len,
struct dma_async_tx_descriptor *tx;
enum dma_ctrl_flags dma_flags = DMA_PREP_PQ_DISABLE_P;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
dma_dest[1] = dma_map_page(dev, dest, 0, len, DMA_BIDIRECTIONAL);
dma_src[0] = dma_map_page(dev, src, 0, len, DMA_TO_DEVICE);
tx = dma->device_prep_dma_pq(chan, dma_dest, dma_src, 1, &coef,
@@ -138,7 +142,7 @@ __2data_recov_4(size_t bytes, int faila, int failb, struct page **blocks,
srcs[1] = q;
coef[0] = raid6_gfexi[failb-faila];
coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_sum_product(b, srcs, coef, bytes, submit);
/* Dy = P+Pxy+Dx */
@@ -188,23 +192,23 @@ __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
dp = blocks[faila];
dq = blocks[failb];
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_memcpy(dp, g, 0, 0, bytes, submit);
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
/* compute P + Pxy */
srcs[0] = dp;
srcs[1] = p;
- init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
- scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
tx = async_xor(dp, srcs, 0, 2, bytes, submit);
/* compute Q + Qxy */
srcs[0] = dq;
srcs[1] = q;
- init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
- scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
tx = async_xor(dq, srcs, 0, 2, bytes, submit);
/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
@@ -212,7 +216,7 @@ __2data_recov_5(size_t bytes, int faila, int failb, struct page **blocks,
srcs[1] = dq;
coef[0] = raid6_gfexi[failb-faila];
coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_sum_product(dq, srcs, coef, bytes, submit);
/* Dy = P+Pxy+Dx */
@@ -252,7 +256,7 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb,
blocks[failb] = (void *)raid6_empty_zero_page;
blocks[disks-1] = dq;
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
/* Restore pointer table */
@@ -264,15 +268,15 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb,
/* compute P + Pxy */
srcs[0] = dp;
srcs[1] = p;
- init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
- scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
tx = async_xor(dp, srcs, 0, 2, bytes, submit);
/* compute Q + Qxy */
srcs[0] = dq;
srcs[1] = q;
- init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
- scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
tx = async_xor(dq, srcs, 0, 2, bytes, submit);
/* Dx = A*(P+Pxy) + B*(Q+Qxy) */
@@ -280,7 +284,7 @@ __2data_recov_n(int disks, size_t bytes, int faila, int failb,
srcs[1] = dq;
coef[0] = raid6_gfexi[failb-faila];
coef[1] = raid6_gfinv[raid6_gfexp[faila]^raid6_gfexp[failb]];
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_sum_product(dq, srcs, coef, bytes, submit);
/* Dy = P+Pxy+Dx */
@@ -407,13 +411,16 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila,
int good = faila == 0 ? 1 : 0;
struct page *g = blocks[good];
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+ scribble);
tx = async_memcpy(p, g, 0, 0, bytes, submit);
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+ scribble);
tx = async_mult(dq, g, raid6_gfexp[good], bytes, submit);
} else {
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL,
+ scribble);
tx = async_gen_syndrome(blocks, 0, disks, bytes, submit);
}
@@ -426,11 +433,11 @@ async_raid6_datap_recov(int disks, size_t bytes, int faila,
srcs[0] = dq;
srcs[1] = q;
- init_async_submit(submit, ASYNC_TX_XOR_DROP_DST, tx, NULL, NULL,
- scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE|ASYNC_TX_XOR_DROP_DST, tx,
+ NULL, NULL, scribble);
tx = async_xor(dq, srcs, 0, 2, bytes, submit);
- init_async_submit(submit, 0, tx, NULL, NULL, scribble);
+ init_async_submit(submit, ASYNC_TX_FENCE, tx, NULL, NULL, scribble);
tx = async_mult(dq, dq, coef, bytes, submit);
srcs[0] = p;
diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c
index 56b5f98..db27987 100644
--- a/crypto/async_tx/async_xor.c
+++ b/crypto/async_tx/async_xor.c
@@ -69,6 +69,7 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
*/
if (src_cnt > xor_src_cnt) {
submit->flags &= ~ASYNC_TX_ACK;
+ submit->flags |= ASYNC_TX_FENCE;
dma_flags = DMA_COMPL_SKIP_DEST_UNMAP;
submit->cb_fn = NULL;
submit->cb_param = NULL;
@@ -78,7 +79,8 @@ do_async_xor(struct dma_chan *chan, struct page *dest, struct page **src_list,
}
if (submit->cb_fn)
dma_flags |= DMA_PREP_INTERRUPT;
-
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_flags |= DMA_PREP_FENCE;
/* Since we have clobbered the src_list we are committed
* to doing this asynchronously. Drivers force forward progress
* in case they can not provide a descriptor
@@ -264,12 +266,15 @@ async_xor_val(struct page *dest, struct page **src_list, unsigned int offset,
dma_src = (dma_addr_t *) src_list;
if (dma_src && device && src_cnt <= device->max_xor) {
- unsigned long dma_prep_flags;
+ unsigned long dma_prep_flags = 0;
int i;
pr_debug("%s: (async) len: %zu\n", __func__, len);
- dma_prep_flags = submit->cb_fn ? DMA_PREP_INTERRUPT : 0;
+ if (submit->cb_fn)
+ dma_prep_flags |= DMA_PREP_INTERRUPT;
+ if (submit->flags & ASYNC_TX_FENCE)
+ dma_prep_flags |= DMA_PREP_FENCE;
for (i = 0; i < src_cnt; i++)
dma_src[i] = dma_map_page(device->dev, src_list[i],
offset, len, DMA_TO_DEVICE);
OpenPOWER on IntegriCloud