From 4297a462f455e38f08976df7b16c849614a287da Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Wed, 16 Dec 2009 16:28:03 +0100 Subject: dma: at_hdmac: correct incompatible type for argument 1 of 'spin_lock_bh' Correct a typo error in locking calls. Cc: Signed-off-by: Nicolas Ferre Signed-off-by: Dan Williams --- drivers/dma/at_hdmac.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/dma/at_hdmac.c b/drivers/dma/at_hdmac.c index 7585c41..c558fa1 100644 --- a/drivers/dma/at_hdmac.c +++ b/drivers/dma/at_hdmac.c @@ -815,7 +815,7 @@ atc_is_tx_complete(struct dma_chan *chan, dev_vdbg(chan2dev(chan), "is_tx_complete: %d (d%d, u%d)\n", cookie, done ? *done : 0, used ? *used : 0); - spin_lock_bh(atchan->lock); + spin_lock_bh(&atchan->lock); last_complete = atchan->completed_cookie; last_used = chan->cookie; @@ -830,7 +830,7 @@ atc_is_tx_complete(struct dma_chan *chan, ret = dma_async_is_complete(cookie, last_complete, last_used); } - spin_unlock_bh(atchan->lock); + spin_unlock_bh(&atchan->lock); if (done) *done = last_complete; -- cgit v1.1 From 3542a113ab2f5880f1b62e5909d754250fb57d6b Mon Sep 17 00:00:00 2001 From: Guennadi Liakhovetski Date: Thu, 17 Dec 2009 09:41:39 -0700 Subject: sh: fix DMA driver's descriptor chaining and cookie assignment The SH DMA driver wrongly assigns negative cookies to transfer descriptors, also, its chaining of partial descriptors is broken. The latter problem is usually invisible, because maximum transfer size per chunk is 16M, but if you artificially set this limit lower, the driver fails. Since cookies are also used in chunk management, both these problems are fixed in one patch. As side effects a possible memory leak, when descriptors are prepared, but not submitted, and multiple races have also been fixed. Signed-off-by: Guennadi Liakhovetski Acked-by: Paul Mundt Acked-by: Nobuhiro Iwamatsu Signed-off-by: Dan Williams --- drivers/dma/shdma.c | 324 +++++++++++++++++++++++++++++++++------------------- drivers/dma/shdma.h | 9 +- 2 files changed, 213 insertions(+), 120 deletions(-) diff --git a/drivers/dma/shdma.c b/drivers/dma/shdma.c index 8d9acd7..9546f5f 100644 --- a/drivers/dma/shdma.c +++ b/drivers/dma/shdma.c @@ -23,16 +23,19 @@ #include #include #include -#include #include #include #include #include "shdma.h" /* DMA descriptor control */ -#define DESC_LAST (-1) -#define DESC_COMP (1) -#define DESC_NCOMP (0) +enum sh_dmae_desc_status { + DESC_IDLE, + DESC_PREPARED, + DESC_SUBMITTED, + DESC_COMPLETED, /* completed, have to call callback */ + DESC_WAITING, /* callback called, waiting for ack / re-submit */ +}; #define NR_DESCS_PER_CHANNEL 32 /* @@ -45,6 +48,8 @@ */ #define RS_DEFAULT (RS_DUAL) +static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all); + #define SH_DMAC_CHAN_BASE(id) (dma_base_addr[id]) static void sh_dmae_writel(struct sh_dmae_chan *sh_dc, u32 data, u32 reg) { @@ -106,11 +111,11 @@ static inline unsigned int calc_xmit_shift(struct sh_dmae_chan *sh_chan) return ts_shift[(chcr & CHCR_TS_MASK) >> CHCR_TS_SHIFT]; } -static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs hw) +static void dmae_set_reg(struct sh_dmae_chan *sh_chan, struct sh_dmae_regs *hw) { - sh_dmae_writel(sh_chan, hw.sar, SAR); - sh_dmae_writel(sh_chan, hw.dar, DAR); - sh_dmae_writel(sh_chan, hw.tcr >> calc_xmit_shift(sh_chan), TCR); + sh_dmae_writel(sh_chan, hw->sar, SAR); + sh_dmae_writel(sh_chan, hw->dar, DAR); + sh_dmae_writel(sh_chan, hw->tcr >> calc_xmit_shift(sh_chan), TCR); } static void dmae_start(struct sh_dmae_chan *sh_chan) @@ -184,8 +189,9 @@ static int dmae_set_dmars(struct sh_dmae_chan *sh_chan, u16 val) static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) { - struct sh_desc *desc = tx_to_sh_desc(tx); + struct sh_desc *desc = tx_to_sh_desc(tx), *chunk, *last = desc, *c; struct sh_dmae_chan *sh_chan = to_sh_chan(tx->chan); + dma_async_tx_callback callback = tx->callback; dma_cookie_t cookie; spin_lock_bh(&sh_chan->desc_lock); @@ -195,45 +201,53 @@ static dma_cookie_t sh_dmae_tx_submit(struct dma_async_tx_descriptor *tx) if (cookie < 0) cookie = 1; - /* If desc only in the case of 1 */ - if (desc->async_tx.cookie != -EBUSY) - desc->async_tx.cookie = cookie; - sh_chan->common.cookie = desc->async_tx.cookie; + sh_chan->common.cookie = cookie; + tx->cookie = cookie; + + /* Mark all chunks of this descriptor as submitted, move to the queue */ + list_for_each_entry_safe(chunk, c, desc->node.prev, node) { + /* + * All chunks are on the global ld_free, so, we have to find + * the end of the chain ourselves + */ + if (chunk != desc && (chunk->mark == DESC_IDLE || + chunk->async_tx.cookie > 0 || + chunk->async_tx.cookie == -EBUSY || + &chunk->node == &sh_chan->ld_free)) + break; + chunk->mark = DESC_SUBMITTED; + /* Callback goes to the last chunk */ + chunk->async_tx.callback = NULL; + chunk->cookie = cookie; + list_move_tail(&chunk->node, &sh_chan->ld_queue); + last = chunk; + } + + last->async_tx.callback = callback; + last->async_tx.callback_param = tx->callback_param; - list_splice_init(&desc->tx_list, sh_chan->ld_queue.prev); + dev_dbg(sh_chan->dev, "submit #%d@%p on %d: %x[%d] -> %x\n", + tx->cookie, &last->async_tx, sh_chan->id, + desc->hw.sar, desc->hw.tcr, desc->hw.dar); spin_unlock_bh(&sh_chan->desc_lock); return cookie; } +/* Called with desc_lock held */ static struct sh_desc *sh_dmae_get_desc(struct sh_dmae_chan *sh_chan) { - struct sh_desc *desc, *_desc, *ret = NULL; + struct sh_desc *desc; - spin_lock_bh(&sh_chan->desc_lock); - list_for_each_entry_safe(desc, _desc, &sh_chan->ld_free, node) { - if (async_tx_test_ack(&desc->async_tx)) { + list_for_each_entry(desc, &sh_chan->ld_free, node) + if (desc->mark != DESC_PREPARED) { + BUG_ON(desc->mark != DESC_IDLE); list_del(&desc->node); - ret = desc; - break; + return desc; } - } - spin_unlock_bh(&sh_chan->desc_lock); - - return ret; -} - -static void sh_dmae_put_desc(struct sh_dmae_chan *sh_chan, struct sh_desc *desc) -{ - if (desc) { - spin_lock_bh(&sh_chan->desc_lock); - - list_splice_init(&desc->tx_list, &sh_chan->ld_free); - list_add(&desc->node, &sh_chan->ld_free); - spin_unlock_bh(&sh_chan->desc_lock); - } + return NULL; } static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) @@ -252,11 +266,10 @@ static int sh_dmae_alloc_chan_resources(struct dma_chan *chan) dma_async_tx_descriptor_init(&desc->async_tx, &sh_chan->common); desc->async_tx.tx_submit = sh_dmae_tx_submit; - desc->async_tx.flags = DMA_CTRL_ACK; - INIT_LIST_HEAD(&desc->tx_list); - sh_dmae_put_desc(sh_chan, desc); + desc->mark = DESC_IDLE; spin_lock_bh(&sh_chan->desc_lock); + list_add(&desc->node, &sh_chan->ld_free); sh_chan->descs_allocated++; } spin_unlock_bh(&sh_chan->desc_lock); @@ -273,7 +286,10 @@ static void sh_dmae_free_chan_resources(struct dma_chan *chan) struct sh_desc *desc, *_desc; LIST_HEAD(list); - BUG_ON(!list_empty(&sh_chan->ld_queue)); + /* Prepared and not submitted descriptors can still be on the queue */ + if (!list_empty(&sh_chan->ld_queue)) + sh_dmae_chan_ld_cleanup(sh_chan, true); + spin_lock_bh(&sh_chan->desc_lock); list_splice_init(&sh_chan->ld_free, &list); @@ -292,6 +308,8 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy( struct sh_dmae_chan *sh_chan; struct sh_desc *first = NULL, *prev = NULL, *new; size_t copy_size; + LIST_HEAD(tx_list); + int chunks = (len + SH_DMA_TCR_MAX) / (SH_DMA_TCR_MAX + 1); if (!chan) return NULL; @@ -301,108 +319,189 @@ static struct dma_async_tx_descriptor *sh_dmae_prep_memcpy( sh_chan = to_sh_chan(chan); + /* Have to lock the whole loop to protect against concurrent release */ + spin_lock_bh(&sh_chan->desc_lock); + + /* + * Chaining: + * first descriptor is what user is dealing with in all API calls, its + * cookie is at first set to -EBUSY, at tx-submit to a positive + * number + * if more than one chunk is needed further chunks have cookie = -EINVAL + * the last chunk, if not equal to the first, has cookie = -ENOSPC + * all chunks are linked onto the tx_list head with their .node heads + * only during this function, then they are immediately spliced + * back onto the free list in form of a chain + */ do { - /* Allocate the link descriptor from DMA pool */ + /* Allocate the link descriptor from the free list */ new = sh_dmae_get_desc(sh_chan); if (!new) { dev_err(sh_chan->dev, "No free memory for link descriptor\n"); - goto err_get_desc; + list_for_each_entry(new, &tx_list, node) + new->mark = DESC_IDLE; + list_splice(&tx_list, &sh_chan->ld_free); + spin_unlock_bh(&sh_chan->desc_lock); + return NULL; } - copy_size = min(len, (size_t)SH_DMA_TCR_MAX); + copy_size = min(len, (size_t)SH_DMA_TCR_MAX + 1); new->hw.sar = dma_src; new->hw.dar = dma_dest; new->hw.tcr = copy_size; - if (!first) + if (!first) { + /* First desc */ + new->async_tx.cookie = -EBUSY; first = new; + } else { + /* Other desc - invisible to the user */ + new->async_tx.cookie = -EINVAL; + } - new->mark = DESC_NCOMP; - async_tx_ack(&new->async_tx); + dev_dbg(sh_chan->dev, + "chaining %u of %u with %p, dst %x, cookie %d\n", + copy_size, len, &new->async_tx, dma_dest, + new->async_tx.cookie); + + new->mark = DESC_PREPARED; + new->async_tx.flags = flags; + new->chunks = chunks--; prev = new; len -= copy_size; dma_src += copy_size; dma_dest += copy_size; /* Insert the link descriptor to the LD ring */ - list_add_tail(&new->node, &first->tx_list); + list_add_tail(&new->node, &tx_list); } while (len); - new->async_tx.flags = flags; /* client is in control of this ack */ - new->async_tx.cookie = -EBUSY; /* Last desc */ + if (new != first) + new->async_tx.cookie = -ENOSPC; - return &first->async_tx; + /* Put them back on the free list, so, they don't get lost */ + list_splice_tail(&tx_list, &sh_chan->ld_free); -err_get_desc: - sh_dmae_put_desc(sh_chan, first); - return NULL; + spin_unlock_bh(&sh_chan->desc_lock); + return &first->async_tx; } -/* - * sh_chan_ld_cleanup - Clean up link descriptors - * - * This function clean up the ld_queue of DMA channel. - */ -static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan) +static dma_async_tx_callback __ld_cleanup(struct sh_dmae_chan *sh_chan, bool all) { struct sh_desc *desc, *_desc; + /* Is the "exposed" head of a chain acked? */ + bool head_acked = false; + dma_cookie_t cookie = 0; + dma_async_tx_callback callback = NULL; + void *param = NULL; spin_lock_bh(&sh_chan->desc_lock); list_for_each_entry_safe(desc, _desc, &sh_chan->ld_queue, node) { - dma_async_tx_callback callback; - void *callback_param; - - /* non send data */ - if (desc->mark == DESC_NCOMP) + struct dma_async_tx_descriptor *tx = &desc->async_tx; + + BUG_ON(tx->cookie > 0 && tx->cookie != desc->cookie); + BUG_ON(desc->mark != DESC_SUBMITTED && + desc->mark != DESC_COMPLETED && + desc->mark != DESC_WAITING); + + /* + * queue is ordered, and we use this loop to (1) clean up all + * completed descriptors, and to (2) update descriptor flags of + * any chunks in a (partially) completed chain + */ + if (!all && desc->mark == DESC_SUBMITTED && + desc->cookie != cookie) break; - /* send data sesc */ - callback = desc->async_tx.callback; - callback_param = desc->async_tx.callback_param; + if (tx->cookie > 0) + cookie = tx->cookie; - /* Remove from ld_queue list */ - list_splice_init(&desc->tx_list, &sh_chan->ld_free); + if (desc->mark == DESC_COMPLETED && desc->chunks == 1) { + BUG_ON(sh_chan->completed_cookie != desc->cookie - 1); + sh_chan->completed_cookie = desc->cookie; + } - dev_dbg(sh_chan->dev, "link descriptor %p will be recycle.\n", - desc); + /* Call callback on the last chunk */ + if (desc->mark == DESC_COMPLETED && tx->callback) { + desc->mark = DESC_WAITING; + callback = tx->callback; + param = tx->callback_param; + dev_dbg(sh_chan->dev, "descriptor #%d@%p on %d callback\n", + tx->cookie, tx, sh_chan->id); + BUG_ON(desc->chunks != 1); + break; + } - list_move(&desc->node, &sh_chan->ld_free); - /* Run the link descriptor callback function */ - if (callback) { - spin_unlock_bh(&sh_chan->desc_lock); - dev_dbg(sh_chan->dev, "link descriptor %p callback\n", - desc); - callback(callback_param); - spin_lock_bh(&sh_chan->desc_lock); + if (tx->cookie > 0 || tx->cookie == -EBUSY) { + if (desc->mark == DESC_COMPLETED) { + BUG_ON(tx->cookie < 0); + desc->mark = DESC_WAITING; + } + head_acked = async_tx_test_ack(tx); + } else { + switch (desc->mark) { + case DESC_COMPLETED: + desc->mark = DESC_WAITING; + /* Fall through */ + case DESC_WAITING: + if (head_acked) + async_tx_ack(&desc->async_tx); + } + } + + dev_dbg(sh_chan->dev, "descriptor %p #%d completed.\n", + tx, tx->cookie); + + if (((desc->mark == DESC_COMPLETED || + desc->mark == DESC_WAITING) && + async_tx_test_ack(&desc->async_tx)) || all) { + /* Remove from ld_queue list */ + desc->mark = DESC_IDLE; + list_move(&desc->node, &sh_chan->ld_free); } } spin_unlock_bh(&sh_chan->desc_lock); + + if (callback) + callback(param); + + return callback; +} + +/* + * sh_chan_ld_cleanup - Clean up link descriptors + * + * This function cleans up the ld_queue of DMA channel. + */ +static void sh_dmae_chan_ld_cleanup(struct sh_dmae_chan *sh_chan, bool all) +{ + while (__ld_cleanup(sh_chan, all)) + ; } static void sh_chan_xfer_ld_queue(struct sh_dmae_chan *sh_chan) { - struct list_head *ld_node; - struct sh_dmae_regs hw; + struct sh_desc *sd; + spin_lock_bh(&sh_chan->desc_lock); /* DMA work check */ - if (dmae_is_busy(sh_chan)) + if (dmae_is_busy(sh_chan)) { + spin_unlock_bh(&sh_chan->desc_lock); return; + } /* Find the first un-transfer desciptor */ - for (ld_node = sh_chan->ld_queue.next; - (ld_node != &sh_chan->ld_queue) - && (to_sh_desc(ld_node)->mark == DESC_COMP); - ld_node = ld_node->next) - cpu_relax(); - - if (ld_node != &sh_chan->ld_queue) { - /* Get the ld start address from ld_queue */ - hw = to_sh_desc(ld_node)->hw; - dmae_set_reg(sh_chan, hw); - dmae_start(sh_chan); - } + list_for_each_entry(sd, &sh_chan->ld_queue, node) + if (sd->mark == DESC_SUBMITTED) { + /* Get the ld start address from ld_queue */ + dmae_set_reg(sh_chan, &sd->hw); + dmae_start(sh_chan); + break; + } + + spin_unlock_bh(&sh_chan->desc_lock); } static void sh_dmae_memcpy_issue_pending(struct dma_chan *chan) @@ -420,12 +519,11 @@ static enum dma_status sh_dmae_is_complete(struct dma_chan *chan, dma_cookie_t last_used; dma_cookie_t last_complete; - sh_dmae_chan_ld_cleanup(sh_chan); + sh_dmae_chan_ld_cleanup(sh_chan, false); last_used = chan->cookie; last_complete = sh_chan->completed_cookie; - if (last_complete == -EBUSY) - last_complete = last_used; + BUG_ON(last_complete < 0); if (done) *done = last_complete; @@ -480,11 +578,13 @@ static irqreturn_t sh_dmae_err(int irq, void *data) err = sh_dmae_rst(0); if (err) return err; +#ifdef SH_DMAC_BASE1 if (shdev->pdata.mode & SHDMA_DMAOR1) { err = sh_dmae_rst(1); if (err) return err; } +#endif disable_irq(irq); return IRQ_HANDLED; } @@ -494,35 +594,25 @@ static irqreturn_t sh_dmae_err(int irq, void *data) static void dmae_do_tasklet(unsigned long data) { struct sh_dmae_chan *sh_chan = (struct sh_dmae_chan *)data; - struct sh_desc *desc, *_desc, *cur_desc = NULL; + struct sh_desc *desc; u32 sar_buf = sh_dmae_readl(sh_chan, SAR); - list_for_each_entry_safe(desc, _desc, - &sh_chan->ld_queue, node) { - if ((desc->hw.sar + desc->hw.tcr) == sar_buf) { - cur_desc = desc; + spin_lock(&sh_chan->desc_lock); + list_for_each_entry(desc, &sh_chan->ld_queue, node) { + if ((desc->hw.sar + desc->hw.tcr) == sar_buf && + desc->mark == DESC_SUBMITTED) { + dev_dbg(sh_chan->dev, "done #%d@%p dst %u\n", + desc->async_tx.cookie, &desc->async_tx, + desc->hw.dar); + desc->mark = DESC_COMPLETED; break; } } + spin_unlock(&sh_chan->desc_lock); - if (cur_desc) { - switch (cur_desc->async_tx.cookie) { - case 0: /* other desc data */ - break; - case -EBUSY: /* last desc */ - sh_chan->completed_cookie = - cur_desc->async_tx.cookie; - break; - default: /* first desc ( 0 < )*/ - sh_chan->completed_cookie = - cur_desc->async_tx.cookie - 1; - break; - } - cur_desc->mark = DESC_COMP; - } /* Next desc */ sh_chan_xfer_ld_queue(sh_chan); - sh_dmae_chan_ld_cleanup(sh_chan); + sh_dmae_chan_ld_cleanup(sh_chan, false); } static unsigned int get_dmae_irq(unsigned int id) diff --git a/drivers/dma/shdma.h b/drivers/dma/shdma.h index 60b81e5..108f1cf 100644 --- a/drivers/dma/shdma.h +++ b/drivers/dma/shdma.h @@ -13,9 +13,9 @@ #ifndef __DMA_SHDMA_H #define __DMA_SHDMA_H -#include -#include #include +#include +#include #define SH_DMA_TCR_MAX 0x00FFFFFF /* 16MB */ @@ -26,13 +26,16 @@ struct sh_dmae_regs { }; struct sh_desc { - struct list_head tx_list; struct sh_dmae_regs hw; struct list_head node; struct dma_async_tx_descriptor async_tx; + dma_cookie_t cookie; + int chunks; int mark; }; +struct device; + struct sh_dmae_chan { dma_cookie_t completed_cookie; /* The maximum cookie completed */ spinlock_t desc_lock; /* Descriptor operation lock */ -- cgit v1.1 From cd78809f6191485a90ea6c92c2b58900ab5c156f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 17 Dec 2009 13:52:39 -0700 Subject: ioat3: fix p-disabled q-continuation When continuing a pq calculation the driver needs 3 extra sources. The driver can perform a 3 source calculation with a single descriptor, but needs an extended descriptor to process up to 8 sources in one operation. However, in the p-disabled case only one extra source is needed. When continuing a p-disabled operation there are occasions (i.e. 0 < src_cnt % 8 < 3) where the tail operation does not need an extended descriptor. Properly account for this fact otherwise invalid 'dmacount' values will be written to hardware usually causing the channel to halt with 'invalid descriptor' errors. Cc: Signed-off-by: Dan Williams --- drivers/dma/ioat/dma_v3.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index 42f6f10..e58ecb2 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -650,9 +650,11 @@ __ioat3_prep_pq_lock(struct dma_chan *c, enum sum_check_flags *result, num_descs = ioat2_xferlen_to_descs(ioat, len); /* we need 2x the number of descriptors to cover greater than 3 - * sources + * sources (we need 1 extra source in the q-only continuation + * case and 3 extra sources in the p+q continuation case. */ - if (src_cnt > 3 || flags & DMA_PREP_CONTINUE) { + if (src_cnt + dmaf_p_disabled_continue(flags) > 3 || + (dmaf_continue(flags) && !dmaf_p_disabled_continue(flags))) { with_ext = 1; num_descs *= 2; } else -- cgit v1.1 From e02a0e47a3f061c1a53fc4376332a988ec047e8a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 17 Dec 2009 13:55:38 -0700 Subject: async_tx: expand async raid6 test to cover ioatdma corner case Add explicit 11 and 12 disks cases to exercise the 0 < src_cnt % 8 < 3 corner case in the ioatdma driver. Signed-off-by: Dan Williams --- crypto/async_tx/raid6test.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/crypto/async_tx/raid6test.c b/crypto/async_tx/raid6test.c index 3ec27c7..f84f6b4 100644 --- a/crypto/async_tx/raid6test.c +++ b/crypto/async_tx/raid6test.c @@ -214,6 +214,13 @@ static int raid6_test(void) err += test(4, &tests); if (NDISKS > 5) err += test(5, &tests); + /* the 11 and 12 disk cases are special for ioatdma (p-disabled + * q-continuation without extended descriptor) + */ + if (NDISKS > 12) { + err += test(11, &tests); + err += test(12, &tests); + } err += test(NDISKS, &tests); pr("\n"); -- cgit v1.1 From a6d52d70677e99bdb89b6921c265d0a58c22e597 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 19 Dec 2009 15:36:02 -0700 Subject: ioat2,3: put channel hardware in known state at init Put the ioat2 and ioat3 state machines in the halted state with all errors cleared. The ioat1 init path is not disturbed for stability, there are no reported ioat1 initiaization issues. Cc: Reported-by: Roland Dreier Tested-by: Roland Dreier Acked-by: Simon Horman Signed-off-by: Dan Williams --- drivers/dma/ioat/dma.c | 2 +- drivers/dma/ioat/dma.h | 18 ++++++++++++ drivers/dma/ioat/dma_v2.c | 69 +++++++++++++++++++++++++++++++++----------- drivers/dma/ioat/dma_v2.h | 2 ++ drivers/dma/ioat/dma_v3.c | 54 +++++++++++++++++++++++++--------- drivers/dma/ioat/registers.h | 1 + 6 files changed, 114 insertions(+), 32 deletions(-) diff --git a/drivers/dma/ioat/dma.c b/drivers/dma/ioat/dma.c index c524d36..dcc4ab7 100644 --- a/drivers/dma/ioat/dma.c +++ b/drivers/dma/ioat/dma.c @@ -1032,7 +1032,7 @@ int __devinit ioat_probe(struct ioatdma_device *device) dma->dev = &pdev->dev; if (!dma->chancnt) { - dev_err(dev, "zero channels detected\n"); + dev_err(dev, "channel enumeration error\n"); goto err_setup_interrupts; } diff --git a/drivers/dma/ioat/dma.h b/drivers/dma/ioat/dma.h index 45edde9..bbc3e78 100644 --- a/drivers/dma/ioat/dma.h +++ b/drivers/dma/ioat/dma.h @@ -60,6 +60,7 @@ * @dca: direct cache access context * @intr_quirk: interrupt setup quirk (for ioat_v1 devices) * @enumerate_channels: hw version specific channel enumeration + * @reset_hw: hw version specific channel (re)initialization * @cleanup_tasklet: select between the v2 and v3 cleanup routines * @timer_fn: select between the v2 and v3 timer watchdog routines * @self_test: hardware version specific self test for each supported op type @@ -78,6 +79,7 @@ struct ioatdma_device { struct dca_provider *dca; void (*intr_quirk)(struct ioatdma_device *device); int (*enumerate_channels)(struct ioatdma_device *device); + int (*reset_hw)(struct ioat_chan_common *chan); void (*cleanup_tasklet)(unsigned long data); void (*timer_fn)(unsigned long data); int (*self_test)(struct ioatdma_device *device); @@ -264,6 +266,22 @@ static inline void ioat_suspend(struct ioat_chan_common *chan) writeb(IOAT_CHANCMD_SUSPEND, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); } +static inline void ioat_reset(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + + writeb(IOAT_CHANCMD_RESET, chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); +} + +static inline bool ioat_reset_pending(struct ioat_chan_common *chan) +{ + u8 ver = chan->device->version; + u8 cmd; + + cmd = readb(chan->reg_base + IOAT_CHANCMD_OFFSET(ver)); + return (cmd & IOAT_CHANCMD_RESET) == IOAT_CHANCMD_RESET; +} + static inline void ioat_set_chainaddr(struct ioat_dma_chan *ioat, u64 addr) { struct ioat_chan_common *chan = &ioat->base; diff --git a/drivers/dma/ioat/dma_v2.c b/drivers/dma/ioat/dma_v2.c index 8f1f7f0..5f7a500 100644 --- a/drivers/dma/ioat/dma_v2.c +++ b/drivers/dma/ioat/dma_v2.c @@ -239,20 +239,50 @@ void __ioat2_restart_chan(struct ioat2_dma_chan *ioat) __ioat2_start_null_desc(ioat); } -static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) +int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo) { - struct ioat_chan_common *chan = &ioat->base; - unsigned long phys_complete; + unsigned long end = jiffies + tmo; + int err = 0; u32 status; status = ioat_chansts(chan); if (is_ioat_active(status) || is_ioat_idle(status)) ioat_suspend(chan); while (is_ioat_active(status) || is_ioat_idle(status)) { + if (end && time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } status = ioat_chansts(chan); cpu_relax(); } + return err; +} + +int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo) +{ + unsigned long end = jiffies + tmo; + int err = 0; + + ioat_reset(chan); + while (ioat_reset_pending(chan)) { + if (end && time_after(jiffies, end)) { + err = -ETIMEDOUT; + break; + } + cpu_relax(); + } + + return err; +} + +static void ioat2_restart_channel(struct ioat2_dma_chan *ioat) +{ + struct ioat_chan_common *chan = &ioat->base; + unsigned long phys_complete; + + ioat2_quiesce(chan, 0); if (ioat_cleanup_preamble(chan, &phys_complete)) __cleanup(ioat, phys_complete); @@ -318,6 +348,19 @@ void ioat2_timer_event(unsigned long data) spin_unlock_bh(&chan->cleanup_lock); } +static int ioat2_reset_hw(struct ioat_chan_common *chan) +{ + /* throw away whatever the channel was doing and get it initialized */ + u32 chanerr; + + ioat2_quiesce(chan, msecs_to_jiffies(100)); + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + + return ioat2_reset_sync(chan, msecs_to_jiffies(200)); +} + /** * ioat2_enumerate_channels - find and initialize the device's channels * @device: the device to be enumerated @@ -360,6 +403,10 @@ int ioat2_enumerate_channels(struct ioatdma_device *device) (unsigned long) ioat); ioat->xfercap_log = xfercap_log; spin_lock_init(&ioat->ring_lock); + if (device->reset_hw(&ioat->base)) { + i = 0; + break; + } } dma->chancnt = i; return i; @@ -467,7 +514,6 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) struct ioat2_dma_chan *ioat = to_ioat2_chan(c); struct ioat_chan_common *chan = &ioat->base; struct ioat_ring_ent **ring; - u32 chanerr; int order; /* have we already been set up? */ @@ -477,12 +523,6 @@ int ioat2_alloc_chan_resources(struct dma_chan *c) /* Setup register to interrupt and write completion status on error */ writew(IOAT_CHANCTRL_RUN, chan->reg_base + IOAT_CHANCTRL_OFFSET); - chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); - if (chanerr) { - dev_err(to_dev(chan), "CHANERR = %x, clearing\n", chanerr); - writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); - } - /* allocate a completion writeback area */ /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ chan->completion = pci_pool_alloc(chan->device->completion_pool, @@ -746,13 +786,7 @@ void ioat2_free_chan_resources(struct dma_chan *c) tasklet_disable(&chan->cleanup_task); del_timer_sync(&chan->timer); device->cleanup_tasklet((unsigned long) ioat); - - /* Delay 100ms after reset to allow internal DMA logic to quiesce - * before removing DMA descriptor resources. - */ - writeb(IOAT_CHANCMD_RESET, - chan->reg_base + IOAT_CHANCMD_OFFSET(chan->device->version)); - mdelay(100); + device->reset_hw(chan); spin_lock_bh(&ioat->ring_lock); descs = ioat2_ring_space(ioat); @@ -839,6 +873,7 @@ int __devinit ioat2_dma_probe(struct ioatdma_device *device, int dca) int err; device->enumerate_channels = ioat2_enumerate_channels; + device->reset_hw = ioat2_reset_hw; device->cleanup_tasklet = ioat2_cleanup_tasklet; device->timer_fn = ioat2_timer_event; device->self_test = ioat_dma_self_test; diff --git a/drivers/dma/ioat/dma_v2.h b/drivers/dma/ioat/dma_v2.h index 1d849ef..3afad8d 100644 --- a/drivers/dma/ioat/dma_v2.h +++ b/drivers/dma/ioat/dma_v2.h @@ -185,6 +185,8 @@ bool reshape_ring(struct ioat2_dma_chan *ioat, int order); void __ioat2_issue_pending(struct ioat2_dma_chan *ioat); void ioat2_cleanup_tasklet(unsigned long data); void ioat2_timer_event(unsigned long data); +int ioat2_quiesce(struct ioat_chan_common *chan, unsigned long tmo); +int ioat2_reset_sync(struct ioat_chan_common *chan, unsigned long tmo); extern struct kobj_type ioat2_ktype; extern struct kmem_cache *ioat2_cache; #endif /* IOATDMA_V2_H */ diff --git a/drivers/dma/ioat/dma_v3.c b/drivers/dma/ioat/dma_v3.c index e58ecb2..9908c9e 100644 --- a/drivers/dma/ioat/dma_v3.c +++ b/drivers/dma/ioat/dma_v3.c @@ -1130,6 +1130,45 @@ static int __devinit ioat3_dma_self_test(struct ioatdma_device *device) return 0; } +static int ioat3_reset_hw(struct ioat_chan_common *chan) +{ + /* throw away whatever the channel was doing and get it + * initialized, with ioat3 specific workarounds + */ + struct ioatdma_device *device = chan->device; + struct pci_dev *pdev = device->pdev; + u32 chanerr; + u16 dev_id; + int err; + + ioat2_quiesce(chan, msecs_to_jiffies(100)); + + chanerr = readl(chan->reg_base + IOAT_CHANERR_OFFSET); + writel(chanerr, chan->reg_base + IOAT_CHANERR_OFFSET); + + /* -= IOAT ver.3 workarounds =- */ + /* Write CHANERRMSK_INT with 3E07h to mask out the errors + * that can cause stability issues for IOAT ver.3, and clear any + * pending errors + */ + pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); + err = pci_read_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, &chanerr); + if (err) { + dev_err(&pdev->dev, "channel error register unreachable\n"); + return err; + } + pci_write_config_dword(pdev, IOAT_PCI_CHANERR_INT_OFFSET, chanerr); + + /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) + pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); + + return ioat2_reset_sync(chan, msecs_to_jiffies(200)); +} + int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) { struct pci_dev *pdev = device->pdev; @@ -1139,10 +1178,10 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) struct ioat_chan_common *chan; bool is_raid_device = false; int err; - u16 dev_id; u32 cap; device->enumerate_channels = ioat2_enumerate_channels; + device->reset_hw = ioat3_reset_hw; device->self_test = ioat3_dma_self_test; dma = &device->common; dma->device_prep_dma_memcpy = ioat2_dma_prep_memcpy_lock; @@ -1218,19 +1257,6 @@ int __devinit ioat3_dma_probe(struct ioatdma_device *device, int dca) dma->device_prep_dma_xor_val = NULL; #endif - /* -= IOAT ver.3 workarounds =- */ - /* Write CHANERRMSK_INT with 3E07h to mask out the errors - * that can cause stability issues for IOAT ver.3 - */ - pci_write_config_dword(pdev, IOAT_PCI_CHANERRMASK_INT_OFFSET, 0x3e07); - - /* Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit - * (workaround for spurious config parity error after restart) - */ - pci_read_config_word(pdev, IOAT_PCI_DEVICE_ID_OFFSET, &dev_id); - if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) - pci_write_config_dword(pdev, IOAT_PCI_DMAUNCERRSTS_OFFSET, 0x10); - err = ioat_probe(device); if (err) return err; diff --git a/drivers/dma/ioat/registers.h b/drivers/dma/ioat/registers.h index f015ec1..e8ae63b 100644 --- a/drivers/dma/ioat/registers.h +++ b/drivers/dma/ioat/registers.h @@ -27,6 +27,7 @@ #define IOAT_PCI_DEVICE_ID_OFFSET 0x02 #define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148 +#define IOAT_PCI_CHANERR_INT_OFFSET 0x180 #define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184 /* MMIO Device Registers */ -- cgit v1.1 From 1e9d1b13efae7e0a2705611d47ae5f07e27015f0 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sat, 19 Dec 2009 08:30:30 +0100 Subject: drivers/dma: drop unnecesary memset memset of 0 is not needed after kzalloc The semantic patch that makes this change is as follows: (http://coccinelle.lip6.fr/) // @@ expression x; statement S; @@ x = kzalloc(...); if (x == NULL) S ... when != x -memset(x,0,...);// Signed-off-by: Julia Lawall Signed-off-by: Dan Williams --- drivers/dma/dw_dmac.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c index 2eea823..53bb3ca 100644 --- a/drivers/dma/dw_dmac.c +++ b/drivers/dma/dw_dmac.c @@ -1270,8 +1270,6 @@ static int __init dw_probe(struct platform_device *pdev) goto err_kfree; } - memset(dw, 0, sizeof *dw); - dw->regs = ioremap(io->start, DW_REGLEN); if (!dw->regs) { err = -ENOMEM; -- cgit v1.1 From 0794ec8ce327ec74416b569b8fb1951274693700 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Tue, 22 Dec 2009 21:30:59 +0100 Subject: drivers/dma: Correct use after free Move the kfree after the iounmap that refers to the same structure. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // @@ expression x,e; identifier f; iterator I; statement S; @@ *kfree(x); ... when != &x when != x = e when != I(x,...) S *x->f // Signed-off-by: Julia Lawall Signed-off-by: Dan Williams --- drivers/dma/coh901318.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c index 4a99cd9..b5f2ee0 100644 --- a/drivers/dma/coh901318.c +++ b/drivers/dma/coh901318.c @@ -1294,8 +1294,8 @@ static int __exit coh901318_remove(struct platform_device *pdev) dma_async_device_unregister(&base->dma_slave); coh901318_pool_destroy(&base->pool); free_irq(platform_get_irq(pdev, 0), base); - kfree(base); iounmap(base->virtbase); + kfree(base); release_mem_region(pdev->resource->start, resource_size(pdev->resource)); return 0; -- cgit v1.1