diff options
33 files changed, 4590 insertions, 339 deletions
diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index 021d512..604f44f 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -7,6 +7,7 @@ */ #include <linux/clk.h> #include <linux/delay.h> +#include <linux/dw_dmac.h> #include <linux/fb.h> #include <linux/init.h> #include <linux/platform_device.h> @@ -594,6 +595,17 @@ static void __init genclk_init_parent(struct clk *clk) clk->parent = parent; } +static struct dw_dma_platform_data dw_dmac0_data = { + .nr_channels = 3, +}; + +static struct resource dw_dmac0_resource[] = { + PBMEM(0xff200000), + IRQ(2), +}; +DEFINE_DEV_DATA(dw_dmac, 0); +DEV_CLK(hclk, dw_dmac0, hsb, 10); + /* -------------------------------------------------------------------- * System peripherals * -------------------------------------------------------------------- */ @@ -708,17 +720,6 @@ static struct clk pico_clk = { .users = 1, }; -static struct resource dmaca0_resource[] = { - { - .start = 0xff200000, - .end = 0xff20ffff, - .flags = IORESOURCE_MEM, - }, - IRQ(2), -}; -DEFINE_DEV(dmaca, 0); -DEV_CLK(hclk, dmaca0, hsb, 10); - /* -------------------------------------------------------------------- * HMATRIX * -------------------------------------------------------------------- */ @@ -831,7 +832,7 @@ void __init at32_add_system_devices(void) platform_device_register(&at32_eic0_device); platform_device_register(&smc0_device); platform_device_register(&pdc_device); - platform_device_register(&dmaca0_device); + platform_device_register(&dw_dmac0_device); platform_device_register(&at32_tcb0_device); platform_device_register(&at32_tcb1_device); @@ -2032,7 +2033,7 @@ struct clk *at32_clock_list[] = { &smc0_mck, &pdc_hclk, &pdc_pclk, - &dmaca0_hclk, + &dw_dmac0_hclk, &pico_clk, &pio0_mck, &pio1_mck, diff --git a/crypto/async_tx/async_memcpy.c b/crypto/async_tx/async_memcpy.c index a5eda80..ddccfb0 100644 --- a/crypto/async_tx/async_memcpy.c +++ b/crypto/async_tx/async_memcpy.c 
@@ -73,15 +73,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, pr_debug("%s: (sync) len: %zu\n", __func__, len); /* wait for any prerequisite operations */ - if (depend_tx) { - /* if ack is already set then we cannot be sure - * we are referring to the correct operation - */ - BUG_ON(async_tx_test_ack(depend_tx)); - if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) - panic("%s: DMA_ERROR waiting for depend_tx\n", - __func__); - } + async_tx_quiesce(&depend_tx); dest_buf = kmap_atomic(dest, KM_USER0) + dest_offset; src_buf = kmap_atomic(src, KM_USER1) + src_offset; @@ -91,7 +83,7 @@ async_memcpy(struct page *dest, struct page *src, unsigned int dest_offset, kunmap_atomic(dest_buf, KM_USER0); kunmap_atomic(src_buf, KM_USER1); - async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); + async_tx_sync_epilog(cb_fn, cb_param); } return tx; diff --git a/crypto/async_tx/async_memset.c b/crypto/async_tx/async_memset.c index f5ff3906..5b5eb99 100644 --- a/crypto/async_tx/async_memset.c +++ b/crypto/async_tx/async_memset.c @@ -72,19 +72,11 @@ async_memset(struct page *dest, int val, unsigned int offset, dest_buf = (void *) (((char *) page_address(dest)) + offset); /* wait for any prerequisite operations */ - if (depend_tx) { - /* if ack is already set then we cannot be sure - * we are referring to the correct operation - */ - BUG_ON(depend_tx->ack); - if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) - panic("%s: DMA_ERROR waiting for depend_tx\n", - __func__); - } + async_tx_quiesce(&depend_tx); memset(dest_buf, val, len); - async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); + async_tx_sync_epilog(cb_fn, cb_param); } return tx; diff --git a/crypto/async_tx/async_tx.c b/crypto/async_tx/async_tx.c index 095c798..85eaf7b 100644 --- a/crypto/async_tx/async_tx.c +++ b/crypto/async_tx/async_tx.c @@ -295,7 +295,7 @@ dma_channel_add_remove(struct dma_client *client, case DMA_RESOURCE_REMOVED: found = 0; spin_lock_irqsave(&async_tx_lock, 
flags); - list_for_each_entry_rcu(ref, &async_tx_master_list, node) + list_for_each_entry(ref, &async_tx_master_list, node) if (ref->chan == chan) { /* permit backing devices to go away */ dma_chan_put(ref->chan); @@ -608,23 +608,34 @@ async_trigger_callback(enum async_tx_flags flags, pr_debug("%s: (sync)\n", __func__); /* wait for any prerequisite operations */ - if (depend_tx) { - /* if ack is already set then we cannot be sure - * we are referring to the correct operation - */ - BUG_ON(async_tx_test_ack(depend_tx)); - if (dma_wait_for_async_tx(depend_tx) == DMA_ERROR) - panic("%s: DMA_ERROR waiting for depend_tx\n", - __func__); - } + async_tx_quiesce(&depend_tx); - async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); + async_tx_sync_epilog(cb_fn, cb_param); } return tx; } EXPORT_SYMBOL_GPL(async_trigger_callback); +/** + * async_tx_quiesce - ensure tx is complete and freeable upon return + * @tx - transaction to quiesce + */ +void async_tx_quiesce(struct dma_async_tx_descriptor **tx) +{ + if (*tx) { + /* if ack is already set then we cannot be sure + * we are referring to the correct operation + */ + BUG_ON(async_tx_test_ack(*tx)); + if (dma_wait_for_async_tx(*tx) == DMA_ERROR) + panic("DMA_ERROR waiting for transaction\n"); + async_tx_ack(*tx); + *tx = NULL; + } +} +EXPORT_SYMBOL_GPL(async_tx_quiesce); + module_init(async_tx_init); module_exit(async_tx_exit); diff --git a/crypto/async_tx/async_xor.c b/crypto/async_tx/async_xor.c index 3a0dddc..65974c6 100644 --- a/crypto/async_tx/async_xor.c +++ b/crypto/async_tx/async_xor.c @@ -35,74 +35,121 @@ * when CONFIG_DMA_ENGINE=n */ static __always_inline struct dma_async_tx_descriptor * -do_async_xor(struct dma_device *device, - struct dma_chan *chan, struct page *dest, struct page **src_list, - unsigned int offset, unsigned int src_cnt, size_t len, - enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) +do_async_xor(struct dma_chan *chan, struct 
page *dest, struct page **src_list, + unsigned int offset, int src_cnt, size_t len, + enum async_tx_flags flags, + struct dma_async_tx_descriptor *depend_tx, + dma_async_tx_callback cb_fn, void *cb_param) { - dma_addr_t dma_dest; + struct dma_device *dma = chan->device; dma_addr_t *dma_src = (dma_addr_t *) src_list; - struct dma_async_tx_descriptor *tx; + struct dma_async_tx_descriptor *tx = NULL; + int src_off = 0; int i; - unsigned long dma_prep_flags = cb_fn ? DMA_PREP_INTERRUPT : 0; - - pr_debug("%s: len: %zu\n", __func__, len); - - dma_dest = dma_map_page(device->dev, dest, offset, len, - DMA_FROM_DEVICE); + dma_async_tx_callback _cb_fn; + void *_cb_param; + enum async_tx_flags async_flags; + enum dma_ctrl_flags dma_flags; + int xor_src_cnt; + dma_addr_t dma_dest; + dma_dest = dma_map_page(dma->dev, dest, offset, len, DMA_FROM_DEVICE); for (i = 0; i < src_cnt; i++) - dma_src[i] = dma_map_page(device->dev, src_list[i], offset, + dma_src[i] = dma_map_page(dma->dev, src_list[i], offset, len, DMA_TO_DEVICE); - /* Since we have clobbered the src_list we are committed - * to doing this asynchronously. 
Drivers force forward progress - * in case they can not provide a descriptor - */ - tx = device->device_prep_dma_xor(chan, dma_dest, dma_src, src_cnt, len, - dma_prep_flags); - if (!tx) { - if (depend_tx) - dma_wait_for_async_tx(depend_tx); - - while (!tx) - tx = device->device_prep_dma_xor(chan, dma_dest, - dma_src, src_cnt, len, - dma_prep_flags); - } + while (src_cnt) { + async_flags = flags; + dma_flags = 0; + xor_src_cnt = min(src_cnt, dma->max_xor); + /* if we are submitting additional xors, leave the chain open, + * clear the callback parameters, and leave the destination + * buffer mapped + */ + if (src_cnt > xor_src_cnt) { + async_flags &= ~ASYNC_TX_ACK; + dma_flags = DMA_COMPL_SKIP_DEST_UNMAP; + _cb_fn = NULL; + _cb_param = NULL; + } else { + _cb_fn = cb_fn; + _cb_param = cb_param; + } + if (_cb_fn) + dma_flags |= DMA_PREP_INTERRUPT; - async_tx_submit(chan, tx, flags, depend_tx, cb_fn, cb_param); + /* Since we have clobbered the src_list we are committed + * to doing this asynchronously. 
Drivers force forward progress + * in case they can not provide a descriptor + */ + tx = dma->device_prep_dma_xor(chan, dma_dest, &dma_src[src_off], + xor_src_cnt, len, dma_flags); + + if (unlikely(!tx)) + async_tx_quiesce(&depend_tx); + + /* spin wait for the preceding transactions to complete */ + while (unlikely(!tx)) { + dma_async_issue_pending(chan); + tx = dma->device_prep_dma_xor(chan, dma_dest, + &dma_src[src_off], + xor_src_cnt, len, + dma_flags); + } + + async_tx_submit(chan, tx, async_flags, depend_tx, _cb_fn, + _cb_param); + + depend_tx = tx; + flags |= ASYNC_TX_DEP_ACK; + + if (src_cnt > xor_src_cnt) { + /* drop completed sources */ + src_cnt -= xor_src_cnt; + src_off += xor_src_cnt; + + /* use the intermediate result as a source */ + dma_src[--src_off] = dma_dest; + src_cnt++; + } else + break; + } return tx; } static void do_sync_xor(struct page *dest, struct page **src_list, unsigned int offset, - unsigned int src_cnt, size_t len, enum async_tx_flags flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_param) + int src_cnt, size_t len, enum async_tx_flags flags, + dma_async_tx_callback cb_fn, void *cb_param) { - void *_dest; int i; - - pr_debug("%s: len: %zu\n", __func__, len); + int xor_src_cnt; + int src_off = 0; + void *dest_buf; + void **srcs = (void **) src_list; /* reuse the 'src_list' array to convert to buffer pointers */ for (i = 0; i < src_cnt; i++) - src_list[i] = (struct page *) - (page_address(src_list[i]) + offset); + srcs[i] = page_address(src_list[i]) + offset; /* set destination address */ - _dest = page_address(dest) + offset; + dest_buf = page_address(dest) + offset; if (flags & ASYNC_TX_XOR_ZERO_DST) - memset(_dest, 0, len); + memset(dest_buf, 0, len); - xor_blocks(src_cnt, len, _dest, - (void **) src_list); + while (src_cnt > 0) { + /* process up to 'MAX_XOR_BLOCKS' sources */ + xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS); + xor_blocks(xor_src_cnt, len, dest_buf, &srcs[src_off]); - 
async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); + /* drop completed sources */ + src_cnt -= xor_src_cnt; + src_off += xor_src_cnt; + } + + async_tx_sync_epilog(cb_fn, cb_param); } /** @@ -132,106 +179,34 @@ async_xor(struct page *dest, struct page **src_list, unsigned int offset, struct dma_chan *chan = async_tx_find_channel(depend_tx, DMA_XOR, &dest, 1, src_list, src_cnt, len); - struct dma_device *device = chan ? chan->device : NULL; - struct dma_async_tx_descriptor *tx = NULL; - dma_async_tx_callback _cb_fn; - void *_cb_param; - unsigned long local_flags; - int xor_src_cnt; - int i = 0, src_off = 0; - BUG_ON(src_cnt <= 1); - while (src_cnt) { - local_flags = flags; - if (device) { /* run the xor asynchronously */ - xor_src_cnt = min(src_cnt, device->max_xor); - /* if we are submitting additional xors - * only set the callback on the last transaction - */ - if (src_cnt > xor_src_cnt) { - local_flags &= ~ASYNC_TX_ACK; - _cb_fn = NULL; - _cb_param = NULL; - } else { - _cb_fn = cb_fn; - _cb_param = cb_param; - } - - tx = do_async_xor(device, chan, dest, - &src_list[src_off], offset, - xor_src_cnt, len, local_flags, - depend_tx, _cb_fn, _cb_param); - } else { /* run the xor synchronously */ - /* in the sync case the dest is an implied source - * (assumes the dest is at the src_off index) - */ - if (flags & ASYNC_TX_XOR_DROP_DST) { - src_cnt--; - src_off++; - } - - /* process up to 'MAX_XOR_BLOCKS' sources */ - xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS); - - /* if we are submitting additional xors - * only set the callback on the last transaction - */ - if (src_cnt > xor_src_cnt) { - local_flags &= ~ASYNC_TX_ACK; - _cb_fn = NULL; - _cb_param = NULL; - } else { - _cb_fn = cb_fn; - _cb_param = cb_param; - } - - /* wait for any prerequisite operations */ - if (depend_tx) { - /* if ack is already set then we cannot be sure - * we are referring to the correct operation - */ - BUG_ON(async_tx_test_ack(depend_tx)); - if (dma_wait_for_async_tx(depend_tx) == - 
DMA_ERROR) - panic("%s: DMA_ERROR waiting for " - "depend_tx\n", - __func__); - } - - do_sync_xor(dest, &src_list[src_off], offset, - xor_src_cnt, len, local_flags, depend_tx, - _cb_fn, _cb_param); - } + if (chan) { + /* run the xor asynchronously */ + pr_debug("%s (async): len: %zu\n", __func__, len); - /* the previous tx is hidden from the client, - * so ack it - */ - if (i && depend_tx) - async_tx_ack(depend_tx); + return do_async_xor(chan, dest, src_list, offset, src_cnt, len, + flags, depend_tx, cb_fn, cb_param); + } else { + /* run the xor synchronously */ + pr_debug("%s (sync): len: %zu\n", __func__, len); - depend_tx = tx; + /* in the sync case the dest is an implied source + * (assumes the dest is the first source) + */ + if (flags & ASYNC_TX_XOR_DROP_DST) { + src_cnt--; + src_list++; + } - if (src_cnt > xor_src_cnt) { - /* drop completed sources */ - src_cnt -= xor_src_cnt; - src_off += xor_src_cnt; + /* wait for any prerequisite operations */ + async_tx_quiesce(&depend_tx); - /* unconditionally preserve the destination */ - flags &= ~ASYNC_TX_XOR_ZERO_DST; + do_sync_xor(dest, src_list, offset, src_cnt, len, + flags, cb_fn, cb_param); - /* use the intermediate result a source, but remember - * it's dropped, because it's implied, in the sync case - */ - src_list[--src_off] = dest; - src_cnt++; - flags |= ASYNC_TX_XOR_DROP_DST; - } else - src_cnt = 0; - i++; + return NULL; } - - return tx; } EXPORT_SYMBOL_GPL(async_xor); @@ -285,11 +260,13 @@ async_xor_zero_sum(struct page *dest, struct page **src_list, tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt, len, result, dma_prep_flags); - if (!tx) { - if (depend_tx) - dma_wait_for_async_tx(depend_tx); + if (unlikely(!tx)) { + async_tx_quiesce(&depend_tx); - while (!tx) + /* spin wait for the preceding transactions to complete */ + while (!tx) { + dma_async_issue_pending(chan); tx = device->device_prep_dma_zero_sum(chan, dma_src, src_cnt, len, result, dma_prep_flags); + } @@ -307,18 +284,11 @@ async_xor_zero_sum(struct page *dest, struct page **src_list, tx = async_xor(dest, 
src_list, offset, src_cnt, len, xor_flags, depend_tx, NULL, NULL); - if (tx) { - if (dma_wait_for_async_tx(tx) == DMA_ERROR) - panic("%s: DMA_ERROR waiting for tx\n", - __func__); - async_tx_ack(tx); - } + async_tx_quiesce(&tx); *result = page_is_zero(dest, offset, len) ? 0 : 1; - tx = NULL; - - async_tx_sync_epilog(flags, depend_tx, cb_fn, cb_param); + async_tx_sync_epilog(cb_fn, cb_param); } return tx; diff --git a/drivers/dca/dca-core.c b/drivers/dca/dca-core.c index bf5b92f..ec249d2 100644 --- a/drivers/dca/dca-core.c +++ b/drivers/dca/dca-core.c @@ -28,13 +28,29 @@ #include <linux/device.h> #include <linux/dca.h> -MODULE_LICENSE("GPL"); +#define DCA_VERSION "1.4" -/* For now we're assuming a single, global, DCA provider for the system. */ +MODULE_VERSION(DCA_VERSION); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Intel Corporation"); static DEFINE_SPINLOCK(dca_lock); -static struct dca_provider *global_dca = NULL; +static LIST_HEAD(dca_providers); + +static struct dca_provider *dca_find_provider_by_dev(struct device *dev) +{ + struct dca_provider *dca, *ret = NULL; + + list_for_each_entry(dca, &dca_providers, node) { + if ((!dev) || (dca->ops->dev_managed(dca, dev))) { + ret = dca; + break; + } + } + + return ret; +} /** * dca_add_requester - add a dca client to the list @@ -42,25 +58,39 @@ static struct dca_provider *global_dca = NULL; */ int dca_add_requester(struct device *dev) { - int err, slot; + struct dca_provider *dca; + int err, slot = -ENODEV; - if (!global_dca) - return -ENODEV; + if (!dev) + return -EFAULT; spin_lock(&dca_lock); - slot = global_dca->ops->add_requester(global_dca, dev); - spin_unlock(&dca_lock); - if (slot < 0) + + /* check if the requester has not been added already */ + dca = dca_find_provider_by_dev(dev); + if (dca) { + spin_unlock(&dca_lock); + return -EEXIST; + } + + list_for_each_entry(dca, &dca_providers, node) { + slot = dca->ops->add_requester(dca, dev); + if (slot >= 0) + break; + } + if (slot < 0) { + spin_unlock(&dca_lock); 
return slot; + } - err = dca_sysfs_add_req(global_dca, dev, slot); + err = dca_sysfs_add_req(dca, dev, slot); if (err) { - spin_lock(&dca_lock); - global_dca->ops->remove_requester(global_dca, dev); + dca->ops->remove_requester(dca, dev); spin_unlock(&dca_lock); return err; } + spin_unlock(&dca_lock); return 0; } EXPORT_SYMBOL_GPL(dca_add_requester); @@ -71,30 +101,78 @@ EXPORT_SYMBOL_GPL(dca_add_requester); */ int dca_remove_requester(struct device *dev) { + struct dca_provider *dca; int slot; - if (!global_dca) - return -ENODEV; + + if (!dev) + return -EFAULT; spin_lock(&dca_lock); - slot = global_dca->ops->remove_requester(global_dca, dev); - spin_unlock(&dca_lock); - if (slot < 0) + dca = dca_find_provider_by_dev(dev); + if (!dca) { + spin_unlock(&dca_lock); + return -ENODEV; + } + slot = dca->ops->remove_requester(dca, dev); + if (slot < 0) { + spin_unlock(&dca_lock); return slot; + } - dca_sysfs_remove_req(global_dca, slot); + dca_sysfs_remove_req(dca, slot); + + spin_unlock(&dca_lock); return 0; } EXPORT_SYMBOL_GPL(dca_remove_requester); /** - * dca_get_tag - return the dca tag for the given cpu + * dca_common_get_tag - return the dca tag (serves both new and old api) + * @dev - the device that wants dca service * @cpu - the cpuid as returned by get_cpu() */ -u8 dca_get_tag(int cpu) +u8 dca_common_get_tag(struct device *dev, int cpu) { - if (!global_dca) + struct dca_provider *dca; + u8 tag; + + spin_lock(&dca_lock); + + dca = dca_find_provider_by_dev(dev); + if (!dca) { + spin_unlock(&dca_lock); return -ENODEV; - return global_dca->ops->get_tag(global_dca, cpu); + } + tag = dca->ops->get_tag(dca, dev, cpu); + + spin_unlock(&dca_lock); + return tag; +} + +/** + * dca3_get_tag - return the dca tag to the requester device + * for the given cpu (new api) + * @dev - the device that wants dca service + * @cpu - the cpuid as returned by get_cpu() + */ +u8 dca3_get_tag(struct device *dev, int cpu) +{ + if (!dev) + return -EFAULT; + + return dca_common_get_tag(dev, 
cpu); +} +EXPORT_SYMBOL_GPL(dca3_get_tag); + +/** + * dca_get_tag - return the dca tag for the given cpu (old api) + * @cpu - the cpuid as returned by get_cpu() + */ +u8 dca_get_tag(int cpu) +{ + struct device *dev = NULL; + + return dca_common_get_tag(dev, cpu); } EXPORT_SYMBOL_GPL(dca_get_tag); @@ -140,12 +218,13 @@ int register_dca_provider(struct dca_provider *dca, struct device *dev) { int err; - if (global_dca) - return -EEXIST; err = dca_sysfs_add_provider(dca, dev); if (err) return err; - global_dca = dca; + /* the provider list is only walked under dca_lock */ + spin_lock(&dca_lock); + list_add(&dca->node, &dca_providers); + spin_unlock(&dca_lock); blocking_notifier_call_chain(&dca_provider_chain, DCA_PROVIDER_ADD, NULL); return 0; @@ -158,11 +237,12 @@ EXPORT_SYMBOL_GPL(register_dca_provider); */ void unregister_dca_provider(struct dca_provider *dca) { - if (!global_dca) - return; blocking_notifier_call_chain(&dca_provider_chain, DCA_PROVIDER_REMOVE, NULL); - global_dca = NULL; + /* the provider list is only walked under dca_lock */ + spin_lock(&dca_lock); + list_del(&dca->node); + spin_unlock(&dca_lock); dca_sysfs_remove_provider(dca); } EXPORT_SYMBOL_GPL(unregister_dca_provider); @@ -187,6 +267,7 @@ EXPORT_SYMBOL_GPL(dca_unregister_notify); static int __init dca_init(void) { + printk(KERN_INFO "dca service started, version %s\n", DCA_VERSION); return dca_sysfs_init(); } diff --git a/drivers/dca/dca-sysfs.c b/drivers/dca/dca-sysfs.c index 9a70377..7af4b40 100644 --- a/drivers/dca/dca-sysfs.c +++ b/drivers/dca/dca-sysfs.c @@ -13,10 +13,11 @@ static spinlock_t dca_idr_lock; int dca_sysfs_add_req(struct dca_provider *dca, struct device *dev, int slot) { struct device *cd; + static int req_count; cd = device_create_drvdata(dca_class, dca->cd, MKDEV(0, slot + 1), NULL, - "requester%d", slot); + "requester%d", req_count++); if (IS_ERR(cd)) return PTR_ERR(cd); return 0; diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig index 6239c3d..cd30390 100644 --- a/drivers/dma/Kconfig +++ b/drivers/dma/Kconfig @@ -4,13 +4,14 @@ menuconfig DMADEVICES bool "DMA Engine support" - depends on (PCI && X86) || ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX || PPC - depends on !HIGHMEM64G 
+ depends on !HIGHMEM64G && HAS_DMA help DMA engines can do asynchronous data transfers without involving the host CPU. Currently, this framework can be used to offload memory copies in the network stack and - RAID operations in the MD driver. + RAID operations in the MD driver. This menu only presents + DMA Device drivers supported by the configured arch, it may + be empty in some cases. if DMADEVICES @@ -37,6 +38,15 @@ config INTEL_IOP_ADMA help Enable support for the Intel(R) IOP Series RAID engines. +config DW_DMAC + tristate "Synopsys DesignWare AHB DMA support" + depends on AVR32 + select DMA_ENGINE + default y if CPU_AT32AP7000 + help + Support the Synopsys DesignWare AHB DMA controller. This + can be integrated in chips such as the Atmel AT32ap7000. + config FSL_DMA bool "Freescale MPC85xx/MPC83xx DMA support" depends on PPC @@ -46,6 +56,14 @@ config FSL_DMA MPC8560/40, MPC8555, MPC8548 and MPC8641 processors. The MPC8349, MPC8360 is also supported. +config MV_XOR + bool "Marvell XOR engine support" + depends on PLAT_ORION + select ASYNC_CORE + select DMA_ENGINE + ---help--- + Enable support for the Marvell XOR engine. + config DMA_ENGINE bool @@ -55,10 +73,19 @@ comment "DMA Clients" config NET_DMA bool "Network: TCP receive copy offload" depends on DMA_ENGINE && NET + default (INTEL_IOATDMA || FSL_DMA) help This enables the use of DMA engines in the network stack to offload receive copy-to-user operations, freeing CPU cycles. - Since this is the main user of the DMA engine, it should be enabled; - say Y here. + + Say Y here if you enabled INTEL_IOATDMA or FSL_DMA, otherwise + say N. + +config DMATEST + tristate "DMA Test client" + depends on DMA_ENGINE + help + Simple DMA test client. Say N unless you're debugging a + DMA Device driver. 
endif diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile index c8036d9..14f5952 100644 --- a/drivers/dma/Makefile +++ b/drivers/dma/Makefile @@ -1,6 +1,9 @@ obj-$(CONFIG_DMA_ENGINE) += dmaengine.o obj-$(CONFIG_NET_DMA) += iovlock.o +obj-$(CONFIG_DMATEST) += dmatest.o obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o obj-$(CONFIG_FSL_DMA) += fsldma.o +obj-$(CONFIG_MV_XOR) += mv_xor.o +obj-$(CONFIG_DW_DMAC) += dw_dmac.o diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c index 97b329e..dc003a3 100644 --- a/drivers/dma/dmaengine.c +++ b/drivers/dma/dmaengine.c @@ -169,12 +169,18 @@ static void dma_client_chan_alloc(struct dma_client *client) enum dma_state_client ack; /* Find a channel */ - list_for_each_entry(device, &dma_device_list, global_node) + list_for_each_entry(device, &dma_device_list, global_node) { + /* Does the client require a specific DMA controller? */ + if (client->slave && client->slave->dma_dev + && client->slave->dma_dev != device->dev) + continue; + list_for_each_entry(chan, &device->channels, device_node) { if (!dma_chan_satisfies_mask(chan, client->cap_mask)) continue; - desc = chan->device->device_alloc_chan_resources(chan); + desc = chan->device->device_alloc_chan_resources( + chan, client); if (desc >= 0) { ack = client->event_callback(client, chan, @@ -183,12 +189,14 @@ static void dma_client_chan_alloc(struct dma_client *client) /* we are done once this client rejects * an available resource */ - if (ack == DMA_ACK) + if (ack == DMA_ACK) { dma_chan_get(chan); - else if (ack == DMA_NAK) + chan->client_count++; + } else if (ack == DMA_NAK) return; } } + } } enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie) @@ -272,8 +280,10 @@ static void dma_clients_notify_removed(struct dma_chan *chan) /* client was holding resources for this channel so * free it */ - if (ack == DMA_ACK) + if (ack == DMA_ACK) { dma_chan_put(chan); + 
chan->client_count--; + } } mutex_unlock(&dma_list_mutex); @@ -285,6 +295,10 @@ static void dma_clients_notify_removed(struct dma_chan *chan) */ void dma_async_client_register(struct dma_client *client) { + /* validate client data */ + BUG_ON(dma_has_cap(DMA_SLAVE, client->cap_mask) && + !client->slave); + mutex_lock(&dma_list_mutex); list_add_tail(&client->global_node, &dma_client_list); mutex_unlock(&dma_list_mutex); @@ -313,8 +327,10 @@ void dma_async_client_unregister(struct dma_client *client) ack = client->event_callback(client, chan, DMA_RESOURCE_REMOVED); - if (ack == DMA_ACK) + if (ack == DMA_ACK) { dma_chan_put(chan); + chan->client_count--; + } } list_del(&client->global_node); @@ -359,6 +375,10 @@ int dma_async_device_register(struct dma_device *device) !device->device_prep_dma_memset); BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && !device->device_prep_dma_interrupt); + BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) && + !device->device_prep_slave_sg); + BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) && + !device->device_terminate_all); BUG_ON(!device->device_alloc_chan_resources); BUG_ON(!device->device_free_chan_resources); @@ -378,7 +398,7 @@ int dma_async_device_register(struct dma_device *device) chan->chan_id = chancnt++; chan->dev.class = &dma_devclass; - chan->dev.parent = NULL; + chan->dev.parent = device->dev; snprintf(chan->dev.bus_id, BUS_ID_SIZE, "dma%dchan%d", device->dev_id, chan->chan_id); @@ -394,6 +414,7 @@ int dma_async_device_register(struct dma_device *device) kref_get(&device->refcount); kref_get(&device->refcount); kref_init(&chan->refcount); + chan->client_count = 0; chan->slow_ref = 0; INIT_RCU_HEAD(&chan->rcu); } diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c new file mode 100644 index 0000000..a08d197 --- /dev/null +++ b/drivers/dma/dmatest.c @@ -0,0 +1,444 @@ +/* + * DMA Engine test module + * + * Copyright (C) 2007 Atmel Corporation + * + * This program is free software; you can redistribute it and/or 
modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/delay.h> +#include <linux/dmaengine.h> +#include <linux/init.h> +#include <linux/kthread.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/random.h> +#include <linux/wait.h> + +static unsigned int test_buf_size = 16384; +module_param(test_buf_size, uint, S_IRUGO); +MODULE_PARM_DESC(test_buf_size, "Size of the memcpy test buffer"); + +static char test_channel[BUS_ID_SIZE]; +module_param_string(channel, test_channel, sizeof(test_channel), S_IRUGO); +MODULE_PARM_DESC(channel, "Bus ID of the channel to test (default: any)"); + +static char test_device[BUS_ID_SIZE]; +module_param_string(device, test_device, sizeof(test_device), S_IRUGO); +MODULE_PARM_DESC(device, "Bus ID of the DMA Engine to test (default: any)"); + +static unsigned int threads_per_chan = 1; +module_param(threads_per_chan, uint, S_IRUGO); +MODULE_PARM_DESC(threads_per_chan, + "Number of threads to start per channel (default: 1)"); + +static unsigned int max_channels; +module_param(max_channels, uint, S_IRUGO); +MODULE_PARM_DESC(max_channels, + "Maximum number of channels to use (default: all)"); + +/* + * Initialization patterns. All bytes in the source buffer have bit 7 + * set, all bytes in the destination buffer have bit 7 cleared. + * + * Bit 6 is set for all bytes which are to be copied by the DMA + * engine. Bit 5 is set for all bytes which are to be overwritten by + * the DMA engine. + * + * The remaining bits are the inverse of a counter which increments by + * one for each byte address. 
+ */ +#define PATTERN_SRC 0x80 +#define PATTERN_DST 0x00 +#define PATTERN_COPY 0x40 +#define PATTERN_OVERWRITE 0x20 +#define PATTERN_COUNT_MASK 0x1f + +struct dmatest_thread { + struct list_head node; + struct task_struct *task; + struct dma_chan *chan; + u8 *srcbuf; + u8 *dstbuf; +}; + +struct dmatest_chan { + struct list_head node; + struct dma_chan *chan; + struct list_head threads; +}; + +/* + * These are protected by dma_list_mutex since they're only used by + * the DMA client event callback + */ +static LIST_HEAD(dmatest_channels); +static unsigned int nr_channels; + +static bool dmatest_match_channel(struct dma_chan *chan) +{ + if (test_channel[0] == '\0') + return true; + return strcmp(chan->dev.bus_id, test_channel) == 0; +} + +static bool dmatest_match_device(struct dma_device *device) +{ + if (test_device[0] == '\0') + return true; + return strcmp(device->dev->bus_id, test_device) == 0; +} + +static unsigned long dmatest_random(void) +{ + unsigned long buf; + + get_random_bytes(&buf, sizeof(buf)); + return buf; +} + +static void dmatest_init_srcbuf(u8 *buf, unsigned int start, unsigned int len) +{ + unsigned int i; + + for (i = 0; i < start; i++) + buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK); + for ( ; i < start + len; i++) + buf[i] = PATTERN_SRC | PATTERN_COPY + | (~i & PATTERN_COUNT_MASK); + for ( ; i < test_buf_size; i++) + buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK); +} + +static void dmatest_init_dstbuf(u8 *buf, unsigned int start, unsigned int len) +{ + unsigned int i; + + for (i = 0; i < start; i++) + buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK); + for ( ; i < start + len; i++) + buf[i] = PATTERN_DST | PATTERN_OVERWRITE + | (~i & PATTERN_COUNT_MASK); + for ( ; i < test_buf_size; i++) + buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK); +} + +static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index, + unsigned int counter, bool is_srcbuf) +{ + u8 diff = actual ^ pattern; + u8 expected = pattern | (~counter & 
PATTERN_COUNT_MASK); + const char *thread_name = current->comm; + + if (is_srcbuf) + pr_warning("%s: srcbuf[0x%x] overwritten!" + " Expected %02x, got %02x\n", + thread_name, index, expected, actual); + else if ((pattern & PATTERN_COPY) + && (diff & (PATTERN_COPY | PATTERN_OVERWRITE))) + pr_warning("%s: dstbuf[0x%x] not copied!" + " Expected %02x, got %02x\n", + thread_name, index, expected, actual); + else if (diff & PATTERN_SRC) + pr_warning("%s: dstbuf[0x%x] was copied!" + " Expected %02x, got %02x\n", + thread_name, index, expected, actual); + else + pr_warning("%s: dstbuf[0x%x] mismatch!" + " Expected %02x, got %02x\n", + thread_name, index, expected, actual); +} + +static unsigned int dmatest_verify(u8 *buf, unsigned int start, + unsigned int end, unsigned int counter, u8 pattern, + bool is_srcbuf) +{ + unsigned int i; + unsigned int error_count = 0; + u8 actual; + + for (i = start; i < end; i++) { + actual = buf[i]; + if (actual != (pattern | (~counter & PATTERN_COUNT_MASK))) { + if (error_count < 32) + dmatest_mismatch(actual, pattern, i, counter, + is_srcbuf); + error_count++; + } + counter++; + } + + if (error_count > 32) + pr_warning("%s: %u errors suppressed\n", + current->comm, error_count - 32); + + return error_count; +} + +/* + * This function repeatedly tests DMA transfers of various lengths and + * offsets until it is told to exit by kthread_stop(). There may be + * multiple threads running this function in parallel for a single + * channel, and there may be multiple channels being tested in + * parallel. + * + * Before each test, the source and destination buffer is initialized + * with a known pattern. This pattern is different depending on + * whether it's in an area which is supposed to be copied or + * overwritten, and different in the source and destination buffers. + * So if the DMA engine doesn't copy exactly what we tell it to copy, + * we'll notice. 
+ */ +static int dmatest_func(void *data) +{ + struct dmatest_thread *thread = data; + struct dma_chan *chan; + const char *thread_name; + unsigned int src_off, dst_off, len; + unsigned int error_count; + unsigned int failed_tests = 0; + unsigned int total_tests = 0; + dma_cookie_t cookie; + enum dma_status status; + int ret; + + thread_name = current->comm; + + ret = -ENOMEM; + thread->srcbuf = kmalloc(test_buf_size, GFP_KERNEL); + if (!thread->srcbuf) + goto err_srcbuf; + thread->dstbuf = kmalloc(test_buf_size, GFP_KERNEL); + if (!thread->dstbuf) + goto err_dstbuf; + + smp_rmb(); + chan = thread->chan; + dma_chan_get(chan); + + while (!kthread_should_stop()) { + total_tests++; + + len = dmatest_random() % test_buf_size + 1; + src_off = dmatest_random() % (test_buf_size - len + 1); + dst_off = dmatest_random() % (test_buf_size - len + 1); + + dmatest_init_srcbuf(thread->srcbuf, src_off, len); + dmatest_init_dstbuf(thread->dstbuf, dst_off, len); + + cookie = dma_async_memcpy_buf_to_buf(chan, + thread->dstbuf + dst_off, + thread->srcbuf + src_off, + len); + if (dma_submit_error(cookie)) { + pr_warning("%s: #%u: submit error %d with src_off=0x%x " + "dst_off=0x%x len=0x%x\n", + thread_name, total_tests - 1, cookie, + src_off, dst_off, len); + msleep(100); + failed_tests++; + continue; + } + dma_async_memcpy_issue_pending(chan); + + do { + msleep(1); + status = dma_async_memcpy_complete( + chan, cookie, NULL, NULL); + } while (status == DMA_IN_PROGRESS); + + if (status == DMA_ERROR) { + pr_warning("%s: #%u: error during copy\n", + thread_name, total_tests - 1); + failed_tests++; + continue; + } + + error_count = 0; + + pr_debug("%s: verifying source buffer...\n", thread_name); + error_count += dmatest_verify(thread->srcbuf, 0, src_off, + 0, PATTERN_SRC, true); + error_count += dmatest_verify(thread->srcbuf, src_off, + src_off + len, src_off, + PATTERN_SRC | PATTERN_COPY, true); + error_count += dmatest_verify(thread->srcbuf, src_off + len, + test_buf_size, src_off + 
len, + PATTERN_SRC, true); + + pr_debug("%s: verifying dest buffer...\n", + thread->task->comm); + error_count += dmatest_verify(thread->dstbuf, 0, dst_off, + 0, PATTERN_DST, false); + error_count += dmatest_verify(thread->dstbuf, dst_off, + dst_off + len, src_off, + PATTERN_SRC | PATTERN_COPY, false); + error_count += dmatest_verify(thread->dstbuf, dst_off + len, + test_buf_size, dst_off + len, + PATTERN_DST, false); + + if (error_count) { + pr_warning("%s: #%u: %u errors with " + "src_off=0x%x dst_off=0x%x len=0x%x\n", + thread_name, total_tests - 1, error_count, + src_off, dst_off, len); + failed_tests++; + } else { + pr_debug("%s: #%u: No errors with " + "src_off=0x%x dst_off=0x%x len=0x%x\n", + thread_name, total_tests - 1, + src_off, dst_off, len); + } + } + + ret = 0; + dma_chan_put(chan); + kfree(thread->dstbuf); +err_dstbuf: + kfree(thread->srcbuf); +err_srcbuf: + pr_notice("%s: terminating after %u tests, %u failures (status %d)\n", + thread_name, total_tests, failed_tests, ret); + return ret; +} + +static void dmatest_cleanup_channel(struct dmatest_chan *dtc) +{ + struct dmatest_thread *thread; + struct dmatest_thread *_thread; + int ret; + + list_for_each_entry_safe(thread, _thread, &dtc->threads, node) { + ret = kthread_stop(thread->task); + pr_debug("dmatest: thread %s exited with status %d\n", + thread->task->comm, ret); + list_del(&thread->node); + kfree(thread); + } + kfree(dtc); +} + +static enum dma_state_client dmatest_add_channel(struct dma_chan *chan) +{ + struct dmatest_chan *dtc; + struct dmatest_thread *thread; + unsigned int i; + + dtc = kmalloc(sizeof(struct dmatest_chan), GFP_ATOMIC); + if (!dtc) { + pr_warning("dmatest: No memory for %s\n", chan->dev.bus_id); + return DMA_NAK; + } + + dtc->chan = chan; + INIT_LIST_HEAD(&dtc->threads); + + for (i = 0; i < threads_per_chan; i++) { + thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL); + if (!thread) { + pr_warning("dmatest: No memory for %s-test%u\n", + chan->dev.bus_id, i); + 
break; + } + thread->chan = dtc->chan; + smp_wmb(); + thread->task = kthread_run(dmatest_func, thread, "%s-test%u", + chan->dev.bus_id, i); + if (IS_ERR(thread->task)) { + pr_warning("dmatest: Failed to run thread %s-test%u\n", + chan->dev.bus_id, i); + kfree(thread); + break; + } + + /* srcbuf and dstbuf are allocated by the thread itself */ + + list_add_tail(&thread->node, &dtc->threads); + } + + pr_info("dmatest: Started %u threads using %s\n", i, chan->dev.bus_id); + + list_add_tail(&dtc->node, &dmatest_channels); + nr_channels++; + + return DMA_ACK; +} + +static enum dma_state_client dmatest_remove_channel(struct dma_chan *chan) +{ + struct dmatest_chan *dtc, *_dtc; + + list_for_each_entry_safe(dtc, _dtc, &dmatest_channels, node) { + if (dtc->chan == chan) { + list_del(&dtc->node); + dmatest_cleanup_channel(dtc); + pr_debug("dmatest: lost channel %s\n", + chan->dev.bus_id); + return DMA_ACK; + } + } + + return DMA_DUP; +} + +/* + * Start testing threads as new channels are assigned to us, and kill + * them when the channels go away. + * + * When we unregister the client, all channels are removed so this + * will also take care of cleaning things up when the module is + * unloaded. 
+ */ +static enum dma_state_client +dmatest_event(struct dma_client *client, struct dma_chan *chan, + enum dma_state state) +{ + enum dma_state_client ack = DMA_NAK; + + switch (state) { + case DMA_RESOURCE_AVAILABLE: + if (!dmatest_match_channel(chan) + || !dmatest_match_device(chan->device)) + ack = DMA_DUP; + else if (max_channels && nr_channels >= max_channels) + ack = DMA_NAK; + else + ack = dmatest_add_channel(chan); + break; + + case DMA_RESOURCE_REMOVED: + ack = dmatest_remove_channel(chan); + break; + + default: + pr_info("dmatest: Unhandled event %u (%s)\n", + state, chan->dev.bus_id); + break; + } + + return ack; +} + +static struct dma_client dmatest_client = { + .event_callback = dmatest_event, +}; + +static int __init dmatest_init(void) +{ + dma_cap_set(DMA_MEMCPY, dmatest_client.cap_mask); + dma_async_client_register(&dmatest_client); + dma_async_client_chan_request(&dmatest_client); + + return 0; +} +module_init(dmatest_init); + +static void __exit dmatest_exit(void) +{ + dma_async_client_unregister(&dmatest_client); +} +module_exit(dmatest_exit); + +MODULE_AUTHOR("Haavard Skinnemoen <hskinnemoen@atmel.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c new file mode 100644 index 0000000..94df917 --- /dev/null +++ b/drivers/dma/dw_dmac.c @@ -0,0 +1,1122 @@ +/* + * Driver for the Synopsys DesignWare DMA Controller (aka DMACA on + * AVR32 systems.) + * + * Copyright (C) 2007-2008 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include "dw_dmac_regs.h" + +/* + * This supports the Synopsys "DesignWare AHB Central DMA Controller", + * (DW_ahb_dmac) which is used with various AMBA 2.0 systems (not all + * of which use ARM any more). See the "Databook" from Synopsys for + * information beyond what licensees probably provide. + * + * The driver has currently been tested only with the Atmel AT32AP7000, + * which does not support descriptor writeback. + */ + +/* NOTE: DMS+SMS is system-specific. We should get this information + * from the platform code somehow. + */ +#define DWC_DEFAULT_CTLLO (DWC_CTLL_DST_MSIZE(0) \ + | DWC_CTLL_SRC_MSIZE(0) \ + | DWC_CTLL_DMS(0) \ + | DWC_CTLL_SMS(1) \ + | DWC_CTLL_LLP_D_EN \ + | DWC_CTLL_LLP_S_EN) + +/* + * This is configuration-dependent and usually a funny size like 4095. + * Let's round it down to the nearest power of two. + * + * Note that this is a transfer count, i.e. if we transfer 32-bit + * words, we can do 8192 bytes per descriptor. + * + * This parameter is also system-specific. + */ +#define DWC_MAX_COUNT 2048U + +/* + * Number of descriptors to allocate for each channel. This should be + * made configurable somehow; preferably, the clients (at least the + * ones using slave transfers) should be able to give us a hint. + */ +#define NR_DESCS_PER_CHANNEL 64 + +/*----------------------------------------------------------------------*/ + +/* + * Because we're not relying on writeback from the controller (it may not + * even be configured into the core!) we don't need to use dma_pool. These + * descriptors -- and associated data -- are cacheable. 
We do need to make + * sure their dcache entries are written back before handing them off to + * the controller, though. + */ + +static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc) +{ + return list_entry(dwc->active_list.next, struct dw_desc, desc_node); +} + +static struct dw_desc *dwc_first_queued(struct dw_dma_chan *dwc) +{ + return list_entry(dwc->queue.next, struct dw_desc, desc_node); +} + +static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc) +{ + struct dw_desc *desc, *_desc; + struct dw_desc *ret = NULL; + unsigned int i = 0; + + spin_lock_bh(&dwc->lock); + list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) { + if (async_tx_test_ack(&desc->txd)) { + list_del(&desc->desc_node); + ret = desc; + break; + } + dev_dbg(&dwc->chan.dev, "desc %p not ACKed\n", desc); + i++; + } + spin_unlock_bh(&dwc->lock); + + dev_vdbg(&dwc->chan.dev, "scanned %u descriptors on freelist\n", i); + + return ret; +} + +static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc) +{ + struct dw_desc *child; + + list_for_each_entry(child, &desc->txd.tx_list, desc_node) + dma_sync_single_for_cpu(dwc->chan.dev.parent, + child->txd.phys, sizeof(child->lli), + DMA_TO_DEVICE); + dma_sync_single_for_cpu(dwc->chan.dev.parent, + desc->txd.phys, sizeof(desc->lli), + DMA_TO_DEVICE); +} + +/* + * Move a descriptor, including any children, to the free list. + * `desc' must not be on any lists. 
+ */ +static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) +{ + if (desc) { + struct dw_desc *child; + + dwc_sync_desc_for_cpu(dwc, desc); + + spin_lock_bh(&dwc->lock); + list_for_each_entry(child, &desc->txd.tx_list, desc_node) + dev_vdbg(&dwc->chan.dev, + "moving child desc %p to freelist\n", + child); + list_splice_init(&desc->txd.tx_list, &dwc->free_list); + dev_vdbg(&dwc->chan.dev, "moving desc %p to freelist\n", desc); + list_add(&desc->desc_node, &dwc->free_list); + spin_unlock_bh(&dwc->lock); + } +} + +/* Called with dwc->lock held and bh disabled */ +static dma_cookie_t +dwc_assign_cookie(struct dw_dma_chan *dwc, struct dw_desc *desc) +{ + dma_cookie_t cookie = dwc->chan.cookie; + + if (++cookie < 0) + cookie = 1; + + dwc->chan.cookie = cookie; + desc->txd.cookie = cookie; + + return cookie; +} + +/*----------------------------------------------------------------------*/ + +/* Called with dwc->lock held and bh disabled */ +static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + + /* ASSERT: channel is idle */ + if (dma_readl(dw, CH_EN) & dwc->mask) { + dev_err(&dwc->chan.dev, + "BUG: Attempted to start non-idle channel\n"); + dev_err(&dwc->chan.dev, + " SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n", + channel_readl(dwc, SAR), + channel_readl(dwc, DAR), + channel_readl(dwc, LLP), + channel_readl(dwc, CTL_HI), + channel_readl(dwc, CTL_LO)); + + /* The tasklet will hopefully advance the queue... 
*/ + return; + } + + channel_writel(dwc, LLP, first->txd.phys); + channel_writel(dwc, CTL_LO, + DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); + channel_writel(dwc, CTL_HI, 0); + channel_set_bit(dw, CH_EN, dwc->mask); +} + +/*----------------------------------------------------------------------*/ + +static void +dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc) +{ + dma_async_tx_callback callback; + void *param; + struct dma_async_tx_descriptor *txd = &desc->txd; + + dev_vdbg(&dwc->chan.dev, "descriptor %u complete\n", txd->cookie); + + dwc->completed = txd->cookie; + callback = txd->callback; + param = txd->callback_param; + + dwc_sync_desc_for_cpu(dwc, desc); + list_splice_init(&txd->tx_list, &dwc->free_list); + list_move(&desc->desc_node, &dwc->free_list); + + /* + * We use dma_unmap_page() regardless of how the buffers were + * mapped before they were submitted... + */ + if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) + dma_unmap_page(dwc->chan.dev.parent, desc->lli.dar, desc->len, + DMA_FROM_DEVICE); + if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) + dma_unmap_page(dwc->chan.dev.parent, desc->lli.sar, desc->len, + DMA_TO_DEVICE); + + /* + * The API requires that no submissions are done from a + * callback, so we don't need to drop the lock here + */ + if (callback) + callback(param); +} + +static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc) +{ + struct dw_desc *desc, *_desc; + LIST_HEAD(list); + + if (dma_readl(dw, CH_EN) & dwc->mask) { + dev_err(&dwc->chan.dev, + "BUG: XFER bit set, but channel not idle!\n"); + + /* Try to continue after resetting the channel... */ + channel_clear_bit(dw, CH_EN, dwc->mask); + while (dma_readl(dw, CH_EN) & dwc->mask) + cpu_relax(); + } + + /* + * Submit queued descriptors ASAP, i.e. before we go through + * the completed ones. 
+ */ + if (!list_empty(&dwc->queue)) + dwc_dostart(dwc, dwc_first_queued(dwc)); + list_splice_init(&dwc->active_list, &list); + list_splice_init(&dwc->queue, &dwc->active_list); + + list_for_each_entry_safe(desc, _desc, &list, desc_node) + dwc_descriptor_complete(dwc, desc); +} + +static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) +{ + dma_addr_t llp; + struct dw_desc *desc, *_desc; + struct dw_desc *child; + u32 status_xfer; + + /* + * Clear block interrupt flag before scanning so that we don't + * miss any, and read LLP before RAW_XFER to ensure it is + * valid if we decide to scan the list. + */ + dma_writel(dw, CLEAR.BLOCK, dwc->mask); + llp = channel_readl(dwc, LLP); + status_xfer = dma_readl(dw, RAW.XFER); + + if (status_xfer & dwc->mask) { + /* Everything we've submitted is done */ + dma_writel(dw, CLEAR.XFER, dwc->mask); + dwc_complete_all(dw, dwc); + return; + } + + dev_vdbg(&dwc->chan.dev, "scan_descriptors: llp=0x%x\n", llp); + + list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) { + if (desc->lli.llp == llp) + /* This one is currently in progress */ + return; + + list_for_each_entry(child, &desc->txd.tx_list, desc_node) + if (child->lli.llp == llp) + /* Currently in progress */ + return; + + /* + * No descriptors so far seem to be in progress, i.e. + * this one must be done. + */ + dwc_descriptor_complete(dwc, desc); + } + + dev_err(&dwc->chan.dev, + "BUG: All descriptors done, but channel not idle!\n"); + + /* Try to continue after resetting the channel... 
*/ + channel_clear_bit(dw, CH_EN, dwc->mask); + while (dma_readl(dw, CH_EN) & dwc->mask) + cpu_relax(); + + if (!list_empty(&dwc->queue)) { + dwc_dostart(dwc, dwc_first_queued(dwc)); + list_splice_init(&dwc->queue, &dwc->active_list); + } +} + +static void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli) +{ + dev_printk(KERN_CRIT, &dwc->chan.dev, + " desc: s0x%x d0x%x l0x%x c0x%x:%x\n", + lli->sar, lli->dar, lli->llp, + lli->ctlhi, lli->ctllo); +} + +static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) +{ + struct dw_desc *bad_desc; + struct dw_desc *child; + + dwc_scan_descriptors(dw, dwc); + + /* + * The descriptor currently at the head of the active list is + * borked. Since we don't have any way to report errors, we'll + * just have to scream loudly and try to carry on. + */ + bad_desc = dwc_first_active(dwc); + list_del_init(&bad_desc->desc_node); + list_splice_init(&dwc->queue, dwc->active_list.prev); + + /* Clear the error flag and try to restart the controller */ + dma_writel(dw, CLEAR.ERROR, dwc->mask); + if (!list_empty(&dwc->active_list)) + dwc_dostart(dwc, dwc_first_active(dwc)); + + /* + * KERN_CRITICAL may seem harsh, but since this only happens + * when someone submits a bad physical address in a + * descriptor, we should consider ourselves lucky that the + * controller flagged an error instead of scribbling over + * random memory locations. 
+ */ + dev_printk(KERN_CRIT, &dwc->chan.dev, + "Bad descriptor submitted for DMA!\n"); + dev_printk(KERN_CRIT, &dwc->chan.dev, + " cookie: %d\n", bad_desc->txd.cookie); + dwc_dump_lli(dwc, &bad_desc->lli); + list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) + dwc_dump_lli(dwc, &child->lli); + + /* Pretend the descriptor completed successfully */ + dwc_descriptor_complete(dwc, bad_desc); +} + +static void dw_dma_tasklet(unsigned long data) +{ + struct dw_dma *dw = (struct dw_dma *)data; + struct dw_dma_chan *dwc; + u32 status_block; + u32 status_xfer; + u32 status_err; + int i; + + status_block = dma_readl(dw, RAW.BLOCK); + status_xfer = dma_readl(dw, RAW.BLOCK); + status_err = dma_readl(dw, RAW.ERROR); + + dev_vdbg(dw->dma.dev, "tasklet: status_block=%x status_err=%x\n", + status_block, status_err); + + for (i = 0; i < dw->dma.chancnt; i++) { + dwc = &dw->chan[i]; + spin_lock(&dwc->lock); + if (status_err & (1 << i)) + dwc_handle_error(dw, dwc); + else if ((status_block | status_xfer) & (1 << i)) + dwc_scan_descriptors(dw, dwc); + spin_unlock(&dwc->lock); + } + + /* + * Re-enable interrupts. Block Complete interrupts are only + * enabled if the INT_EN bit in the descriptor is set. This + * will trigger a scan before the whole list is done. + */ + channel_set_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_set_bit(dw, MASK.BLOCK, dw->all_chan_mask); + channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask); +} + +static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) +{ + struct dw_dma *dw = dev_id; + u32 status; + + dev_vdbg(dw->dma.dev, "interrupt: status=0x%x\n", + dma_readl(dw, STATUS_INT)); + + /* + * Just disable the interrupts. We'll turn them back on in the + * softirq handler. 
+ */ + channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); + channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); + + status = dma_readl(dw, STATUS_INT); + if (status) { + dev_err(dw->dma.dev, + "BUG: Unexpected interrupts pending: 0x%x\n", + status); + + /* Try to recover */ + channel_clear_bit(dw, MASK.XFER, (1 << 8) - 1); + channel_clear_bit(dw, MASK.BLOCK, (1 << 8) - 1); + channel_clear_bit(dw, MASK.SRC_TRAN, (1 << 8) - 1); + channel_clear_bit(dw, MASK.DST_TRAN, (1 << 8) - 1); + channel_clear_bit(dw, MASK.ERROR, (1 << 8) - 1); + } + + tasklet_schedule(&dw->tasklet); + + return IRQ_HANDLED; +} + +/*----------------------------------------------------------------------*/ + +static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct dw_desc *desc = txd_to_dw_desc(tx); + struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan); + dma_cookie_t cookie; + + spin_lock_bh(&dwc->lock); + cookie = dwc_assign_cookie(dwc, desc); + + /* + * REVISIT: We should attempt to chain as many descriptors as + * possible, perhaps even appending to those already submitted + * for DMA. But this is hard to do in a race-free manner. 
+ */ + if (list_empty(&dwc->active_list)) { + dev_vdbg(&tx->chan->dev, "tx_submit: started %u\n", + desc->txd.cookie); + dwc_dostart(dwc, desc); + list_add_tail(&desc->desc_node, &dwc->active_list); + } else { + dev_vdbg(&tx->chan->dev, "tx_submit: queued %u\n", + desc->txd.cookie); + + list_add_tail(&desc->desc_node, &dwc->queue); + } + + spin_unlock_bh(&dwc->lock); + + return cookie; +} + +static struct dma_async_tx_descriptor * +dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_desc *desc; + struct dw_desc *first; + struct dw_desc *prev; + size_t xfer_count; + size_t offset; + unsigned int src_width; + unsigned int dst_width; + u32 ctllo; + + dev_vdbg(&chan->dev, "prep_dma_memcpy d0x%x s0x%x l0x%zx f0x%lx\n", + dest, src, len, flags); + + if (unlikely(!len)) { + dev_dbg(&chan->dev, "prep_dma_memcpy: length is zero!\n"); + return NULL; + } + + /* + * We can be a lot more clever here, but this should take care + * of the most common optimization. 
+ */ + if (!((src | dest | len) & 3)) + src_width = dst_width = 2; + else if (!((src | dest | len) & 1)) + src_width = dst_width = 1; + else + src_width = dst_width = 0; + + ctllo = DWC_DEFAULT_CTLLO + | DWC_CTLL_DST_WIDTH(dst_width) + | DWC_CTLL_SRC_WIDTH(src_width) + | DWC_CTLL_DST_INC + | DWC_CTLL_SRC_INC + | DWC_CTLL_FC_M2M; + prev = first = NULL; + + for (offset = 0; offset < len; offset += xfer_count << src_width) { + xfer_count = min_t(size_t, (len - offset) >> src_width, + DWC_MAX_COUNT); + + desc = dwc_desc_get(dwc); + if (!desc) + goto err_desc_get; + + desc->lli.sar = src + offset; + desc->lli.dar = dest + offset; + desc->lli.ctllo = ctllo; + desc->lli.ctlhi = xfer_count; + + if (!first) { + first = desc; + } else { + prev->lli.llp = desc->txd.phys; + dma_sync_single_for_device(chan->dev.parent, + prev->txd.phys, sizeof(prev->lli), + DMA_TO_DEVICE); + list_add_tail(&desc->desc_node, + &first->txd.tx_list); + } + prev = desc; + } + + + if (flags & DMA_PREP_INTERRUPT) + /* Trigger interrupt after last block */ + prev->lli.ctllo |= DWC_CTLL_INT_EN; + + prev->lli.llp = 0; + dma_sync_single_for_device(chan->dev.parent, + prev->txd.phys, sizeof(prev->lli), + DMA_TO_DEVICE); + + first->txd.flags = flags; + first->len = len; + + return &first->txd; + +err_desc_get: + dwc_desc_put(dwc, first); + return NULL; +} + +static struct dma_async_tx_descriptor * +dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_data_direction direction, + unsigned long flags) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma_slave *dws = dwc->dws; + struct dw_desc *prev; + struct dw_desc *first; + u32 ctllo; + dma_addr_t reg; + unsigned int reg_width; + unsigned int mem_width; + unsigned int i; + struct scatterlist *sg; + size_t total_len = 0; + + dev_vdbg(&chan->dev, "prep_dma_slave\n"); + + if (unlikely(!dws || !sg_len)) + return NULL; + + reg_width = dws->slave.reg_width; + prev = first = NULL; + + sg_len = 
dma_map_sg(chan->dev.parent, sgl, sg_len, direction); + + switch (direction) { + case DMA_TO_DEVICE: + ctllo = (DWC_DEFAULT_CTLLO + | DWC_CTLL_DST_WIDTH(reg_width) + | DWC_CTLL_DST_FIX + | DWC_CTLL_SRC_INC + | DWC_CTLL_FC_M2P); + reg = dws->slave.tx_reg; + for_each_sg(sgl, sg, sg_len, i) { + struct dw_desc *desc; + u32 len; + u32 mem; + + desc = dwc_desc_get(dwc); + if (!desc) { + dev_err(&chan->dev, + "not enough descriptors available\n"); + goto err_desc_get; + } + + mem = sg_phys(sg); + len = sg_dma_len(sg); + mem_width = 2; + if (unlikely(mem & 3 || len & 3)) + mem_width = 0; + + desc->lli.sar = mem; + desc->lli.dar = reg; + desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width); + desc->lli.ctlhi = len >> mem_width; + + if (!first) { + first = desc; + } else { + prev->lli.llp = desc->txd.phys; + dma_sync_single_for_device(chan->dev.parent, + prev->txd.phys, + sizeof(prev->lli), + DMA_TO_DEVICE); + list_add_tail(&desc->desc_node, + &first->txd.tx_list); + } + prev = desc; + total_len += len; + } + break; + case DMA_FROM_DEVICE: + ctllo = (DWC_DEFAULT_CTLLO + | DWC_CTLL_SRC_WIDTH(reg_width) + | DWC_CTLL_DST_INC + | DWC_CTLL_SRC_FIX + | DWC_CTLL_FC_P2M); + + reg = dws->slave.rx_reg; + for_each_sg(sgl, sg, sg_len, i) { + struct dw_desc *desc; + u32 len; + u32 mem; + + desc = dwc_desc_get(dwc); + if (!desc) { + dev_err(&chan->dev, + "not enough descriptors available\n"); + goto err_desc_get; + } + + mem = sg_phys(sg); + len = sg_dma_len(sg); + mem_width = 2; + if (unlikely(mem & 3 || len & 3)) + mem_width = 0; + + desc->lli.sar = reg; + desc->lli.dar = mem; + desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width); + desc->lli.ctlhi = len >> reg_width; + + if (!first) { + first = desc; + } else { + prev->lli.llp = desc->txd.phys; + dma_sync_single_for_device(chan->dev.parent, + prev->txd.phys, + sizeof(prev->lli), + DMA_TO_DEVICE); + list_add_tail(&desc->desc_node, + &first->txd.tx_list); + } + prev = desc; + total_len += len; + } + break; + default: + return 
NULL; + } + + if (flags & DMA_PREP_INTERRUPT) + /* Trigger interrupt after last block */ + prev->lli.ctllo |= DWC_CTLL_INT_EN; + + prev->lli.llp = 0; + dma_sync_single_for_device(chan->dev.parent, + prev->txd.phys, sizeof(prev->lli), + DMA_TO_DEVICE); + + first->len = total_len; + + return &first->txd; + +err_desc_get: + dwc_desc_put(dwc, first); + return NULL; +} + +static void dwc_terminate_all(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); + struct dw_desc *desc, *_desc; + LIST_HEAD(list); + + /* + * This is only called when something went wrong elsewhere, so + * we don't really care about the data. Just disable the + * channel. We still have to poll the channel enable bit due + * to AHB/HSB limitations. + */ + spin_lock_bh(&dwc->lock); + + channel_clear_bit(dw, CH_EN, dwc->mask); + + while (dma_readl(dw, CH_EN) & dwc->mask) + cpu_relax(); + + /* active_list entries will end up before queued entries */ + list_splice_init(&dwc->queue, &list); + list_splice_init(&dwc->active_list, &list); + + spin_unlock_bh(&dwc->lock); + + /* Flush all pending and queued descriptors */ + list_for_each_entry_safe(desc, _desc, &list, desc_node) + dwc_descriptor_complete(dwc, desc); +} + +static enum dma_status +dwc_is_tx_complete(struct dma_chan *chan, + dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + dma_cookie_t last_used; + dma_cookie_t last_complete; + int ret; + + last_complete = dwc->completed; + last_used = chan->cookie; + + ret = dma_async_is_complete(cookie, last_complete, last_used); + if (ret != DMA_SUCCESS) { + dwc_scan_descriptors(to_dw_dma(chan->device), dwc); + + last_complete = dwc->completed; + last_used = chan->cookie; + + ret = dma_async_is_complete(cookie, last_complete, last_used); + } + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + return ret; +} + +static void dwc_issue_pending(struct 
dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + + spin_lock_bh(&dwc->lock); + if (!list_empty(&dwc->queue)) + dwc_scan_descriptors(to_dw_dma(chan->device), dwc); + spin_unlock_bh(&dwc->lock); +} + +static int dwc_alloc_chan_resources(struct dma_chan *chan, + struct dma_client *client) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); + struct dw_desc *desc; + struct dma_slave *slave; + struct dw_dma_slave *dws; + int i; + u32 cfghi; + u32 cfglo; + + dev_vdbg(&chan->dev, "alloc_chan_resources\n"); + + /* Channels doing slave DMA can only handle one client. */ + if (dwc->dws || client->slave) { + if (chan->client_count) + return -EBUSY; + } + + /* ASSERT: channel is idle */ + if (dma_readl(dw, CH_EN) & dwc->mask) { + dev_dbg(&chan->dev, "DMA channel not idle?\n"); + return -EIO; + } + + dwc->completed = chan->cookie = 1; + + cfghi = DWC_CFGH_FIFO_MODE; + cfglo = 0; + + slave = client->slave; + if (slave) { + /* + * We need controller-specific data to set up slave + * transfers. + */ + BUG_ON(!slave->dma_dev || slave->dma_dev != dw->dma.dev); + + dws = container_of(slave, struct dw_dma_slave, slave); + + dwc->dws = dws; + cfghi = dws->cfg_hi; + cfglo = dws->cfg_lo; + } else { + dwc->dws = NULL; + } + + channel_writel(dwc, CFG_LO, cfglo); + channel_writel(dwc, CFG_HI, cfghi); + + /* + * NOTE: some controllers may have additional features that we + * need to initialize here, like "scatter-gather" (which + * doesn't mean what you think it means), and status writeback. 
+ */ + + spin_lock_bh(&dwc->lock); + i = dwc->descs_allocated; + while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) { + spin_unlock_bh(&dwc->lock); + + desc = kzalloc(sizeof(struct dw_desc), GFP_KERNEL); + if (!desc) { + dev_info(&chan->dev, + "only allocated %d descriptors\n", i); + spin_lock_bh(&dwc->lock); + break; + } + + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.tx_submit = dwc_tx_submit; + desc->txd.flags = DMA_CTRL_ACK; + INIT_LIST_HEAD(&desc->txd.tx_list); + desc->txd.phys = dma_map_single(chan->dev.parent, &desc->lli, + sizeof(desc->lli), DMA_TO_DEVICE); + dwc_desc_put(dwc, desc); + + spin_lock_bh(&dwc->lock); + i = ++dwc->descs_allocated; + } + + /* Enable interrupts */ + channel_set_bit(dw, MASK.XFER, dwc->mask); + channel_set_bit(dw, MASK.BLOCK, dwc->mask); + channel_set_bit(dw, MASK.ERROR, dwc->mask); + + spin_unlock_bh(&dwc->lock); + + dev_dbg(&chan->dev, + "alloc_chan_resources allocated %d descriptors\n", i); + + return i; +} + +static void dwc_free_chan_resources(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); + struct dw_desc *desc, *_desc; + LIST_HEAD(list); + + dev_dbg(&chan->dev, "free_chan_resources (descs allocated=%u)\n", + dwc->descs_allocated); + + /* ASSERT: channel is idle */ + BUG_ON(!list_empty(&dwc->active_list)); + BUG_ON(!list_empty(&dwc->queue)); + BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask); + + spin_lock_bh(&dwc->lock); + list_splice_init(&dwc->free_list, &list); + dwc->descs_allocated = 0; + dwc->dws = NULL; + + /* Disable interrupts */ + channel_clear_bit(dw, MASK.XFER, dwc->mask); + channel_clear_bit(dw, MASK.BLOCK, dwc->mask); + channel_clear_bit(dw, MASK.ERROR, dwc->mask); + + spin_unlock_bh(&dwc->lock); + + list_for_each_entry_safe(desc, _desc, &list, desc_node) { + dev_vdbg(&chan->dev, " freeing descriptor %p\n", desc); + dma_unmap_single(chan->dev.parent, desc->txd.phys, + sizeof(desc->lli), DMA_TO_DEVICE); + 
kfree(desc); + } + + dev_vdbg(&chan->dev, "free_chan_resources done\n"); +} + +/*----------------------------------------------------------------------*/ + +static void dw_dma_off(struct dw_dma *dw) +{ + dma_writel(dw, CFG, 0); + + channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); + channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask); + channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask); + channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); + + while (dma_readl(dw, CFG) & DW_CFG_DMA_EN) + cpu_relax(); +} + +static int __init dw_probe(struct platform_device *pdev) +{ + struct dw_dma_platform_data *pdata; + struct resource *io; + struct dw_dma *dw; + size_t size; + int irq; + int err; + int i; + + pdata = pdev->dev.platform_data; + if (!pdata || pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) + return -EINVAL; + + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!io) + return -EINVAL; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + size = sizeof(struct dw_dma); + size += pdata->nr_channels * sizeof(struct dw_dma_chan); + dw = kzalloc(size, GFP_KERNEL); + if (!dw) + return -ENOMEM; + + if (!request_mem_region(io->start, DW_REGLEN, pdev->dev.driver->name)) { + err = -EBUSY; + goto err_kfree; + } + + memset(dw, 0, sizeof *dw); + + dw->regs = ioremap(io->start, DW_REGLEN); + if (!dw->regs) { + err = -ENOMEM; + goto err_release_r; + } + + dw->clk = clk_get(&pdev->dev, "hclk"); + if (IS_ERR(dw->clk)) { + err = PTR_ERR(dw->clk); + goto err_clk; + } + clk_enable(dw->clk); + + /* force dma off, just in case */ + dw_dma_off(dw); + + err = request_irq(irq, dw_dma_interrupt, 0, "dw_dmac", dw); + if (err) + goto err_irq; + + platform_set_drvdata(pdev, dw); + + tasklet_init(&dw->tasklet, dw_dma_tasklet, (unsigned long)dw); + + dw->all_chan_mask = (1 << pdata->nr_channels) - 1; + + INIT_LIST_HEAD(&dw->dma.channels); + for (i = 0; i < pdata->nr_channels; i++, dw->dma.chancnt++) { + 
struct dw_dma_chan *dwc = &dw->chan[i]; + + dwc->chan.device = &dw->dma; + dwc->chan.cookie = dwc->completed = 1; + dwc->chan.chan_id = i; + list_add_tail(&dwc->chan.device_node, &dw->dma.channels); + + dwc->ch_regs = &__dw_regs(dw)->CHAN[i]; + spin_lock_init(&dwc->lock); + dwc->mask = 1 << i; + + INIT_LIST_HEAD(&dwc->active_list); + INIT_LIST_HEAD(&dwc->queue); + INIT_LIST_HEAD(&dwc->free_list); + + channel_clear_bit(dw, CH_EN, dwc->mask); + } + + /* Clear/disable all interrupts on all channels. */ + dma_writel(dw, CLEAR.XFER, dw->all_chan_mask); + dma_writel(dw, CLEAR.BLOCK, dw->all_chan_mask); + dma_writel(dw, CLEAR.SRC_TRAN, dw->all_chan_mask); + dma_writel(dw, CLEAR.DST_TRAN, dw->all_chan_mask); + dma_writel(dw, CLEAR.ERROR, dw->all_chan_mask); + + channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); + channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask); + channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask); + channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); + + dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask); + dma_cap_set(DMA_SLAVE, dw->dma.cap_mask); + dw->dma.dev = &pdev->dev; + dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources; + dw->dma.device_free_chan_resources = dwc_free_chan_resources; + + dw->dma.device_prep_dma_memcpy = dwc_prep_dma_memcpy; + + dw->dma.device_prep_slave_sg = dwc_prep_slave_sg; + dw->dma.device_terminate_all = dwc_terminate_all; + + dw->dma.device_is_tx_complete = dwc_is_tx_complete; + dw->dma.device_issue_pending = dwc_issue_pending; + + dma_writel(dw, CFG, DW_CFG_DMA_EN); + + printk(KERN_INFO "%s: DesignWare DMA Controller, %d channels\n", + pdev->dev.bus_id, dw->dma.chancnt); + + dma_async_device_register(&dw->dma); + + return 0; + +err_irq: + clk_disable(dw->clk); + clk_put(dw->clk); +err_clk: + iounmap(dw->regs); + dw->regs = NULL; +err_release_r: + release_resource(io); +err_kfree: + kfree(dw); + return err; +} + +static int __exit dw_remove(struct 
platform_device *pdev) +{ + struct dw_dma *dw = platform_get_drvdata(pdev); + struct dw_dma_chan *dwc, *_dwc; + struct resource *io; + + dw_dma_off(dw); + dma_async_device_unregister(&dw->dma); + + free_irq(platform_get_irq(pdev, 0), dw); + tasklet_kill(&dw->tasklet); + + list_for_each_entry_safe(dwc, _dwc, &dw->dma.channels, + chan.device_node) { + list_del(&dwc->chan.device_node); + channel_clear_bit(dw, CH_EN, dwc->mask); + } + + clk_disable(dw->clk); + clk_put(dw->clk); + + iounmap(dw->regs); + dw->regs = NULL; + + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(io->start, DW_REGLEN); + + kfree(dw); + + return 0; +} + +static void dw_shutdown(struct platform_device *pdev) +{ + struct dw_dma *dw = platform_get_drvdata(pdev); + + dw_dma_off(platform_get_drvdata(pdev)); + clk_disable(dw->clk); +} + +static int dw_suspend_late(struct platform_device *pdev, pm_message_t mesg) +{ + struct dw_dma *dw = platform_get_drvdata(pdev); + + dw_dma_off(platform_get_drvdata(pdev)); + clk_disable(dw->clk); + return 0; +} + +static int dw_resume_early(struct platform_device *pdev) +{ + struct dw_dma *dw = platform_get_drvdata(pdev); + + clk_enable(dw->clk); + dma_writel(dw, CFG, DW_CFG_DMA_EN); + return 0; + +} + +static struct platform_driver dw_driver = { + .remove = __exit_p(dw_remove), + .shutdown = dw_shutdown, + .suspend_late = dw_suspend_late, + .resume_early = dw_resume_early, + .driver = { + .name = "dw_dmac", + }, +}; + +static int __init dw_init(void) +{ + return platform_driver_probe(&dw_driver, dw_probe); +} +module_init(dw_init); + +static void __exit dw_exit(void) +{ + platform_driver_unregister(&dw_driver); +} +module_exit(dw_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller driver"); +MODULE_AUTHOR("Haavard Skinnemoen <haavard.skinnemoen@atmel.com>"); diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h new file mode 100644 index 0000000..00fdd18 --- /dev/null +++ 
b/drivers/dma/dw_dmac_regs.h @@ -0,0 +1,225 @@ +/* + * Driver for the Synopsys DesignWare AHB DMA Controller + * + * Copyright (C) 2005-2007 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/dw_dmac.h> + +#define DW_DMA_MAX_NR_CHANNELS 8 + +/* + * Redefine this macro to handle differences between 32- and 64-bit + * addressing, big vs. little endian, etc. + */ +#define DW_REG(name) u32 name; u32 __pad_##name + +/* Hardware register definitions. */ +struct dw_dma_chan_regs { + DW_REG(SAR); /* Source Address Register */ + DW_REG(DAR); /* Destination Address Register */ + DW_REG(LLP); /* Linked List Pointer */ + u32 CTL_LO; /* Control Register Low */ + u32 CTL_HI; /* Control Register High */ + DW_REG(SSTAT); + DW_REG(DSTAT); + DW_REG(SSTATAR); + DW_REG(DSTATAR); + u32 CFG_LO; /* Configuration Register Low */ + u32 CFG_HI; /* Configuration Register High */ + DW_REG(SGR); + DW_REG(DSR); +}; + +struct dw_dma_irq_regs { + DW_REG(XFER); + DW_REG(BLOCK); + DW_REG(SRC_TRAN); + DW_REG(DST_TRAN); + DW_REG(ERROR); +}; + +struct dw_dma_regs { + /* per-channel registers */ + struct dw_dma_chan_regs CHAN[DW_DMA_MAX_NR_CHANNELS]; + + /* irq handling */ + struct dw_dma_irq_regs RAW; /* r */ + struct dw_dma_irq_regs STATUS; /* r (raw & mask) */ + struct dw_dma_irq_regs MASK; /* rw (set = irq enabled) */ + struct dw_dma_irq_regs CLEAR; /* w (ack, affects "raw") */ + + DW_REG(STATUS_INT); /* r */ + + /* software handshaking */ + DW_REG(REQ_SRC); + DW_REG(REQ_DST); + DW_REG(SGL_REQ_SRC); + DW_REG(SGL_REQ_DST); + DW_REG(LAST_SRC); + DW_REG(LAST_DST); + + /* miscellaneous */ + DW_REG(CFG); + DW_REG(CH_EN); + DW_REG(ID); + DW_REG(TEST); + + /* optional encoded params, 0x3c8..0x3 */ +}; + +/* Bitfields in CTL_LO */ +#define DWC_CTLL_INT_EN (1 << 0) /* irqs enabled? 
*/ +#define DWC_CTLL_DST_WIDTH(n) ((n)<<1) /* bytes per element */ +#define DWC_CTLL_SRC_WIDTH(n) ((n)<<4) +#define DWC_CTLL_DST_INC (0<<7) /* DAR update/not */ +#define DWC_CTLL_DST_DEC (1<<7) +#define DWC_CTLL_DST_FIX (2<<7) +#define DWC_CTLL_SRC_INC (0<<7) /* SAR update/not */ +#define DWC_CTLL_SRC_DEC (1<<9) +#define DWC_CTLL_SRC_FIX (2<<9) +#define DWC_CTLL_DST_MSIZE(n) ((n)<<11) /* burst, #elements */ +#define DWC_CTLL_SRC_MSIZE(n) ((n)<<14) +#define DWC_CTLL_S_GATH_EN (1 << 17) /* src gather, !FIX */ +#define DWC_CTLL_D_SCAT_EN (1 << 18) /* dst scatter, !FIX */ +#define DWC_CTLL_FC_M2M (0 << 20) /* mem-to-mem */ +#define DWC_CTLL_FC_M2P (1 << 20) /* mem-to-periph */ +#define DWC_CTLL_FC_P2M (2 << 20) /* periph-to-mem */ +#define DWC_CTLL_FC_P2P (3 << 20) /* periph-to-periph */ +/* plus 4 transfer types for peripheral-as-flow-controller */ +#define DWC_CTLL_DMS(n) ((n)<<23) /* dst master select */ +#define DWC_CTLL_SMS(n) ((n)<<25) /* src master select */ +#define DWC_CTLL_LLP_D_EN (1 << 27) /* dest block chain */ +#define DWC_CTLL_LLP_S_EN (1 << 28) /* src block chain */ + +/* Bitfields in CTL_HI */ +#define DWC_CTLH_DONE 0x00001000 +#define DWC_CTLH_BLOCK_TS_MASK 0x00000fff + +/* Bitfields in CFG_LO. Platform-configurable bits are in <linux/dw_dmac.h> */ +#define DWC_CFGL_CH_SUSP (1 << 8) /* pause xfer */ +#define DWC_CFGL_FIFO_EMPTY (1 << 9) /* pause xfer */ +#define DWC_CFGL_HS_DST (1 << 10) /* handshake w/dst */ +#define DWC_CFGL_HS_SRC (1 << 11) /* handshake w/src */ +#define DWC_CFGL_MAX_BURST(x) ((x) << 20) +#define DWC_CFGL_RELOAD_SAR (1 << 30) +#define DWC_CFGL_RELOAD_DAR (1 << 31) + +/* Bitfields in CFG_HI. 
Platform-configurable bits are in <linux/dw_dmac.h> */ +#define DWC_CFGH_DS_UPD_EN (1 << 5) +#define DWC_CFGH_SS_UPD_EN (1 << 6) + +/* Bitfields in SGR */ +#define DWC_SGR_SGI(x) ((x) << 0) +#define DWC_SGR_SGC(x) ((x) << 20) + +/* Bitfields in DSR */ +#define DWC_DSR_DSI(x) ((x) << 0) +#define DWC_DSR_DSC(x) ((x) << 20) + +/* Bitfields in CFG */ +#define DW_CFG_DMA_EN (1 << 0) + +#define DW_REGLEN 0x400 + +struct dw_dma_chan { + struct dma_chan chan; + void __iomem *ch_regs; + u8 mask; + + spinlock_t lock; + + /* these other elements are all protected by lock */ + dma_cookie_t completed; + struct list_head active_list; + struct list_head queue; + struct list_head free_list; + + struct dw_dma_slave *dws; + + unsigned int descs_allocated; +}; + +static inline struct dw_dma_chan_regs __iomem * +__dwc_regs(struct dw_dma_chan *dwc) +{ + return dwc->ch_regs; +} + +#define channel_readl(dwc, name) \ + __raw_readl(&(__dwc_regs(dwc)->name)) +#define channel_writel(dwc, name, val) \ + __raw_writel((val), &(__dwc_regs(dwc)->name)) + +static inline struct dw_dma_chan *to_dw_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct dw_dma_chan, chan); +} + + +struct dw_dma { + struct dma_device dma; + void __iomem *regs; + struct tasklet_struct tasklet; + struct clk *clk; + + u8 all_chan_mask; + + struct dw_dma_chan chan[0]; +}; + +static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw) +{ + return dw->regs; +} + +#define dma_readl(dw, name) \ + __raw_readl(&(__dw_regs(dw)->name)) +#define dma_writel(dw, name, val) \ + __raw_writel((val), &(__dw_regs(dw)->name)) + +#define channel_set_bit(dw, reg, mask) \ + dma_writel(dw, reg, ((mask) << 8) | (mask)) +#define channel_clear_bit(dw, reg, mask) \ + dma_writel(dw, reg, ((mask) << 8) | 0) + +static inline struct dw_dma *to_dw_dma(struct dma_device *ddev) +{ + return container_of(ddev, struct dw_dma, dma); +} + +/* LLI == Linked List Item; a.k.a. 
DMA block descriptor */ +struct dw_lli { + /* values that are not changed by hardware */ + dma_addr_t sar; + dma_addr_t dar; + dma_addr_t llp; /* chain to next lli */ + u32 ctllo; + /* values that may get written back: */ + u32 ctlhi; + /* sstat and dstat can snapshot peripheral register state. + * silicon config may discard either or both... + */ + u32 sstat; + u32 dstat; +}; + +struct dw_desc { + /* FIRST values the hardware uses */ + struct dw_lli lli; + + /* THEN values for driver housekeeping */ + struct list_head desc_node; + struct dma_async_tx_descriptor txd; + size_t len; +}; + +static inline struct dw_desc * +txd_to_dw_desc(struct dma_async_tx_descriptor *txd) +{ + return container_of(txd, struct dw_desc, txd); +} diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c index 054eabf..c0059ca 100644 --- a/drivers/dma/fsldma.c +++ b/drivers/dma/fsldma.c @@ -366,7 +366,8 @@ static struct fsl_desc_sw *fsl_dma_alloc_descriptor( * * Return - The number of descriptors allocated. */ -static int fsl_dma_alloc_chan_resources(struct dma_chan *chan) +static int fsl_dma_alloc_chan_resources(struct dma_chan *chan, + struct dma_client *client) { struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan); LIST_HEAD(tmp_list); @@ -809,8 +810,7 @@ static int fsl_dma_self_test(struct fsl_dma_chan *fsl_chan) if (!src) { dev_err(fsl_chan->dev, "selftest: Cannot alloc memory for test!\n"); - err = -ENOMEM; - goto out; + return -ENOMEM; } dest = src + test_size; @@ -820,7 +820,7 @@ static int fsl_dma_self_test(struct fsl_dma_chan *fsl_chan) chan = &fsl_chan->common; - if (fsl_dma_alloc_chan_resources(chan) < 1) { + if (fsl_dma_alloc_chan_resources(chan, NULL) < 1) { dev_err(fsl_chan->dev, "selftest: Cannot alloc resources for DMA\n"); err = -ENODEV; @@ -842,13 +842,13 @@ static int fsl_dma_self_test(struct fsl_dma_chan *fsl_chan) if (fsl_dma_is_complete(chan, cookie, NULL, NULL) != DMA_SUCCESS) { dev_err(fsl_chan->dev, "selftest: Time out!\n"); err = -ENODEV; - goto out; + goto 
free_resources; } /* Test free and re-alloc channel resources */ fsl_dma_free_chan_resources(chan); - if (fsl_dma_alloc_chan_resources(chan) < 1) { + if (fsl_dma_alloc_chan_resources(chan, NULL) < 1) { dev_err(fsl_chan->dev, "selftest: Cannot alloc resources for DMA\n"); err = -ENODEV; @@ -927,8 +927,7 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev, if (!new_fsl_chan) { dev_err(&dev->dev, "No free memory for allocating " "dma channels!\n"); - err = -ENOMEM; - goto err; + return -ENOMEM; } /* get dma channel register base */ @@ -936,7 +935,7 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev, if (err) { dev_err(&dev->dev, "Can't get %s property 'reg'\n", dev->node->full_name); - goto err; + goto err_no_reg; } new_fsl_chan->feature = *(u32 *)match->data; @@ -958,7 +957,7 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev, dev_err(&dev->dev, "There is no %d channel!\n", new_fsl_chan->id); err = -EINVAL; - goto err; + goto err_no_chan; } fdev->chan[new_fsl_chan->id] = new_fsl_chan; tasklet_init(&new_fsl_chan->tasklet, dma_do_tasklet, @@ -997,23 +996,26 @@ static int __devinit of_fsl_dma_chan_probe(struct of_device *dev, if (err) { dev_err(&dev->dev, "DMA channel %s request_irq error " "with return %d\n", dev->node->full_name, err); - goto err; + goto err_no_irq; } } err = fsl_dma_self_test(new_fsl_chan); if (err) - goto err; + goto err_self_test; dev_info(&dev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id, match->compatible, new_fsl_chan->irq); return 0; -err: - dma_halt(new_fsl_chan); - iounmap(new_fsl_chan->reg_base); + +err_self_test: free_irq(new_fsl_chan->irq, new_fsl_chan); +err_no_irq: list_del(&new_fsl_chan->common.device_node); +err_no_chan: + iounmap(new_fsl_chan->reg_base); +err_no_reg: kfree(new_fsl_chan); return err; } @@ -1054,8 +1056,7 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev, fdev = kzalloc(sizeof(struct fsl_dma_device), GFP_KERNEL); if (!fdev) { dev_err(&dev->dev, "No enough 
memory for 'priv'\n"); - err = -ENOMEM; - goto err; + return -ENOMEM; } fdev->dev = &dev->dev; INIT_LIST_HEAD(&fdev->common.channels); @@ -1065,7 +1066,7 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev, if (err) { dev_err(&dev->dev, "Can't get %s property 'reg'\n", dev->node->full_name); - goto err; + goto err_no_reg; } dev_info(&dev->dev, "Probe the Freescale DMA driver for %s " @@ -1103,6 +1104,7 @@ static int __devinit of_fsl_dma_probe(struct of_device *dev, err: iounmap(fdev->reg_base); +err_no_reg: kfree(fdev); return err; } diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c index 16e0fd8..9b16a3a 100644 --- a/drivers/dma/ioat.c +++ b/drivers/dma/ioat.c @@ -47,6 +47,16 @@ static struct pci_device_id ioat_pci_tbl[] = { /* I/OAT v2 platforms */ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, + + /* I/OAT v3 platforms */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, { 0, } }; @@ -83,6 +93,11 @@ static int ioat_setup_functionality(struct pci_dev *pdev, void __iomem *iobase) if (device->dma && ioat_dca_enabled) device->dca = ioat2_dca_init(pdev, iobase); break; + case IOAT_VER_3_0: + device->dma = ioat_dma_probe(pdev, iobase); + if (device->dma && ioat_dca_enabled) + device->dca = ioat3_dca_init(pdev, iobase); + break; default: err = -ENODEV; break; diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat_dca.c index 9e92276..6cf622d 100644 --- a/drivers/dma/ioat_dca.c +++ b/drivers/dma/ioat_dca.c @@ -37,12 +37,18 @@ #include 
"ioatdma_registers.h" /* - * Bit 16 of a tag map entry is the "valid" bit, if it is set then bits 0:15 + * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6 * contain the bit number of the APIC ID to map into the DCA tag. If the valid * bit is not set, then the value must be 0 or 1 and defines the bit in the tag. */ #define DCA_TAG_MAP_VALID 0x80 +#define DCA3_TAG_MAP_BIT_TO_INV 0x80 +#define DCA3_TAG_MAP_BIT_TO_SEL 0x40 +#define DCA3_TAG_MAP_LITERAL_VAL 0x1 + +#define DCA_TAG_MAP_MASK 0xDF + /* * "Legacy" DCA systems do not implement the DCA register set in the * I/OAT device. Software needs direct support for their tag mappings. @@ -95,6 +101,7 @@ struct ioat_dca_slot { }; #define IOAT_DCA_MAX_REQ 6 +#define IOAT3_DCA_MAX_REQ 2 struct ioat_dca_priv { void __iomem *iobase; @@ -171,7 +178,9 @@ static int ioat_dca_remove_requester(struct dca_provider *dca, return -ENODEV; } -static u8 ioat_dca_get_tag(struct dca_provider *dca, int cpu) +static u8 ioat_dca_get_tag(struct dca_provider *dca, + struct device *dev, + int cpu) { struct ioat_dca_priv *ioatdca = dca_priv(dca); int i, apic_id, bit, value; @@ -193,10 +202,26 @@ static u8 ioat_dca_get_tag(struct dca_provider *dca, int cpu) return tag; } +static int ioat_dca_dev_managed(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + + pdev = to_pci_dev(dev); + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) + return 1; + } + return 0; +} + static struct dca_ops ioat_dca_ops = { .add_requester = ioat_dca_add_requester, .remove_requester = ioat_dca_remove_requester, .get_tag = ioat_dca_get_tag, + .dev_managed = ioat_dca_dev_managed, }; @@ -207,6 +232,8 @@ struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) u8 *tag_map = NULL; int i; int err; + u8 version; + u8 max_requesters; if (!system_has_dca_enabled(pdev)) return NULL; @@ -237,15 +264,20 @@ struct 
dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) if (tag_map == NULL) return NULL; + version = readb(iobase + IOAT_VER_OFFSET); + if (version == IOAT_VER_3_0) + max_requesters = IOAT3_DCA_MAX_REQ; + else + max_requesters = IOAT_DCA_MAX_REQ; + dca = alloc_dca_provider(&ioat_dca_ops, sizeof(*ioatdca) + - (sizeof(struct ioat_dca_slot) * IOAT_DCA_MAX_REQ)); + (sizeof(struct ioat_dca_slot) * max_requesters)); if (!dca) return NULL; ioatdca = dca_priv(dca); - ioatdca->max_requesters = IOAT_DCA_MAX_REQ; - + ioatdca->max_requesters = max_requesters; ioatdca->dca_base = iobase + 0x54; /* copy over the APIC ID to DCA tag mapping */ @@ -323,11 +355,13 @@ static int ioat2_dca_remove_requester(struct dca_provider *dca, return -ENODEV; } -static u8 ioat2_dca_get_tag(struct dca_provider *dca, int cpu) +static u8 ioat2_dca_get_tag(struct dca_provider *dca, + struct device *dev, + int cpu) { u8 tag; - tag = ioat_dca_get_tag(dca, cpu); + tag = ioat_dca_get_tag(dca, dev, cpu); tag = (~tag) & 0x1F; return tag; } @@ -336,6 +370,7 @@ static struct dca_ops ioat2_dca_ops = { .add_requester = ioat2_dca_add_requester, .remove_requester = ioat2_dca_remove_requester, .get_tag = ioat2_dca_get_tag, + .dev_managed = ioat_dca_dev_managed, }; static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset) @@ -425,3 +460,198 @@ struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) return dca; } + +static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 id; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + id = dcaid_from_pcidev(pdev); + + if (ioatdca->requester_count == ioatdca->max_requesters) + return -ENODEV; + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == NULL) { + /* found 
an empty slot */ + ioatdca->requester_count++; + ioatdca->req_slots[i].pdev = pdev; + ioatdca->req_slots[i].rid = id; + global_req_table = + readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET); + writel(id | IOAT_DCA_GREQID_VALID, + ioatdca->iobase + global_req_table + (i * 4)); + return i; + } + } + /* Error, ioatdma->requester_count is out of whack */ + return -EFAULT; +} + +static int ioat3_dca_remove_requester(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) { + global_req_table = + readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET); + writel(0, ioatdca->iobase + global_req_table + (i * 4)); + ioatdca->req_slots[i].pdev = NULL; + ioatdca->req_slots[i].rid = 0; + ioatdca->requester_count--; + return i; + } + } + return -ENODEV; +} + +static u8 ioat3_dca_get_tag(struct dca_provider *dca, + struct device *dev, + int cpu) +{ + u8 tag; + + struct ioat_dca_priv *ioatdca = dca_priv(dca); + int i, apic_id, bit, value; + u8 entry; + + tag = 0; + apic_id = cpu_physical_id(cpu); + + for (i = 0; i < IOAT_TAG_MAP_LEN; i++) { + entry = ioatdca->tag_map[i]; + if (entry & DCA3_TAG_MAP_BIT_TO_SEL) { + bit = entry & + ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV); + value = (apic_id & (1 << bit)) ? 1 : 0; + } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) { + bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV; + value = (apic_id & (1 << bit)) ? 0 : 1; + } else { + value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 
1 : 0; + } + tag |= (value << i); + } + + return tag; +} + +static struct dca_ops ioat3_dca_ops = { + .add_requester = ioat3_dca_add_requester, + .remove_requester = ioat3_dca_remove_requester, + .get_tag = ioat3_dca_get_tag, + .dev_managed = ioat_dca_dev_managed, +}; + +static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset) +{ + int slots = 0; + u32 req; + u16 global_req_table; + + global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET); + if (global_req_table == 0) + return 0; + + do { + req = readl(iobase + global_req_table + (slots * sizeof(u32))); + slots++; + } while ((req & IOAT_DCA_GREQID_LASTID) == 0); + + return slots; +} + +struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) +{ + struct dca_provider *dca; + struct ioat_dca_priv *ioatdca; + int slots; + int i; + int err; + u16 dca_offset; + u16 csi_fsb_control; + u16 pcie_control; + u8 bit; + + union { + u64 full; + struct { + u32 low; + u32 high; + }; + } tag_map; + + if (!system_has_dca_enabled(pdev)) + return NULL; + + dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET); + if (dca_offset == 0) + return NULL; + + slots = ioat3_dca_count_dca_slots(iobase, dca_offset); + if (slots == 0) + return NULL; + + dca = alloc_dca_provider(&ioat3_dca_ops, + sizeof(*ioatdca) + + (sizeof(struct ioat_dca_slot) * slots)); + if (!dca) + return NULL; + + ioatdca = dca_priv(dca); + ioatdca->iobase = iobase; + ioatdca->dca_base = iobase + dca_offset; + ioatdca->max_requesters = slots; + + /* some bios might not know to turn these on */ + csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET); + if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) { + csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH; + writew(csi_fsb_control, + ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET); + } + pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET); + if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) { + pcie_control |= IOAT3_PCI_CONTROL_MEMWR; + 
writew(pcie_control, + ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET); + } + + + /* TODO version, compatibility and configuration checks */ + + /* copy out the APIC to DCA tag map */ + tag_map.low = + readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW); + tag_map.high = + readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH); + for (i = 0; i < 8; i++) { + bit = tag_map.full >> (8 * i); + ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK; + } + + err = register_dca_provider(dca, &pdev->dev); + if (err) { + free_dca_provider(dca); + return NULL; + } + + return dca; +} diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c index 318e8a2..a52156e 100644 --- a/drivers/dma/ioat_dma.c +++ b/drivers/dma/ioat_dma.c @@ -32,6 +32,7 @@ #include <linux/dmaengine.h> #include <linux/delay.h> #include <linux/dma-mapping.h> +#include <linux/workqueue.h> #include "ioatdma.h" #include "ioatdma_registers.h" #include "ioatdma_hw.h" @@ -41,11 +42,23 @@ #define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) #define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx) +#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) static int ioat_pending_level = 4; module_param(ioat_pending_level, int, 0644); MODULE_PARM_DESC(ioat_pending_level, "high-water mark for pushing ioat descriptors (default: 4)"); +#define RESET_DELAY msecs_to_jiffies(100) +#define WATCHDOG_DELAY round_jiffies(msecs_to_jiffies(2000)) +static void ioat_dma_chan_reset_part2(struct work_struct *work); +static void ioat_dma_chan_watchdog(struct work_struct *work); + +/* + * workaround for IOAT ver.3.0 null descriptor issue + * (channel returns error when size is 0) + */ +#define NULL_DESC_BUFFER_SIZE 1 + /* internal functions */ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan); static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan); @@ -122,6 +135,38 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) int 
i; struct ioat_dma_chan *ioat_chan; + /* + * IOAT ver.3 workarounds + */ + if (device->version == IOAT_VER_3_0) { + u32 chan_err_mask; + u16 dev_id; + u32 dmauncerrsts; + + /* + * Write CHANERRMSK_INT with 3E07h to mask out the errors + * that can cause stability issues for IOAT ver.3 + */ + chan_err_mask = 0x3E07; + pci_write_config_dword(device->pdev, + IOAT_PCI_CHANERRMASK_INT_OFFSET, + chan_err_mask); + + /* + * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(device->pdev, + IOAT_PCI_DEVICE_ID_OFFSET, + &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { + dmauncerrsts = 0x10; + pci_write_config_dword(device->pdev, + IOAT_PCI_DMAUNCERRSTS_OFFSET, + dmauncerrsts); + } + } + device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); @@ -137,6 +182,7 @@ static int ioat_dma_enumerate_channels(struct ioatdma_device *device) ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1)); ioat_chan->xfercap = xfercap; ioat_chan->desccount = 0; + INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2); if (ioat_chan->device->version != IOAT_VER_1_2) { writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE | IOAT_DMA_DCA_ANY_CPU, @@ -175,7 +221,7 @@ static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) { struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - if (ioat_chan->pending != 0) { + if (ioat_chan->pending > 0) { spin_lock_bh(&ioat_chan->desc_lock); __ioat1_dma_memcpy_issue_pending(ioat_chan); spin_unlock_bh(&ioat_chan->desc_lock); @@ -194,13 +240,228 @@ static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan) { struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); - if (ioat_chan->pending != 0) { + if (ioat_chan->pending > 0) { spin_lock_bh(&ioat_chan->desc_lock); __ioat2_dma_memcpy_issue_pending(ioat_chan); 
spin_unlock_bh(&ioat_chan->desc_lock); } } + +/** + * ioat_dma_chan_reset_part2 - reinit the channel after a reset + */ +static void ioat_dma_chan_reset_part2(struct work_struct *work) +{ + struct ioat_dma_chan *ioat_chan = + container_of(work, struct ioat_dma_chan, work.work); + struct ioat_desc_sw *desc; + + spin_lock_bh(&ioat_chan->cleanup_lock); + spin_lock_bh(&ioat_chan->desc_lock); + + ioat_chan->completion_virt->low = 0; + ioat_chan->completion_virt->high = 0; + ioat_chan->pending = 0; + + /* + * count the descriptors waiting, and be sure to do it + * right for both the CB1 line and the CB2 ring + */ + ioat_chan->dmacount = 0; + if (ioat_chan->used_desc.prev) { + desc = to_ioat_desc(ioat_chan->used_desc.prev); + do { + ioat_chan->dmacount++; + desc = to_ioat_desc(desc->node.next); + } while (&desc->node != ioat_chan->used_desc.next); + } + + /* + * write the new starting descriptor address + * this puts channel engine into ARMED state + */ + desc = to_ioat_desc(ioat_chan->used_desc.prev); + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->async_tx.phys) >> 32, + ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); + + writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); + break; + case IOAT_VER_2_0: + writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->async_tx.phys) >> 32, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); + + /* tell the engine to go with what's left to be done */ + writew(ioat_chan->dmacount, + ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); + + break; + } + dev_err(&ioat_chan->device->pdev->dev, + "chan%d reset - %d descs waiting, %d total desc\n", + chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); + + spin_unlock_bh(&ioat_chan->desc_lock); + 
spin_unlock_bh(&ioat_chan->cleanup_lock); +} + +/** + * ioat_dma_reset_channel - restart a channel + * @ioat_chan: IOAT DMA channel handle + */ +static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan) +{ + u32 chansts, chanerr; + + if (!ioat_chan->used_desc.prev) + return; + + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + chansts = (ioat_chan->completion_virt->low + & IOAT_CHANSTS_DMA_TRANSFER_STATUS); + if (chanerr) { + dev_err(&ioat_chan->device->pdev->dev, + "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", + chan_num(ioat_chan), chansts, chanerr); + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + } + + /* + * whack it upside the head with a reset + * and wait for things to settle out. + * force the pending count to a really big negative + * to make sure no one forces an issue_pending + * while we're waiting. + */ + + spin_lock_bh(&ioat_chan->desc_lock); + ioat_chan->pending = INT_MIN; + writeb(IOAT_CHANCMD_RESET, + ioat_chan->reg_base + + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); + spin_unlock_bh(&ioat_chan->desc_lock); + + /* schedule the 2nd half instead of sleeping a long time */ + schedule_delayed_work(&ioat_chan->work, RESET_DELAY); +} + +/** + * ioat_dma_chan_watchdog - watch for stuck channels + */ +static void ioat_dma_chan_watchdog(struct work_struct *work) +{ + struct ioatdma_device *device = + container_of(work, struct ioatdma_device, work.work); + struct ioat_dma_chan *ioat_chan; + int i; + + union { + u64 full; + struct { + u32 low; + u32 high; + }; + } completion_hw; + unsigned long compl_desc_addr_hw; + + for (i = 0; i < device->common.chancnt; i++) { + ioat_chan = ioat_lookup_chan_by_index(device, i); + + if (ioat_chan->device->version == IOAT_VER_1_2 + /* have we started processing anything yet */ + && ioat_chan->last_completion + /* have we completed any since last watchdog cycle? 
*/ + && (ioat_chan->last_completion == + ioat_chan->watchdog_completion) + /* has TCP stuck on one cookie since last watchdog? */ + && (ioat_chan->watchdog_tcp_cookie == + ioat_chan->watchdog_last_tcp_cookie) + && (ioat_chan->watchdog_tcp_cookie != + ioat_chan->completed_cookie) + /* is there something in the chain to be processed? */ + /* CB1 chain always has at least the last one processed */ + && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next) + && ioat_chan->pending == 0) { + + /* + * check CHANSTS register for completed + * descriptor address. + * if it is different than completion writeback, + * it is not zero + * and it has changed since the last watchdog + * we can assume that channel + * is still working correctly + * and the problem is in completion writeback. + * update completion writeback + * with actual CHANSTS value + * else + * try resetting the channel + */ + + completion_hw.low = readl(ioat_chan->reg_base + + IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version)); + completion_hw.high = readl(ioat_chan->reg_base + + IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version)); +#if (BITS_PER_LONG == 64) + compl_desc_addr_hw = + completion_hw.full + & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; +#else + compl_desc_addr_hw = + completion_hw.low & IOAT_LOW_COMPLETION_MASK; +#endif + + if ((compl_desc_addr_hw != 0) + && (compl_desc_addr_hw != ioat_chan->watchdog_completion) + && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) { + ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw; + ioat_chan->completion_virt->low = completion_hw.low; + ioat_chan->completion_virt->high = completion_hw.high; + } else { + ioat_dma_reset_channel(ioat_chan); + ioat_chan->watchdog_completion = 0; + ioat_chan->last_compl_desc_addr_hw = 0; + } + + /* + * for version 2.0 if there are descriptors yet to be processed + * and the last completed hasn't changed since the last watchdog + * if they haven't hit the pending level + * issue the pending to push them through 
+ * else + * try resetting the channel + */ + } else if (ioat_chan->device->version == IOAT_VER_2_0 + && ioat_chan->used_desc.prev + && ioat_chan->last_completion + && ioat_chan->last_completion == ioat_chan->watchdog_completion) { + + if (ioat_chan->pending < ioat_pending_level) + ioat2_dma_memcpy_issue_pending(&ioat_chan->common); + else { + ioat_dma_reset_channel(ioat_chan); + ioat_chan->watchdog_completion = 0; + } + } else { + ioat_chan->last_compl_desc_addr_hw = 0; + ioat_chan->watchdog_completion + = ioat_chan->last_completion; + } + + ioat_chan->watchdog_last_tcp_cookie = + ioat_chan->watchdog_tcp_cookie; + } + + schedule_delayed_work(&device->work, WATCHDOG_DELAY); +} + static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) { struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); @@ -250,6 +511,13 @@ static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) prev = new; } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan))); + if (!new) { + dev_err(&ioat_chan->device->pdev->dev, + "tx submit failed\n"); + spin_unlock_bh(&ioat_chan->desc_lock); + return -ENOMEM; + } + hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; if (new->async_tx.callback) { hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; @@ -335,7 +603,14 @@ static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) desc_count++; } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan))); - hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; + if (!new) { + dev_err(&ioat_chan->device->pdev->dev, + "tx submit failed\n"); + spin_unlock_bh(&ioat_chan->desc_lock); + return -ENOMEM; + } + + hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS; if (new->async_tx.callback) { hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; if (first != new) { @@ -406,6 +681,7 @@ static struct ioat_desc_sw *ioat_dma_alloc_descriptor( desc_sw->async_tx.tx_submit = ioat1_tx_submit; break; case IOAT_VER_2_0: + case IOAT_VER_3_0: desc_sw->async_tx.tx_submit = ioat2_tx_submit; break; } @@ -452,7 +728,8 @@ 
static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan) * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors * @chan: the channel to be filled out */ -static int ioat_dma_alloc_chan_resources(struct dma_chan *chan) +static int ioat_dma_alloc_chan_resources(struct dma_chan *chan, + struct dma_client *client) { struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); struct ioat_desc_sw *desc; @@ -555,6 +832,7 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan) } break; case IOAT_VER_2_0: + case IOAT_VER_3_0: list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) { list_del(&desc->node); @@ -585,6 +863,10 @@ static void ioat_dma_free_chan_resources(struct dma_chan *chan) ioat_chan->last_completion = ioat_chan->completion_addr = 0; ioat_chan->pending = 0; ioat_chan->dmacount = 0; + ioat_chan->watchdog_completion = 0; + ioat_chan->last_compl_desc_addr_hw = 0; + ioat_chan->watchdog_tcp_cookie = + ioat_chan->watchdog_last_tcp_cookie = 0; } /** @@ -640,7 +922,8 @@ ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) /* set up the noop descriptor */ noop_desc = to_ioat_desc(ioat_chan->used_desc.next); - noop_desc->hw->size = 0; + /* set size to non-zero value (channel returns error when size is 0) */ + noop_desc->hw->size = NULL_DESC_BUFFER_SIZE; noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL; noop_desc->hw->src_addr = 0; noop_desc->hw->dst_addr = 0; @@ -690,6 +973,7 @@ static struct ioat_desc_sw *ioat_dma_get_next_descriptor( return ioat1_dma_get_next_descriptor(ioat_chan); break; case IOAT_VER_2_0: + case IOAT_VER_3_0: return ioat2_dma_get_next_descriptor(ioat_chan); break; } @@ -716,8 +1000,12 @@ static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy( new->src = dma_src; new->async_tx.flags = flags; return &new->async_tx; - } else + } else { + dev_err(&ioat_chan->device->pdev->dev, + "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", + chan_num(ioat_chan), 
ioat_chan->dmacount, ioat_chan->desccount); return NULL; + } } static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy( @@ -744,8 +1032,13 @@ static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy( new->src = dma_src; new->async_tx.flags = flags; return &new->async_tx; - } else + } else { + spin_unlock_bh(&ioat_chan->desc_lock); + dev_err(&ioat_chan->device->pdev->dev, + "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", + chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); return NULL; + } } static void ioat_dma_cleanup_tasklet(unsigned long data) @@ -756,6 +1049,27 @@ static void ioat_dma_cleanup_tasklet(unsigned long data) chan->reg_base + IOAT_CHANCTRL_OFFSET); } +static void +ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc) +{ + /* + * yes we are unmapping both _page and _single + * alloc'd regions with unmap_page. Is this + * *really* that bad? + */ + if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, dst), + pci_unmap_len(desc, len), + PCI_DMA_FROMDEVICE); + + if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, src), + pci_unmap_len(desc, len), + PCI_DMA_TODEVICE); +} + /** * ioat_dma_memcpy_cleanup - cleanup up finished descriptors * @chan: ioat channel to be cleaned up @@ -799,11 +1113,27 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) if (phys_complete == ioat_chan->last_completion) { spin_unlock_bh(&ioat_chan->cleanup_lock); + /* + * perhaps we're stuck so hard that the watchdog can't go off? 
+ * try to catch it after 2 seconds + */ + if (ioat_chan->device->version != IOAT_VER_3_0) { + if (time_after(jiffies, + ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) { + ioat_dma_chan_watchdog(&(ioat_chan->device->work.work)); + ioat_chan->last_completion_time = jiffies; + } + } return; } + ioat_chan->last_completion_time = jiffies; cookie = 0; - spin_lock_bh(&ioat_chan->desc_lock); + if (!spin_trylock_bh(&ioat_chan->desc_lock)) { + spin_unlock_bh(&ioat_chan->cleanup_lock); + return; + } + switch (ioat_chan->device->version) { case IOAT_VER_1_2: list_for_each_entry_safe(desc, _desc, @@ -816,21 +1146,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) */ if (desc->async_tx.cookie) { cookie = desc->async_tx.cookie; - - /* - * yes we are unmapping both _page and _single - * alloc'd regions with unmap_page. Is this - * *really* that bad? - */ - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, dst), - pci_unmap_len(desc, len), - PCI_DMA_FROMDEVICE); - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, src), - pci_unmap_len(desc, len), - PCI_DMA_TODEVICE); - + ioat_dma_unmap(ioat_chan, desc); if (desc->async_tx.callback) { desc->async_tx.callback(desc->async_tx.callback_param); desc->async_tx.callback = NULL; @@ -862,6 +1178,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) } break; case IOAT_VER_2_0: + case IOAT_VER_3_0: /* has some other thread has already cleaned up? 
*/ if (ioat_chan->used_desc.prev == NULL) break; @@ -889,16 +1206,7 @@ static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) if (desc->async_tx.cookie) { cookie = desc->async_tx.cookie; desc->async_tx.cookie = 0; - - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, dst), - pci_unmap_len(desc, len), - PCI_DMA_FROMDEVICE); - pci_unmap_page(ioat_chan->device->pdev, - pci_unmap_addr(desc, src), - pci_unmap_len(desc, len), - PCI_DMA_TODEVICE); - + ioat_dma_unmap(ioat_chan, desc); if (desc->async_tx.callback) { desc->async_tx.callback(desc->async_tx.callback_param); desc->async_tx.callback = NULL; @@ -943,6 +1251,7 @@ static enum dma_status ioat_dma_is_complete(struct dma_chan *chan, last_used = chan->cookie; last_complete = ioat_chan->completed_cookie; + ioat_chan->watchdog_tcp_cookie = cookie; if (done) *done = last_complete; @@ -973,10 +1282,19 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) spin_lock_bh(&ioat_chan->desc_lock); desc = ioat_dma_get_next_descriptor(ioat_chan); + + if (!desc) { + dev_err(&ioat_chan->device->pdev->dev, + "Unable to start null desc - get next desc failed\n"); + spin_unlock_bh(&ioat_chan->desc_lock); + return; + } + desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL | IOAT_DMA_DESCRIPTOR_CTL_INT_GN | IOAT_DMA_DESCRIPTOR_CTL_CP_STS; - desc->hw->size = 0; + /* set size to non-zero value (channel returns error when size is 0) */ + desc->hw->size = NULL_DESC_BUFFER_SIZE; desc->hw->src_addr = 0; desc->hw->dst_addr = 0; async_tx_ack(&desc->async_tx); @@ -994,6 +1312,7 @@ static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); break; case IOAT_VER_2_0: + case IOAT_VER_3_0: writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); writel(((u64) desc->async_tx.phys) >> 32, @@ -1049,7 +1368,7 @@ static int ioat_dma_self_test(struct ioatdma_device *device) dma_chan = 
container_of(device->common.channels.next, struct dma_chan, device_node); - if (device->common.device_alloc_chan_resources(dma_chan) < 1) { + if (device->common.device_alloc_chan_resources(dma_chan, NULL) < 1) { dev_err(&device->pdev->dev, "selftest cannot allocate chan resource\n"); err = -ENODEV; @@ -1312,6 +1631,7 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, ioat1_dma_memcpy_issue_pending; break; case IOAT_VER_2_0: + case IOAT_VER_3_0: device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy; device->common.device_issue_pending = ioat2_dma_memcpy_issue_pending; @@ -1331,8 +1651,16 @@ struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, if (err) goto err_self_test; + ioat_set_tcp_copy_break(device); + dma_async_device_register(&device->common); + if (device->version != IOAT_VER_3_0) { + INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); + schedule_delayed_work(&device->work, + WATCHDOG_DELAY); + } + return device; err_self_test: @@ -1365,6 +1693,10 @@ void ioat_dma_remove(struct ioatdma_device *device) pci_release_regions(device->pdev); pci_disable_device(device->pdev); + if (device->version != IOAT_VER_3_0) { + cancel_delayed_work(&device->work); + } + list_for_each_entry_safe(chan, _chan, &device->common.channels, device_node) { ioat_chan = to_ioat_chan(chan); diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h index f2c7fed..a3306d0 100644 --- a/drivers/dma/ioatdma.h +++ b/drivers/dma/ioatdma.h @@ -27,8 +27,9 @@ #include <linux/dmapool.h> #include <linux/cache.h> #include <linux/pci_ids.h> +#include <net/tcp.h> -#define IOAT_DMA_VERSION "2.04" +#define IOAT_DMA_VERSION "3.30" enum ioat_interrupt { none = 0, @@ -40,6 +41,7 @@ enum ioat_interrupt { #define IOAT_LOW_COMPLETION_MASK 0xffffffc0 #define IOAT_DMA_DCA_ANY_CPU ~0 +#define IOAT_WATCHDOG_PERIOD (2 * HZ) /** @@ -62,6 +64,7 @@ struct ioatdma_device { struct dma_device common; u8 version; enum ioat_interrupt irq_mode; + struct delayed_work work; struct msix_entry 
msix_entries[4]; struct ioat_dma_chan *idx[4]; }; @@ -75,6 +78,7 @@ struct ioat_dma_chan { dma_cookie_t completed_cookie; unsigned long last_completion; + unsigned long last_completion_time; size_t xfercap; /* XFERCAP register value expanded out */ @@ -82,6 +86,10 @@ struct ioat_dma_chan { spinlock_t desc_lock; struct list_head free_desc; struct list_head used_desc; + unsigned long watchdog_completion; + int watchdog_tcp_cookie; + u32 watchdog_last_tcp_cookie; + struct delayed_work work; int pending; int dmacount; @@ -98,6 +106,7 @@ struct ioat_dma_chan { u32 high; }; } *completion_virt; + unsigned long last_compl_desc_addr_hw; struct tasklet_struct cleanup_task; }; @@ -121,17 +130,34 @@ struct ioat_desc_sw { struct dma_async_tx_descriptor async_tx; }; +static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev) +{ + #ifdef CONFIG_NET_DMA + switch (dev->version) { + case IOAT_VER_1_2: + case IOAT_VER_3_0: + sysctl_tcp_dma_copybreak = 4096; + break; + case IOAT_VER_2_0: + sysctl_tcp_dma_copybreak = 2048; + break; + } + #endif +} + #if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE) struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, void __iomem *iobase); void ioat_dma_remove(struct ioatdma_device *device); struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); +struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); #else #define ioat_dma_probe(pdev, iobase) NULL #define ioat_dma_remove(device) do { } while (0) #define ioat_dca_init(pdev, iobase) NULL #define ioat2_dca_init(pdev, iobase) NULL +#define ioat3_dca_init(pdev, iobase) NULL #endif #endif /* IOATDMA_H */ diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h index dd470fa..f1ae2c7 100644 --- a/drivers/dma/ioatdma_hw.h +++ b/drivers/dma/ioatdma_hw.h @@ -35,6 +35,7 @@ #define IOAT_PCI_SID 0x8086 #define IOAT_VER_1_2 0x12 /* 
Version 1.2 */ #define IOAT_VER_2_0 0x20 /* Version 2.0 */ +#define IOAT_VER_3_0 0x30 /* Version 3.0 */ struct ioat_dma_descriptor { uint32_t size; diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioatdma_registers.h index 9832d7e..827cb50 100644 --- a/drivers/dma/ioatdma_registers.h +++ b/drivers/dma/ioatdma_registers.h @@ -25,6 +25,10 @@ #define IOAT_PCI_DMACTRL_DMA_EN 0x00000001 #define IOAT_PCI_DMACTRL_MSI_EN 0x00000002 +#define IOAT_PCI_DEVICE_ID_OFFSET 0x02 +#define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148 +#define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184 + /* MMIO Device Registers */ #define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */ @@ -149,7 +153,23 @@ #define IOAT_DCA_GREQID_VALID 0x20000000 #define IOAT_DCA_GREQID_LASTID 0x80000000 +#define IOAT3_CSI_CAPABILITY_OFFSET 0x08 +#define IOAT3_CSI_CAPABILITY_PREFETCH 0x1 + +#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A +#define IOAT3_PCI_CAPABILITY_MEMWR 0x1 + +#define IOAT3_CSI_CONTROL_OFFSET 0x0C +#define IOAT3_CSI_CONTROL_PREFETCH 0x1 + +#define IOAT3_PCI_CONTROL_OFFSET 0x0E +#define IOAT3_PCI_CONTROL_MEMWR 0x1 + +#define IOAT3_APICID_TAG_MAP_OFFSET 0x10 +#define IOAT3_APICID_TAG_MAP_OFFSET_LOW 0x10 +#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14 +#define IOAT3_DCA_GREQID_OFFSET 0x02 #define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */ #define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */ diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c index 0ec0f43..85bfeba 100644 --- a/drivers/dma/iop-adma.c +++ b/drivers/dma/iop-adma.c @@ -82,17 +82,24 @@ iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, struct device *dev = &iop_chan->device->pdev->dev; u32 len = unmap->unmap_len; - u32 src_cnt = unmap->unmap_src_cnt; - dma_addr_t addr = iop_desc_get_dest_addr(unmap, - iop_chan); - - dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE); - while (src_cnt--) { - addr = iop_desc_get_src_addr(unmap, - iop_chan, - src_cnt); - dma_unmap_page(dev, addr, 
len, - DMA_TO_DEVICE); + enum dma_ctrl_flags flags = desc->async_tx.flags; + u32 src_cnt; + dma_addr_t addr; + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + addr = iop_desc_get_dest_addr(unmap, iop_chan); + dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE); + } + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + src_cnt = unmap->unmap_src_cnt; + while (src_cnt--) { + addr = iop_desc_get_src_addr(unmap, + iop_chan, + src_cnt); + dma_unmap_page(dev, addr, len, + DMA_TO_DEVICE); + } } desc->group_head = NULL; } @@ -366,8 +373,8 @@ retry: if (!retry++) goto retry; - /* try to free some slots if the allocation fails */ - tasklet_schedule(&iop_chan->irq_tasklet); + /* perform direct reclaim if the allocation fails */ + __iop_adma_slot_cleanup(iop_chan); return NULL; } @@ -443,8 +450,18 @@ iop_adma_tx_submit(struct dma_async_tx_descriptor *tx) static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan); static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan); -/* returns the number of allocated descriptors */ -static int iop_adma_alloc_chan_resources(struct dma_chan *chan) +/** + * iop_adma_alloc_chan_resources - returns the number of allocated descriptors + * @chan - allocate descriptor resources for this channel + * @client - current client requesting the channel be ready for requests + * + * Note: We keep the slots for 1 operation on iop_chan->chain at all times. To + * avoid deadlock, via async_xor, num_descs_in_pool must at a minimum be + * greater than 2x the number slots needed to satisfy a device->max_xor + * request. 
+ * */ +static int iop_adma_alloc_chan_resources(struct dma_chan *chan, + struct dma_client *client) { char *hw_desc; int idx; @@ -838,7 +855,7 @@ static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device) dma_chan = container_of(device->common.channels.next, struct dma_chan, device_node); - if (iop_adma_alloc_chan_resources(dma_chan) < 1) { + if (iop_adma_alloc_chan_resources(dma_chan, NULL) < 1) { err = -ENODEV; goto out; } @@ -936,7 +953,7 @@ iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) dma_chan = container_of(device->common.channels.next, struct dma_chan, device_node); - if (iop_adma_alloc_chan_resources(dma_chan) < 1) { + if (iop_adma_alloc_chan_resources(dma_chan, NULL) < 1) { err = -ENODEV; goto out; } @@ -1387,6 +1404,8 @@ static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan) spin_unlock_bh(&iop_chan->lock); } +MODULE_ALIAS("platform:iop-adma"); + static struct platform_driver iop_adma_driver = { .probe = iop_adma_probe, .remove = iop_adma_remove, diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c new file mode 100644 index 0000000..a4e4494 --- /dev/null +++ b/drivers/dma/mv_xor.c @@ -0,0 +1,1375 @@ +/* + * offload engine driver for the Marvell XOR engine + * Copyright (C) 2007, 2008, Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
+ */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/async_tx.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/spinlock.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/memory.h> +#include <asm/plat-orion/mv_xor.h> +#include "mv_xor.h" + +static void mv_xor_issue_pending(struct dma_chan *chan); + +#define to_mv_xor_chan(chan) \ + container_of(chan, struct mv_xor_chan, common) + +#define to_mv_xor_device(dev) \ + container_of(dev, struct mv_xor_device, common) + +#define to_mv_xor_slot(tx) \ + container_of(tx, struct mv_xor_desc_slot, async_tx) + +static void mv_desc_init(struct mv_xor_desc_slot *desc, unsigned long flags) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + + hw_desc->status = (1 << 31); + hw_desc->phy_next_desc = 0; + hw_desc->desc_command = (1 << 31); +} + +static u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + return hw_desc->phy_dest_addr; +} + +static u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc, + int src_idx) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + return hw_desc->phy_src_addr[src_idx]; +} + + +static void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc, + u32 byte_count) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + hw_desc->byte_count = byte_count; +} + +static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc, + u32 next_desc_addr) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + BUG_ON(hw_desc->phy_next_desc); + hw_desc->phy_next_desc = next_desc_addr; +} + +static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + hw_desc->phy_next_desc = 0; +} + +static void mv_desc_set_block_fill_val(struct mv_xor_desc_slot *desc, u32 val) +{ + desc->value = val; +} + +static void mv_desc_set_dest_addr(struct mv_xor_desc_slot *desc, + dma_addr_t addr) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + 
hw_desc->phy_dest_addr = addr; +} + +static int mv_chan_memset_slot_count(size_t len) +{ + return 1; +} + +#define mv_chan_memcpy_slot_count(c) mv_chan_memset_slot_count(c) + +static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc, + int index, dma_addr_t addr) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + hw_desc->phy_src_addr[index] = addr; + if (desc->type == DMA_XOR) + hw_desc->desc_command |= (1 << index); +} + +static u32 mv_chan_get_current_desc(struct mv_xor_chan *chan) +{ + return __raw_readl(XOR_CURR_DESC(chan)); +} + +static void mv_chan_set_next_descriptor(struct mv_xor_chan *chan, + u32 next_desc_addr) +{ + __raw_writel(next_desc_addr, XOR_NEXT_DESC(chan)); +} + +static void mv_chan_set_dest_pointer(struct mv_xor_chan *chan, u32 desc_addr) +{ + __raw_writel(desc_addr, XOR_DEST_POINTER(chan)); +} + +static void mv_chan_set_block_size(struct mv_xor_chan *chan, u32 block_size) +{ + __raw_writel(block_size, XOR_BLOCK_SIZE(chan)); +} + +static void mv_chan_set_value(struct mv_xor_chan *chan, u32 value) +{ + __raw_writel(value, XOR_INIT_VALUE_LOW(chan)); + __raw_writel(value, XOR_INIT_VALUE_HIGH(chan)); +} + +static void mv_chan_unmask_interrupts(struct mv_xor_chan *chan) +{ + u32 val = __raw_readl(XOR_INTR_MASK(chan)); + val |= XOR_INTR_MASK_VALUE << (chan->idx * 16); + __raw_writel(val, XOR_INTR_MASK(chan)); +} + +static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan) +{ + u32 intr_cause = __raw_readl(XOR_INTR_CAUSE(chan)); + intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF; + return intr_cause; +} + +static int mv_is_err_intr(u32 intr_cause) +{ + if (intr_cause & ((1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9))) + return 1; + + return 0; +} + +static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan) +{ + u32 val = (1 << (1 + (chan->idx * 16))); + dev_dbg(chan->device->common.dev, "%s, val 0x%08x\n", __func__, val); + __raw_writel(val, XOR_INTR_CAUSE(chan)); +} + +static void mv_xor_device_clear_err_status(struct mv_xor_chan 
*chan) +{ + u32 val = 0xFFFF0000 >> (chan->idx * 16); + __raw_writel(val, XOR_INTR_CAUSE(chan)); +} + +static int mv_can_chain(struct mv_xor_desc_slot *desc) +{ + struct mv_xor_desc_slot *chain_old_tail = list_entry( + desc->chain_node.prev, struct mv_xor_desc_slot, chain_node); + + if (chain_old_tail->type != desc->type) + return 0; + if (desc->type == DMA_MEMSET) + return 0; + + return 1; +} + +static void mv_set_mode(struct mv_xor_chan *chan, + enum dma_transaction_type type) +{ + u32 op_mode; + u32 config = __raw_readl(XOR_CONFIG(chan)); + + switch (type) { + case DMA_XOR: + op_mode = XOR_OPERATION_MODE_XOR; + break; + case DMA_MEMCPY: + op_mode = XOR_OPERATION_MODE_MEMCPY; + break; + case DMA_MEMSET: + op_mode = XOR_OPERATION_MODE_MEMSET; + break; + default: + dev_printk(KERN_ERR, chan->device->common.dev, + "error: unsupported operation %d.\n", + type); + BUG(); + return; + } + + config &= ~0x7; + config |= op_mode; + __raw_writel(config, XOR_CONFIG(chan)); + chan->current_type = type; +} + +static void mv_chan_activate(struct mv_xor_chan *chan) +{ + u32 activation; + + dev_dbg(chan->device->common.dev, " activate chan.\n"); + activation = __raw_readl(XOR_ACTIVATION(chan)); + activation |= 0x1; + __raw_writel(activation, XOR_ACTIVATION(chan)); +} + +static char mv_chan_is_busy(struct mv_xor_chan *chan) +{ + u32 state = __raw_readl(XOR_ACTIVATION(chan)); + + state = (state >> 4) & 0x3; + + return (state == 1) ? 
1 : 0; +} + +static int mv_chan_xor_slot_count(size_t len, int src_cnt) +{ + return 1; +} + +/** + * mv_xor_free_slots - flags descriptor slots for reuse + * @slot: Slot to free + * Caller must hold &mv_chan->lock while calling this function + */ +static void mv_xor_free_slots(struct mv_xor_chan *mv_chan, + struct mv_xor_desc_slot *slot) +{ + dev_dbg(mv_chan->device->common.dev, "%s %d slot %p\n", + __func__, __LINE__, slot); + + slot->slots_per_op = 0; + +} + +/* + * mv_xor_start_new_chain - program the engine to operate on new chain headed by + * sw_desc + * Caller must hold &mv_chan->lock while calling this function + */ +static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan, + struct mv_xor_desc_slot *sw_desc) +{ + dev_dbg(mv_chan->device->common.dev, "%s %d: sw_desc %p\n", + __func__, __LINE__, sw_desc); + if (sw_desc->type != mv_chan->current_type) + mv_set_mode(mv_chan, sw_desc->type); + + if (sw_desc->type == DMA_MEMSET) { + /* for memset requests we need to program the engine, no + * descriptors used. + */ + struct mv_xor_desc *hw_desc = sw_desc->hw_desc; + mv_chan_set_dest_pointer(mv_chan, hw_desc->phy_dest_addr); + mv_chan_set_block_size(mv_chan, sw_desc->unmap_len); + mv_chan_set_value(mv_chan, sw_desc->value); + } else { + /* set the hardware chain */ + mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys); + } + mv_chan->pending += sw_desc->slot_cnt; + mv_xor_issue_pending(&mv_chan->common); +} + +static dma_cookie_t +mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc, + struct mv_xor_chan *mv_chan, dma_cookie_t cookie) +{ + BUG_ON(desc->async_tx.cookie < 0); + + if (desc->async_tx.cookie > 0) { + cookie = desc->async_tx.cookie; + + /* call the callback (must not sleep or submit new + * operations to this channel) + */ + if (desc->async_tx.callback) + desc->async_tx.callback( + desc->async_tx.callback_param); + + /* unmap dma addresses + * (unmap_single vs unmap_page?) 
+ */ + if (desc->group_head && desc->unmap_len) { + struct mv_xor_desc_slot *unmap = desc->group_head; + struct device *dev = + &mv_chan->device->pdev->dev; + u32 len = unmap->unmap_len; + enum dma_ctrl_flags flags = desc->async_tx.flags; + u32 src_cnt; + dma_addr_t addr; + + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + addr = mv_desc_get_dest_addr(unmap); + dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE); + } + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + src_cnt = unmap->unmap_src_cnt; + while (src_cnt--) { + addr = mv_desc_get_src_addr(unmap, + src_cnt); + dma_unmap_page(dev, addr, len, + DMA_TO_DEVICE); + } + } + desc->group_head = NULL; + } + } + + /* run dependent operations */ + async_tx_run_dependencies(&desc->async_tx); + + return cookie; +} + +static int +mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan) +{ + struct mv_xor_desc_slot *iter, *_iter; + + dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__); + list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots, + completed_node) { + + if (async_tx_test_ack(&iter->async_tx)) { + list_del(&iter->completed_node); + mv_xor_free_slots(mv_chan, iter); + } + } + return 0; +} + +static int +mv_xor_clean_slot(struct mv_xor_desc_slot *desc, + struct mv_xor_chan *mv_chan) +{ + dev_dbg(mv_chan->device->common.dev, "%s %d: desc %p flags %d\n", + __func__, __LINE__, desc, desc->async_tx.flags); + list_del(&desc->chain_node); + /* the client is allowed to attach dependent operations + * until 'ack' is set + */ + if (!async_tx_test_ack(&desc->async_tx)) { + /* move this slot to the completed_slots */ + list_add_tail(&desc->completed_node, &mv_chan->completed_slots); + return 0; + } + + mv_xor_free_slots(mv_chan, desc); + return 0; +} + +static void __mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) +{ + struct mv_xor_desc_slot *iter, *_iter; + dma_cookie_t cookie = 0; + int busy = mv_chan_is_busy(mv_chan); + u32 current_desc = mv_chan_get_current_desc(mv_chan); + int seen_current = 0; + 
+ dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__); + dev_dbg(mv_chan->device->common.dev, "current_desc %x\n", current_desc); + mv_xor_clean_completed_slots(mv_chan); + + /* free completed slots from the chain starting with + * the oldest descriptor + */ + + list_for_each_entry_safe(iter, _iter, &mv_chan->chain, + chain_node) { + prefetch(_iter); + prefetch(&_iter->async_tx); + + /* do not advance past the current descriptor loaded into the + * hardware channel, subsequent descriptors are either in + * process or have not been submitted + */ + if (seen_current) + break; + + /* stop the search if we reach the current descriptor and the + * channel is busy + */ + if (iter->async_tx.phys == current_desc) { + seen_current = 1; + if (busy) + break; + } + + cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie); + + if (mv_xor_clean_slot(iter, mv_chan)) + break; + } + + if ((busy == 0) && !list_empty(&mv_chan->chain)) { + struct mv_xor_desc_slot *chain_head; + chain_head = list_entry(mv_chan->chain.next, + struct mv_xor_desc_slot, + chain_node); + + mv_xor_start_new_chain(mv_chan, chain_head); + } + + if (cookie > 0) + mv_chan->completed_cookie = cookie; +} + +static void +mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) +{ + spin_lock_bh(&mv_chan->lock); + __mv_xor_slot_cleanup(mv_chan); + spin_unlock_bh(&mv_chan->lock); +} + +static void mv_xor_tasklet(unsigned long data) +{ + struct mv_xor_chan *chan = (struct mv_xor_chan *) data; + mv_xor_slot_cleanup(chan); /* takes &mv_chan->lock, which the cleanup path requires */ +} + +static struct mv_xor_desc_slot * +mv_xor_alloc_slots(struct mv_xor_chan *mv_chan, int num_slots, + int slots_per_op) +{ + struct mv_xor_desc_slot *iter, *_iter, *alloc_start = NULL; + LIST_HEAD(chain); + int slots_found, retry = 0; + + /* start search from the last allocated descriptor + * if a contiguous allocation can not be found start searching + * from the beginning of the list + */ +retry: + slots_found = 0; + if (retry == 0) + iter = mv_chan->last_used; + else + iter = 
list_entry(&mv_chan->all_slots, + struct mv_xor_desc_slot, + slot_node); + + list_for_each_entry_safe_continue( + iter, _iter, &mv_chan->all_slots, slot_node) { + prefetch(_iter); + prefetch(&_iter->async_tx); + if (iter->slots_per_op) { + /* give up after finding the first busy slot + * on the second pass through the list + */ + if (retry) + break; + + slots_found = 0; + continue; + } + + /* start the allocation if the slot is correctly aligned */ + if (!slots_found++) + alloc_start = iter; + + if (slots_found == num_slots) { + struct mv_xor_desc_slot *alloc_tail = NULL; + struct mv_xor_desc_slot *last_used = NULL; + iter = alloc_start; + while (num_slots) { + int i; + + /* pre-ack all but the last descriptor */ + async_tx_ack(&iter->async_tx); + + list_add_tail(&iter->chain_node, &chain); + alloc_tail = iter; + iter->async_tx.cookie = 0; + iter->slot_cnt = num_slots; + iter->xor_check_result = NULL; + for (i = 0; i < slots_per_op; i++) { + iter->slots_per_op = slots_per_op - i; + last_used = iter; + iter = list_entry(iter->slot_node.next, + struct mv_xor_desc_slot, + slot_node); + } + num_slots -= slots_per_op; + } + alloc_tail->group_head = alloc_start; + alloc_tail->async_tx.cookie = -EBUSY; + list_splice(&chain, &alloc_tail->async_tx.tx_list); + mv_chan->last_used = last_used; + mv_desc_clear_next_desc(alloc_start); + mv_desc_clear_next_desc(alloc_tail); + return alloc_tail; + } + } + if (!retry++) + goto retry; + + /* try to free some slots if the allocation fails */ + tasklet_schedule(&mv_chan->irq_tasklet); + + return NULL; +} + +static dma_cookie_t +mv_desc_assign_cookie(struct mv_xor_chan *mv_chan, + struct mv_xor_desc_slot *desc) +{ + dma_cookie_t cookie = mv_chan->common.cookie; + + if (++cookie < 0) + cookie = 1; + mv_chan->common.cookie = desc->async_tx.cookie = cookie; + return cookie; +} + +/************************ DMA engine API functions ****************************/ +static dma_cookie_t +mv_xor_tx_submit(struct dma_async_tx_descriptor *tx) +{ + 
struct mv_xor_desc_slot *sw_desc = to_mv_xor_slot(tx); + struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan); + struct mv_xor_desc_slot *grp_start, *old_chain_tail; + dma_cookie_t cookie; + int new_hw_chain = 1; + + dev_dbg(mv_chan->device->common.dev, + "%s sw_desc %p: async_tx %p\n", + __func__, sw_desc, &sw_desc->async_tx); + + grp_start = sw_desc->group_head; + + spin_lock_bh(&mv_chan->lock); + cookie = mv_desc_assign_cookie(mv_chan, sw_desc); + + if (list_empty(&mv_chan->chain)) + list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain); + else { + new_hw_chain = 0; + + old_chain_tail = list_entry(mv_chan->chain.prev, + struct mv_xor_desc_slot, + chain_node); + list_splice_init(&grp_start->async_tx.tx_list, + &old_chain_tail->chain_node); + + if (!mv_can_chain(grp_start)) + goto submit_done; + + dev_dbg(mv_chan->device->common.dev, "Append to last desc %x\n", + old_chain_tail->async_tx.phys); + + /* fix up the hardware chain */ + mv_desc_set_next_desc(old_chain_tail, grp_start->async_tx.phys); + + /* if the channel is not busy */ + if (!mv_chan_is_busy(mv_chan)) { + u32 current_desc = mv_chan_get_current_desc(mv_chan); + /* + * and the curren desc is the end of the chain before + * the append, then we need to start the channel + */ + if (current_desc == old_chain_tail->async_tx.phys) + new_hw_chain = 1; + } + } + + if (new_hw_chain) + mv_xor_start_new_chain(mv_chan, grp_start); + +submit_done: + spin_unlock_bh(&mv_chan->lock); + + return cookie; +} + +/* returns the number of allocated descriptors */ +static int mv_xor_alloc_chan_resources(struct dma_chan *chan, + struct dma_client *client) +{ + char *hw_desc; + int idx; + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + struct mv_xor_desc_slot *slot = NULL; + struct mv_xor_platform_data *plat_data = + mv_chan->device->pdev->dev.platform_data; + int num_descs_in_pool = plat_data->pool_size/MV_XOR_SLOT_SIZE; + + /* Allocate descriptor slots */ + idx = mv_chan->slots_allocated; + while (idx < 
num_descs_in_pool) { + slot = kzalloc(sizeof(*slot), GFP_KERNEL); + if (!slot) { + printk(KERN_INFO "MV XOR Channel only initialized" + " %d descriptor slots", idx); + break; + } + hw_desc = (char *) mv_chan->device->dma_desc_pool_virt; + slot->hw_desc = (void *) &hw_desc[idx * MV_XOR_SLOT_SIZE]; + + dma_async_tx_descriptor_init(&slot->async_tx, chan); + slot->async_tx.tx_submit = mv_xor_tx_submit; + INIT_LIST_HEAD(&slot->chain_node); + INIT_LIST_HEAD(&slot->slot_node); + INIT_LIST_HEAD(&slot->async_tx.tx_list); + hw_desc = (char *) mv_chan->device->dma_desc_pool; + slot->async_tx.phys = + (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE]; + slot->idx = idx++; + + spin_lock_bh(&mv_chan->lock); + mv_chan->slots_allocated = idx; + list_add_tail(&slot->slot_node, &mv_chan->all_slots); + spin_unlock_bh(&mv_chan->lock); + } + + if (mv_chan->slots_allocated && !mv_chan->last_used) + mv_chan->last_used = list_entry(mv_chan->all_slots.next, + struct mv_xor_desc_slot, + slot_node); + + dev_dbg(mv_chan->device->common.dev, + "allocated %d descriptor slots last_used: %p\n", + mv_chan->slots_allocated, mv_chan->last_used); + + return mv_chan->slots_allocated ? 
: -ENOMEM; +} + +static struct dma_async_tx_descriptor * +mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + struct mv_xor_desc_slot *sw_desc, *grp_start; + int slot_cnt; + + dev_dbg(mv_chan->device->common.dev, + "%s dest: %x src %x len: %u flags: %ld\n", + __func__, dest, src, len, flags); + if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) + return NULL; + + BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT)); + + spin_lock_bh(&mv_chan->lock); + slot_cnt = mv_chan_memcpy_slot_count(len); + sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1); + if (sw_desc) { + sw_desc->type = DMA_MEMCPY; + sw_desc->async_tx.flags = flags; + grp_start = sw_desc->group_head; + mv_desc_init(grp_start, flags); + mv_desc_set_byte_count(grp_start, len); + mv_desc_set_dest_addr(sw_desc->group_head, dest); + mv_desc_set_src_addr(grp_start, 0, src); + sw_desc->unmap_src_cnt = 1; + sw_desc->unmap_len = len; + } + spin_unlock_bh(&mv_chan->lock); + + dev_dbg(mv_chan->device->common.dev, + "%s sw_desc %p async_tx %p\n", + __func__, sw_desc, sw_desc ? &sw_desc->async_tx : 0); + + return sw_desc ? 
&sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +mv_xor_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + struct mv_xor_desc_slot *sw_desc, *grp_start; + int slot_cnt; + + dev_dbg(mv_chan->device->common.dev, + "%s dest: %x len: %u flags: %ld\n", + __func__, dest, len, flags); + if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) + return NULL; + + BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT)); + + spin_lock_bh(&mv_chan->lock); + slot_cnt = mv_chan_memset_slot_count(len); + sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1); + if (sw_desc) { + sw_desc->type = DMA_MEMSET; + sw_desc->async_tx.flags = flags; + grp_start = sw_desc->group_head; + mv_desc_init(grp_start, flags); + mv_desc_set_byte_count(grp_start, len); + mv_desc_set_dest_addr(sw_desc->group_head, dest); + mv_desc_set_block_fill_val(grp_start, value); + sw_desc->unmap_src_cnt = 1; + sw_desc->unmap_len = len; + } + spin_unlock_bh(&mv_chan->lock); + dev_dbg(mv_chan->device->common.dev, + "%s sw_desc %p async_tx %p \n", + __func__, sw_desc, &sw_desc->async_tx); + return sw_desc ? 
&sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + struct mv_xor_desc_slot *sw_desc, *grp_start; + int slot_cnt; + + if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) + return NULL; + + BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT)); + + dev_dbg(mv_chan->device->common.dev, + "%s src_cnt: %d len: dest %x %u flags: %ld\n", + __func__, src_cnt, len, dest, flags); + + spin_lock_bh(&mv_chan->lock); + slot_cnt = mv_chan_xor_slot_count(len, src_cnt); + sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1); + if (sw_desc) { + sw_desc->type = DMA_XOR; + sw_desc->async_tx.flags = flags; + grp_start = sw_desc->group_head; + mv_desc_init(grp_start, flags); + /* the byte count field is the same as in memcpy desc*/ + mv_desc_set_byte_count(grp_start, len); + mv_desc_set_dest_addr(sw_desc->group_head, dest); + sw_desc->unmap_src_cnt = src_cnt; + sw_desc->unmap_len = len; + while (src_cnt--) + mv_desc_set_src_addr(grp_start, src_cnt, src[src_cnt]); + } + spin_unlock_bh(&mv_chan->lock); + dev_dbg(mv_chan->device->common.dev, + "%s sw_desc %p async_tx %p \n", + __func__, sw_desc, &sw_desc->async_tx); + return sw_desc ? 
&sw_desc->async_tx : NULL; +} + +static void mv_xor_free_chan_resources(struct dma_chan *chan) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + struct mv_xor_desc_slot *iter, *_iter; + int in_use_descs = 0; + + mv_xor_slot_cleanup(mv_chan); + + spin_lock_bh(&mv_chan->lock); + list_for_each_entry_safe(iter, _iter, &mv_chan->chain, + chain_node) { + in_use_descs++; + list_del(&iter->chain_node); + } + list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots, + completed_node) { + in_use_descs++; + list_del(&iter->completed_node); + } + list_for_each_entry_safe_reverse( + iter, _iter, &mv_chan->all_slots, slot_node) { + list_del(&iter->slot_node); + kfree(iter); + mv_chan->slots_allocated--; + } + mv_chan->last_used = NULL; + + dev_dbg(mv_chan->device->common.dev, "%s slots_allocated %d\n", + __func__, mv_chan->slots_allocated); + spin_unlock_bh(&mv_chan->lock); + + if (in_use_descs) + dev_err(mv_chan->device->common.dev, + "freeing %d in use descriptors!\n", in_use_descs); +} + +/** + * mv_xor_is_complete - poll the status of an XOR transaction + * @chan: XOR channel handle + * @cookie: XOR transaction identifier + */ +static enum dma_status mv_xor_is_complete(struct dma_chan *chan, + dma_cookie_t cookie, + dma_cookie_t *done, + dma_cookie_t *used) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + dma_cookie_t last_used; + dma_cookie_t last_complete; + enum dma_status ret; + + last_used = chan->cookie; + last_complete = mv_chan->completed_cookie; + mv_chan->is_complete_cookie = cookie; + if (done) + *done = last_complete; + if (used) + *used = last_used; + + ret = dma_async_is_complete(cookie, last_complete, last_used); + if (ret == DMA_SUCCESS) { + mv_xor_clean_completed_slots(mv_chan); + return ret; + } + mv_xor_slot_cleanup(mv_chan); + + last_used = chan->cookie; + last_complete = mv_chan->completed_cookie; + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + return dma_async_is_complete(cookie, last_complete, 
last_used); +} + +static void mv_dump_xor_regs(struct mv_xor_chan *chan) +{ + u32 val; + + val = __raw_readl(XOR_CONFIG(chan)); + dev_printk(KERN_ERR, chan->device->common.dev, + "config 0x%08x.\n", val); + + val = __raw_readl(XOR_ACTIVATION(chan)); + dev_printk(KERN_ERR, chan->device->common.dev, + "activation 0x%08x.\n", val); + + val = __raw_readl(XOR_INTR_CAUSE(chan)); + dev_printk(KERN_ERR, chan->device->common.dev, + "intr cause 0x%08x.\n", val); + + val = __raw_readl(XOR_INTR_MASK(chan)); + dev_printk(KERN_ERR, chan->device->common.dev, + "intr mask 0x%08x.\n", val); + + val = __raw_readl(XOR_ERROR_CAUSE(chan)); + dev_printk(KERN_ERR, chan->device->common.dev, + "error cause 0x%08x.\n", val); + + val = __raw_readl(XOR_ERROR_ADDR(chan)); + dev_printk(KERN_ERR, chan->device->common.dev, + "error addr 0x%08x.\n", val); +} + +static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan, + u32 intr_cause) +{ + if (intr_cause & (1 << 4)) { + dev_dbg(chan->device->common.dev, + "ignore this error\n"); + return; + } + + dev_printk(KERN_ERR, chan->device->common.dev, + "error on chan %d. intr cause 0x%08x.\n", + chan->idx, intr_cause); + + mv_dump_xor_regs(chan); + BUG(); +} + +static irqreturn_t mv_xor_interrupt_handler(int irq, void *data) +{ + struct mv_xor_chan *chan = data; + u32 intr_cause = mv_chan_get_intr_cause(chan); + + dev_dbg(chan->device->common.dev, "intr cause %x\n", intr_cause); + + if (mv_is_err_intr(intr_cause)) + mv_xor_err_interrupt_handler(chan, intr_cause); + + tasklet_schedule(&chan->irq_tasklet); + + mv_xor_device_clear_eoc_cause(chan); + + return IRQ_HANDLED; +} + +static void mv_xor_issue_pending(struct dma_chan *chan) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + + if (mv_chan->pending >= MV_XOR_THRESHOLD) { + mv_chan->pending = 0; + mv_chan_activate(mv_chan); + } +} + +/* + * Perform a transaction to verify the HW works. 
+ */
+#define MV_XOR_TEST_SIZE 2000
+
+/*
+ * mv_xor_memcpy_self_test - copy MV_XOR_TEST_SIZE bytes through the first
+ * channel of @device and compare the destination with the source.
+ *
+ * Returns 0 on success, -ENOMEM if a test buffer cannot be allocated, and
+ * -ENODEV if channel resources or a descriptor cannot be obtained, if the
+ * copy is not reported complete after the 1 ms sleep, or if the data differs.
+ */
+static int __devinit mv_xor_memcpy_self_test(struct mv_xor_device *device)
+{
+	int i;
+	void *src, *dest;
+	dma_addr_t src_dma, dest_dma;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	struct dma_async_tx_descriptor *tx;
+	int err = 0;
+	struct mv_xor_chan *mv_chan;
+
+	src = kmalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+	if (!src)
+		return -ENOMEM;
+
+	dest = kzalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+	if (!dest) {
+		kfree(src);
+		return -ENOMEM;
+	}
+
+	/* Fill in src buffer */
+	for (i = 0; i < MV_XOR_TEST_SIZE; i++)
+		((u8 *) src)[i] = (u8)i;
+
+	/* Start copy, using first DMA channel */
+	dma_chan = container_of(device->common.channels.next,
+				struct dma_chan,
+				device_node);
+	if (mv_xor_alloc_chan_resources(dma_chan, NULL) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	dest_dma = dma_map_single(dma_chan->device->dev, dest,
+				  MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+
+	src_dma = dma_map_single(dma_chan->device->dev, src,
+				 MV_XOR_TEST_SIZE, DMA_TO_DEVICE);
+
+	tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
+				    MV_XOR_TEST_SIZE, 0);
+	if (!tx) {
+		/*
+		 * The prep routine returns NULL when no descriptor slot is
+		 * available.  Nothing was submitted, so undo the mappings
+		 * here instead of dereferencing a NULL descriptor below.
+		 */
+		dma_unmap_single(dma_chan->device->dev, src_dma,
+				 MV_XOR_TEST_SIZE, DMA_TO_DEVICE);
+		dma_unmap_single(dma_chan->device->dev, dest_dma,
+				 MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			   "Self-test copy got no descriptor, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	cookie = mv_xor_tx_submit(tx);
+	mv_xor_issue_pending(dma_chan);
+	async_tx_ack(tx);
+	msleep(1);
+
+	if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
+	    DMA_SUCCESS) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			   "Self-test copy timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	mv_chan = to_mv_xor_chan(dma_chan);
+	dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+				MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+	if (memcmp(src, dest, MV_XOR_TEST_SIZE)) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			   "Self-test copy failed compare, disabling\n");
+		err = -ENODEV;
+	}
+
+free_resources:
+	mv_xor_free_chan_resources(dma_chan);
+out:
+	kfree(src);
+	kfree(dest);
+	return err;
+}
+
+#define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */
+
+/*
+ * mv_xor_xor_self_test - XOR MV_XOR_NUM_SRC_TEST pages through the first
+ * channel of @device and check every 32-bit word of the destination page.
+ *
+ * Source page n is filled with the byte (1 << n), so each destination byte
+ * is expected to be the XOR of those values.
+ *
+ * Returns 0 on success, -ENOMEM if a page cannot be allocated, and -ENODEV
+ * if channel resources or a descriptor cannot be obtained, if the operation
+ * is not reported complete after the 8 ms sleep, or if the data differs.
+ */
+static int __devinit
+mv_xor_xor_self_test(struct mv_xor_device *device)
+{
+	int i, src_idx;
+	struct page *dest;
+	struct page *xor_srcs[MV_XOR_NUM_SRC_TEST];
+	dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST];
+	dma_addr_t dest_dma;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	u8 cmp_byte = 0;
+	u32 cmp_word;
+	int err = 0;
+	struct mv_xor_chan *mv_chan;
+
+	for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+		if (!xor_srcs[src_idx]) {
+			/* release every page allocated so far, then fail */
+			while (src_idx--)
+				__free_page(xor_srcs[src_idx]);
+			return -ENOMEM;
+		}
+	}
+
+	dest = alloc_page(GFP_KERNEL);
+	if (!dest) {
+		/* src_idx == MV_XOR_NUM_SRC_TEST here: free all sources */
+		while (src_idx--)
+			__free_page(xor_srcs[src_idx]);
+		return -ENOMEM;
+	}
+
+	/* Fill in src buffers */
+	for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+		u8 *ptr = page_address(xor_srcs[src_idx]);
+		for (i = 0; i < PAGE_SIZE; i++)
+			ptr[i] = (1 << src_idx);
+	}
+
+	for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++)
+		cmp_byte ^= (u8) (1 << src_idx);
+
+	/* widen before shifting so the top byte never lands in a sign bit */
+	cmp_word = ((u32) cmp_byte << 24) | ((u32) cmp_byte << 16) |
+		   ((u32) cmp_byte << 8) | cmp_byte;
+
+	memset(page_address(dest), 0, PAGE_SIZE);
+
+	dma_chan = container_of(device->common.channels.next,
+				struct dma_chan,
+				device_node);
+	if (mv_xor_alloc_chan_resources(dma_chan, NULL) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	/* test xor */
+	dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE,
+				DMA_FROM_DEVICE);
+
+	for (i = 0; i < MV_XOR_NUM_SRC_TEST; i++)
+		dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
+					   0, PAGE_SIZE, DMA_TO_DEVICE);
+
+	tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+				 MV_XOR_NUM_SRC_TEST, PAGE_SIZE, 0);
+	if (!tx) {
+		/*
+		 * The prep routine returns NULL when no descriptor slot is
+		 * available.  Nothing was submitted, so undo the mappings
+		 * here instead of dereferencing a NULL descriptor below.
+		 */
+		for (i = 0; i < MV_XOR_NUM_SRC_TEST; i++)
+			dma_unmap_page(dma_chan->device->dev, dma_srcs[i],
+				       PAGE_SIZE, DMA_TO_DEVICE);
+		dma_unmap_page(dma_chan->device->dev, dest_dma, PAGE_SIZE,
+			       DMA_FROM_DEVICE);
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			   "Self-test xor got no descriptor, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	cookie = mv_xor_tx_submit(tx);
+	mv_xor_issue_pending(dma_chan);
+	async_tx_ack(tx);
+	msleep(8);
+
+	if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
+	    DMA_SUCCESS) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			   "Self-test xor timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	mv_chan = to_mv_xor_chan(dma_chan);
+	dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+				PAGE_SIZE, DMA_FROM_DEVICE);
+	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+		u32 *ptr = page_address(dest);
+		if (ptr[i] != cmp_word) {
+			dev_printk(KERN_ERR, dma_chan->device->dev,
+				   "Self-test xor failed compare, disabling."
+				   " index %d, data %x, expected %x\n", i,
+				   ptr[i], cmp_word);
+			err = -ENODEV;
+			goto free_resources;
+		}
+	}
+
+free_resources:
+	mv_xor_free_chan_resources(dma_chan);
+out:
+	src_idx = MV_XOR_NUM_SRC_TEST;
+	while (src_idx--)
+		__free_page(xor_srcs[src_idx]);
+	__free_page(dest);
+	return err;
+}
+
+/*
+ * mv_xor_remove - unregister the dmaengine device, release the hardware
+ * descriptor pool and unlink every channel from the device's channel list.
+ * Always returns 0.
+ */
+static int __devexit mv_xor_remove(struct platform_device *dev)
+{
+	struct mv_xor_device *device = platform_get_drvdata(dev);
+	struct dma_chan *chan, *_chan;
+	struct mv_xor_platform_data *plat_data = dev->dev.platform_data;
+
+	dma_async_device_unregister(&device->common);
+
+	/* the pool was obtained with dma_alloc_writecombine() in probe */
+	dma_free_writecombine(&dev->dev, plat_data->pool_size,
+			device->dma_desc_pool_virt, device->dma_desc_pool);
+
+	list_for_each_entry_safe(chan, _chan, &device->common.channels,
+				 device_node)
+		list_del(&chan->device_node);
+
+	return 0;
+}
+
+/*
+ * mv_xor_probe - set up one XOR channel described by the platform data.
+ *
+ * Allocates the device and channel structures and the descriptor pool,
+ * installs the dmaengine callbacks selected by the capability mask, hooks
+ * up the interrupt, runs the memcpy/xor self-tests for the advertised
+ * capabilities and finally registers the device with the dmaengine core.
+ *
+ * Returns 0 on success or a negative errno; on failure after the pool was
+ * allocated, the pool is released again.
+ */
+static int __devinit mv_xor_probe(struct platform_device *pdev)
+{
+	int ret = 0;
+	int irq;
+	struct mv_xor_device *adev;
+	struct mv_xor_chan *mv_chan;
+	struct dma_device *dma_dev;
+	struct mv_xor_platform_data *plat_data = pdev->dev.platform_data;
+
+	/* everything below dereferences the platform data */
+	if (!plat_data || !plat_data->shared)
+		return -ENODEV;
+
+	adev = devm_kzalloc(&pdev->dev, sizeof(*adev), GFP_KERNEL);
+	if (!adev)
+		return -ENOMEM;
+
+	dma_dev = &adev->common;
+
+	/* allocate coherent memory for hardware descriptors
+	 * note: writecombine gives slightly better performance, but
+	 * requires that we explicitly flush the writes
+	 */
+	adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
+							  plat_data->pool_size,
+							  &adev->dma_desc_pool,
+							  GFP_KERNEL);
+	if (!adev->dma_desc_pool_virt)
+		return -ENOMEM;
+
+	adev->id = plat_data->hw_id;
+
+	/* discover transaction capabilites
--- /dev/null +++ b/drivers/dma/mv_xor.h @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2007, 2008, Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef MV_XOR_H +#define MV_XOR_H + +#include <linux/types.h> +#include <linux/io.h> +#include <linux/dmaengine.h> +#include <linux/interrupt.h> + +#define USE_TIMER +#define MV_XOR_SLOT_SIZE 64 +#define MV_XOR_THRESHOLD 1 + +#define XOR_OPERATION_MODE_XOR 0 +#define XOR_OPERATION_MODE_MEMCPY 2 +#define XOR_OPERATION_MODE_MEMSET 4 + +#define XOR_CURR_DESC(chan) (chan->mmr_base + 0x210 + (chan->idx * 4)) +#define XOR_NEXT_DESC(chan) (chan->mmr_base + 0x200 + (chan->idx * 4)) +#define XOR_BYTE_COUNT(chan) (chan->mmr_base + 0x220 + (chan->idx * 4)) +#define XOR_DEST_POINTER(chan) (chan->mmr_base + 0x2B0 + (chan->idx * 4)) +#define XOR_BLOCK_SIZE(chan) (chan->mmr_base + 0x2C0 + (chan->idx * 4)) +#define XOR_INIT_VALUE_LOW(chan) (chan->mmr_base + 0x2E0) +#define XOR_INIT_VALUE_HIGH(chan) (chan->mmr_base + 0x2E4) + +#define XOR_CONFIG(chan) (chan->mmr_base + 0x10 + (chan->idx * 4)) +#define XOR_ACTIVATION(chan) (chan->mmr_base + 0x20 + (chan->idx * 4)) +#define XOR_INTR_CAUSE(chan) (chan->mmr_base + 0x30) +#define XOR_INTR_MASK(chan) (chan->mmr_base + 0x40) +#define XOR_ERROR_CAUSE(chan) (chan->mmr_base + 0x50) +#define XOR_ERROR_ADDR(chan) (chan->mmr_base + 0x60) +#define 
XOR_INTR_MASK_VALUE 0x3F5 + +#define WINDOW_BASE(w) (0x250 + ((w) << 2)) +#define WINDOW_SIZE(w) (0x270 + ((w) << 2)) +#define WINDOW_REMAP_HIGH(w) (0x290 + ((w) << 2)) +#define WINDOW_BAR_ENABLE(chan) (0x240 + ((chan) << 2)) + +struct mv_xor_shared_private { + void __iomem *xor_base; + void __iomem *xor_high_base; +}; + + +/** + * struct mv_xor_device - internal representation of a XOR device + * @pdev: Platform device + * @id: HW XOR Device selector + * @dma_desc_pool: base of DMA descriptor region (DMA address) + * @dma_desc_pool_virt: base of DMA descriptor region (CPU address) + * @common: embedded struct dma_device + */ +struct mv_xor_device { + struct platform_device *pdev; + int id; + dma_addr_t dma_desc_pool; + void *dma_desc_pool_virt; + struct dma_device common; + struct mv_xor_shared_private *shared; +}; + +/** + * struct mv_xor_chan - internal representation of a XOR channel + * @pending: allows batching of hardware operations + * @completed_cookie: identifier for the most recently completed operation + * @lock: serializes enqueue/dequeue operations to the descriptors pool + * @mmr_base: memory mapped register base + * @idx: the index of the xor channel + * @chain: device chain view of the descriptors + * @completed_slots: slots completed by HW but still need to be acked + * @device: parent device + * @common: common dmaengine channel object members + * @last_used: place holder for allocation to continue from where it left off + * @all_slots: complete domain of slots usable by the channel + * @slots_allocated: records the actual size of the descriptor slot pool + * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs + */ +struct mv_xor_chan { + int pending; + dma_cookie_t completed_cookie; + spinlock_t lock; /* protects the descriptor slot pool */ + void __iomem *mmr_base; + unsigned int idx; + enum dma_transaction_type current_type; + struct list_head chain; + struct list_head completed_slots; + struct mv_xor_device *device; + struct dma_chan 
common; + struct mv_xor_desc_slot *last_used; + struct list_head all_slots; + int slots_allocated; + struct tasklet_struct irq_tasklet; +#ifdef USE_TIMER + unsigned long cleanup_time; + u32 current_on_last_cleanup; + dma_cookie_t is_complete_cookie; +#endif +}; + +/** + * struct mv_xor_desc_slot - software descriptor + * @slot_node: node on the mv_xor_chan.all_slots list + * @chain_node: node on the mv_xor_chan.chain list + * @completed_node: node on the mv_xor_chan.completed_slots list + * @hw_desc: virtual address of the hardware descriptor chain + * @phys: hardware address of the hardware descriptor chain + * @group_head: first operation in a transaction + * @slot_cnt: total slots used in an transaction (group of operations) + * @slots_per_op: number of slots per operation + * @idx: pool index + * @unmap_src_cnt: number of xor sources + * @unmap_len: transaction bytecount + * @async_tx: support for the async_tx api + * @group_list: list of slots that make up a multi-descriptor transaction + * for example transfer lengths larger than the supported hw max + * @xor_check_result: result of zero sum + * @crc32_result: result crc calculation + */ +struct mv_xor_desc_slot { + struct list_head slot_node; + struct list_head chain_node; + struct list_head completed_node; + enum dma_transaction_type type; + void *hw_desc; + struct mv_xor_desc_slot *group_head; + u16 slot_cnt; + u16 slots_per_op; + u16 idx; + u16 unmap_src_cnt; + u32 value; + size_t unmap_len; + struct dma_async_tx_descriptor async_tx; + union { + u32 *xor_check_result; + u32 *crc32_result; + }; +#ifdef USE_TIMER + unsigned long arrival_time; + struct timer_list timeout; +#endif +}; + +/* This structure describes XOR descriptor size 64bytes */ +struct mv_xor_desc { + u32 status; /* descriptor execution status */ + u32 crc32_result; /* result of CRC-32 calculation */ + u32 desc_command; /* type of operation to be carried out */ + u32 phy_next_desc; /* next descriptor address pointer */ + u32 byte_count; /* 
size of src/dst blocks in bytes */ + u32 phy_dest_addr; /* destination block address */ + u32 phy_src_addr[8]; /* source block addresses */ + u32 reserved0; + u32 reserved1; +}; + +#define to_mv_sw_desc(addr_hw_desc) \ + container_of(addr_hw_desc, struct mv_xor_desc_slot, hw_desc) + +#define mv_hw_desc_slot_idx(hw_desc, idx) \ + ((void *)(((unsigned long)hw_desc) + ((idx) << 5))) + +#define MV_XOR_MIN_BYTE_COUNT (128) +#define XOR_MAX_BYTE_COUNT ((16 * 1024 * 1024) - 1) +#define MV_XOR_MAX_BYTE_COUNT XOR_MAX_BYTE_COUNT + + +#endif diff --git a/include/asm-arm/arch-iop13xx/adma.h b/include/asm-arm/arch-iop13xx/adma.h index 90d14ee..ef4f5da 100644 --- a/include/asm-arm/arch-iop13xx/adma.h +++ b/include/asm-arm/arch-iop13xx/adma.h @@ -198,17 +198,13 @@ iop_chan_memset_slot_count(size_t len, int *slots_per_op) static inline int iop_chan_xor_slot_count(size_t len, int src_cnt, int *slots_per_op) { - int num_slots; - /* slots_to_find = 1 for basic descriptor + 1 per 4 sources above 1 - * (1 source => 8 bytes) (1 slot => 32 bytes) - */ - num_slots = 1 + (((src_cnt - 1) << 3) >> 5); - if (((src_cnt - 1) << 3) & 0x1f) - num_slots++; - - *slots_per_op = num_slots; - - return num_slots; + static const char slot_count_table[] = { 1, 2, 2, 2, + 2, 3, 3, 3, + 3, 4, 4, 4, + 4, 5, 5, 5, + }; + *slots_per_op = slot_count_table[src_cnt - 1]; + return *slots_per_op; } #define ADMA_MAX_BYTE_COUNT (16 * 1024 * 1024) diff --git a/include/asm-arm/hardware/iop3xx-adma.h b/include/asm-arm/hardware/iop3xx-adma.h index a32b86a..af64676 100644 --- a/include/asm-arm/hardware/iop3xx-adma.h +++ b/include/asm-arm/hardware/iop3xx-adma.h @@ -260,7 +260,7 @@ static inline int iop_chan_memset_slot_count(size_t len, int *slots_per_op) static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt, int *slots_per_op) { - static const int slot_count_table[] = { 0, + static const char slot_count_table[] = { 1, 1, 1, 1, /* 01 - 04 */ 2, 2, 2, 2, /* 05 - 08 */ 4, 4, 4, 4, /* 09 - 12 */ @@ -270,7 
+270,7 @@ static inline int iop3xx_aau_xor_slot_count(size_t len, int src_cnt, 8, 8, 8, 8, /* 25 - 28 */ 8, 8, 8, 8, /* 29 - 32 */ }; - *slots_per_op = slot_count_table[src_cnt]; + *slots_per_op = slot_count_table[src_cnt - 1]; return *slots_per_op; } diff --git a/include/asm-arm/plat-orion/mv_xor.h b/include/asm-arm/plat-orion/mv_xor.h new file mode 100644 index 0000000..c349e8ff --- /dev/null +++ b/include/asm-arm/plat-orion/mv_xor.h @@ -0,0 +1,28 @@ +/* + * Marvell XOR platform device data definition file. + */ + +#ifndef __ASM_PLAT_ORION_MV_XOR_H +#define __ASM_PLAT_ORION_MV_XOR_H + +#include <linux/dmaengine.h> +#include <linux/mbus.h> + +#define MV_XOR_SHARED_NAME "mv_xor_shared" +#define MV_XOR_NAME "mv_xor" + +struct mbus_dram_target_info; + +struct mv_xor_platform_shared_data { + struct mbus_dram_target_info *dram; +}; + +struct mv_xor_platform_data { + struct platform_device *shared; + int hw_id; + dma_cap_mask_t cap_mask; + size_t pool_size; +}; + + +#endif diff --git a/include/asm-avr32/arch-at32ap/at32ap700x.h b/include/asm-avr32/arch-at32ap/at32ap700x.h index 31e48b0..d18a305 100644 --- a/include/asm-avr32/arch-at32ap/at32ap700x.h +++ b/include/asm-avr32/arch-at32ap/at32ap700x.h @@ -30,4 +30,20 @@ #define GPIO_PIN_PD(N) (GPIO_PIOD_BASE + (N)) #define GPIO_PIN_PE(N) (GPIO_PIOE_BASE + (N)) + +/* + * DMAC peripheral hardware handshaking interfaces, used with dw_dmac + */ +#define DMAC_MCI_RX 0 +#define DMAC_MCI_TX 1 +#define DMAC_DAC_TX 2 +#define DMAC_AC97_A_RX 3 +#define DMAC_AC97_A_TX 4 +#define DMAC_AC97_B_RX 5 +#define DMAC_AC97_B_TX 6 +#define DMAC_DMAREQ_0 7 +#define DMAC_DMAREQ_1 8 +#define DMAC_DMAREQ_2 9 +#define DMAC_DMAREQ_3 10 + #endif /* __ASM_ARCH_AT32AP700X_H__ */ diff --git a/include/linux/async_tx.h b/include/linux/async_tx.h index eb640f0..0f50d4c 100644 --- a/include/linux/async_tx.h +++ b/include/linux/async_tx.h @@ -101,21 +101,14 @@ async_tx_find_channel(struct dma_async_tx_descriptor *depend_tx, /** * async_tx_sync_epilog - 
actions to take if an operation is run synchronously - * @flags: async_tx flags - * @depend_tx: transaction depends on depend_tx * @cb_fn: function to call when the transaction completes * @cb_fn_param: parameter to pass to the callback routine */ static inline void -async_tx_sync_epilog(unsigned long flags, - struct dma_async_tx_descriptor *depend_tx, - dma_async_tx_callback cb_fn, void *cb_fn_param) +async_tx_sync_epilog(dma_async_tx_callback cb_fn, void *cb_fn_param) { if (cb_fn) cb_fn(cb_fn_param); - - if (depend_tx && (flags & ASYNC_TX_DEP_ACK)) - async_tx_ack(depend_tx); } void @@ -152,4 +145,6 @@ struct dma_async_tx_descriptor * async_trigger_callback(enum async_tx_flags flags, struct dma_async_tx_descriptor *depend_tx, dma_async_tx_callback cb_fn, void *cb_fn_param); + +void async_tx_quiesce(struct dma_async_tx_descriptor **tx); #endif /* _ASYNC_TX_H_ */ diff --git a/include/linux/dca.h b/include/linux/dca.h index af61cd1..b00a753 100644 --- a/include/linux/dca.h +++ b/include/linux/dca.h @@ -10,6 +10,7 @@ void dca_unregister_notify(struct notifier_block *nb); #define DCA_PROVIDER_REMOVE 0x0002 struct dca_provider { + struct list_head node; struct dca_ops *ops; struct device *cd; int id; @@ -18,7 +19,9 @@ struct dca_provider { struct dca_ops { int (*add_requester) (struct dca_provider *, struct device *); int (*remove_requester) (struct dca_provider *, struct device *); - u8 (*get_tag) (struct dca_provider *, int cpu); + u8 (*get_tag) (struct dca_provider *, struct device *, + int cpu); + int (*dev_managed) (struct dca_provider *, struct device *); }; struct dca_provider *alloc_dca_provider(struct dca_ops *ops, int priv_size); @@ -32,9 +35,11 @@ static inline void *dca_priv(struct dca_provider *dca) } /* Requester API */ +#define DCA_GET_TAG_TWO_ARGS int dca_add_requester(struct device *dev); int dca_remove_requester(struct device *dev); u8 dca_get_tag(int cpu); +u8 dca3_get_tag(struct device *dev, int cpu); /* internal stuff */ int __init 
dca_sysfs_init(void); diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index d08a5c5..adb0b08 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -89,10 +89,23 @@ enum dma_transaction_type { DMA_MEMSET, DMA_MEMCPY_CRC32C, DMA_INTERRUPT, + DMA_SLAVE, }; /* last transaction type for creation of the capabilities mask */ -#define DMA_TX_TYPE_END (DMA_INTERRUPT + 1) +#define DMA_TX_TYPE_END (DMA_SLAVE + 1) + +/** + * enum dma_slave_width - DMA slave register access width. + * @DMA_SLAVE_WIDTH_8BIT: Do 8-bit slave register accesses + * @DMA_SLAVE_WIDTH_16BIT: Do 16-bit slave register accesses + * @DMA_SLAVE_WIDTH_32BIT: Do 32-bit slave register accesses + */ +enum dma_slave_width { + DMA_SLAVE_WIDTH_8BIT, + DMA_SLAVE_WIDTH_16BIT, + DMA_SLAVE_WIDTH_32BIT, +}; /** * enum dma_ctrl_flags - DMA flags to augment operation preparation, @@ -102,10 +115,14 @@ enum dma_transaction_type { * @DMA_CTRL_ACK - the descriptor cannot be reused until the client * acknowledges receipt, i.e. has had a chance to establish any * dependency chains + * @DMA_COMPL_SKIP_SRC_UNMAP - set to disable dma-unmapping the source buffer(s) + * @DMA_COMPL_SKIP_DEST_UNMAP - set to disable dma-unmapping the destination(s) */ enum dma_ctrl_flags { DMA_PREP_INTERRUPT = (1 << 0), DMA_CTRL_ACK = (1 << 1), + DMA_COMPL_SKIP_SRC_UNMAP = (1 << 2), + DMA_COMPL_SKIP_DEST_UNMAP = (1 << 3), }; /** @@ -115,6 +132,32 @@ enum dma_ctrl_flags { typedef struct { DECLARE_BITMAP(bits, DMA_TX_TYPE_END); } dma_cap_mask_t; /** + * struct dma_slave - Information about a DMA slave + * @dev: device acting as DMA slave + * @dma_dev: required DMA master device. If non-NULL, the client can not be + * bound to other masters than this.
+ * @tx_reg: physical address of data register used for + * memory-to-peripheral transfers + * @rx_reg: physical address of data register used for + * peripheral-to-memory transfers + * @reg_width: peripheral register width + * + * If dma_dev is non-NULL, the client can not be bound to other DMA + * masters than the one corresponding to this device. The DMA master + * driver may use this to determine if there is controller-specific + * data wrapped around this struct. Drivers or platform code that set + * the dma_dev field must therefore make sure to use an appropriate + * controller-specific dma slave structure wrapping this struct. + */ +struct dma_slave { + struct device *dev; + struct device *dma_dev; + dma_addr_t tx_reg; + dma_addr_t rx_reg; + enum dma_slave_width reg_width; +}; + +/** * struct dma_chan_percpu - the per-CPU part of struct dma_chan * @refcount: local_t used for open-coded "bigref" counting * @memcpy_count: transaction counter @@ -139,6 +182,7 @@ struct dma_chan_percpu { * @rcu: the DMA channel's RCU head * @device_node: used to add this to the device chan list * @local: per-cpu pointer to a struct dma_chan_percpu + * @client_count: how many clients are using this channel */ struct dma_chan { struct dma_device *device; @@ -154,6 +198,7 @@ struct dma_chan { struct list_head device_node; struct dma_chan_percpu *local; + int client_count; }; #define to_dma_chan(p) container_of(p, struct dma_chan, dev) @@ -202,11 +247,14 @@ typedef enum dma_state_client (*dma_event_callback) (struct dma_client *client, * @event_callback: func ptr to call when something happens * @cap_mask: only return channels that satisfy the requested capabilities * a value of zero corresponds to any capability + * @slave: data for preparing slave transfer. Must be non-NULL iff the + * DMA_SLAVE capability is requested.
* @global_node: list_head for global dma_client_list */ struct dma_client { dma_event_callback event_callback; dma_cap_mask_t cap_mask; + struct dma_slave *slave; struct list_head global_node; }; @@ -263,6 +311,8 @@ struct dma_async_tx_descriptor { * @device_prep_dma_zero_sum: prepares a zero_sum operation * @device_prep_dma_memset: prepares a memset operation * @device_prep_dma_interrupt: prepares an end of chain interrupt operation + * @device_prep_slave_sg: prepares a slave dma operation + * @device_terminate_all: terminate all pending operations * @device_issue_pending: push pending transactions to hardware */ struct dma_device { @@ -279,7 +329,8 @@ struct dma_device { int dev_id; struct device *dev; - int (*device_alloc_chan_resources)(struct dma_chan *chan); + int (*device_alloc_chan_resources)(struct dma_chan *chan, + struct dma_client *client); void (*device_free_chan_resources)(struct dma_chan *chan); struct dma_async_tx_descriptor *(*device_prep_dma_memcpy)( @@ -297,6 +348,12 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_dma_interrupt)( struct dma_chan *chan, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_slave_sg)( + struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_data_direction direction, + unsigned long flags); + void (*device_terminate_all)(struct dma_chan *chan); + enum dma_status (*device_is_tx_complete)(struct dma_chan *chan, dma_cookie_t cookie, dma_cookie_t *last, dma_cookie_t *used); @@ -318,16 +375,14 @@ dma_cookie_t dma_async_memcpy_pg_to_pg(struct dma_chan *chan, void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, struct dma_chan *chan); -static inline void -async_tx_ack(struct dma_async_tx_descriptor *tx) +static inline void async_tx_ack(struct dma_async_tx_descriptor *tx) { tx->flags |= DMA_CTRL_ACK; } -static inline int -async_tx_test_ack(struct dma_async_tx_descriptor *tx) +static inline bool async_tx_test_ack(struct dma_async_tx_descriptor 
*tx) { - return tx->flags & DMA_CTRL_ACK; + return (tx->flags & DMA_CTRL_ACK) == DMA_CTRL_ACK; } #define first_dma_cap(mask) __first_dma_cap(&(mask)) diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h new file mode 100644 index 0000000..04d217b --- /dev/null +++ b/include/linux/dw_dmac.h @@ -0,0 +1,62 @@ +/* + * Driver for the Synopsys DesignWare DMA Controller (aka DMACA on + * AVR32 systems.) + * + * Copyright (C) 2007 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef DW_DMAC_H +#define DW_DMAC_H + +#include <linux/dmaengine.h> + +/** + * struct dw_dma_platform_data - Controller configuration parameters + * @nr_channels: Number of channels supported by hardware (max 8) + */ +struct dw_dma_platform_data { + unsigned int nr_channels; +}; + +/** + * struct dw_dma_slave - Controller-specific information about a slave + * @slave: Generic information about the slave + * @ctl_lo: Platform-specific initializer for the CTL_LO register + * @cfg_hi: Platform-specific initializer for the CFG_HI register + * @cfg_lo: Platform-specific initializer for the CFG_LO register + */ +struct dw_dma_slave { + struct dma_slave slave; + u32 cfg_hi; + u32 cfg_lo; +}; + +/* Platform-configurable bits in CFG_HI */ +#define DWC_CFGH_FCMODE (1 << 0) +#define DWC_CFGH_FIFO_MODE (1 << 1) +#define DWC_CFGH_PROTCTL(x) ((x) << 2) +#define DWC_CFGH_SRC_PER(x) ((x) << 7) +#define DWC_CFGH_DST_PER(x) ((x) << 11) + +/* Platform-configurable bits in CFG_LO */ +#define DWC_CFGL_PRIO(x) ((x) << 5) /* priority */ +#define DWC_CFGL_LOCK_CH_XFER (0 << 12) /* scope of LOCK_CH */ +#define DWC_CFGL_LOCK_CH_BLOCK (1 << 12) +#define DWC_CFGL_LOCK_CH_XACT (2 << 12) +#define DWC_CFGL_LOCK_BUS_XFER (0 << 14) /* scope of LOCK_BUS */ +#define DWC_CFGL_LOCK_BUS_BLOCK (1 << 14) +#define DWC_CFGL_LOCK_BUS_XACT (2 << 14) +#define 
DWC_CFGL_LOCK_CH (1 << 15) /* channel lockout */ +#define DWC_CFGL_LOCK_BUS (1 << 16) /* busmaster lockout */ +#define DWC_CFGL_HS_DST_POL (1 << 18) /* dst handshake active low */ +#define DWC_CFGL_HS_SRC_POL (1 << 19) /* src handshake active low */ + +static inline struct dw_dma_slave *to_dw_dma_slave(struct dma_slave *slave) +{ + return container_of(slave, struct dw_dma_slave, slave); +} + +#endif /* DW_DMAC_H */ diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index d8507eb..119ae7b 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2371,6 +2371,14 @@ #define PCI_DEVICE_ID_INTEL_ICH9_7 0x2916 #define PCI_DEVICE_ID_INTEL_ICH9_8 0x2918 #define PCI_DEVICE_ID_INTEL_82855PM_HB 0x3340 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG4 0x3429 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG5 0x342a +#define PCI_DEVICE_ID_INTEL_IOAT_TBG6 0x342b +#define PCI_DEVICE_ID_INTEL_IOAT_TBG7 0x342c +#define PCI_DEVICE_ID_INTEL_IOAT_TBG0 0x3430 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG1 0x3431 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG2 0x3432 +#define PCI_DEVICE_ID_INTEL_IOAT_TBG3 0x3433 #define PCI_DEVICE_ID_INTEL_82830_HB 0x3575 #define PCI_DEVICE_ID_INTEL_82830_CGC 0x3577 #define PCI_DEVICE_ID_INTEL_82855GM_HB 0x3580 diff --git a/net/core/user_dma.c b/net/core/user_dma.c index c77aff9..8c6b706 100644 --- a/net/core/user_dma.c +++ b/net/core/user_dma.c @@ -34,6 +34,7 @@ #define NET_DMA_DEFAULT_COPYBREAK 4096 int sysctl_tcp_dma_copybreak = NET_DMA_DEFAULT_COPYBREAK; +EXPORT_SYMBOL(sysctl_tcp_dma_copybreak); /** * dma_skb_copy_datagram_iovec - Copy a datagram to an iovec. |