diff options
Diffstat (limited to 'drivers/dma')
-rw-r--r-- | drivers/dma/Kconfig | 91 | ||||
-rw-r--r-- | drivers/dma/Makefile | 9 | ||||
-rw-r--r-- | drivers/dma/dmaengine.c | 635 | ||||
-rw-r--r-- | drivers/dma/dmatest.c | 449 | ||||
-rw-r--r-- | drivers/dma/dw_dmac.c | 1122 | ||||
-rw-r--r-- | drivers/dma/dw_dmac_regs.h | 225 | ||||
-rw-r--r-- | drivers/dma/fsldma.c | 1036 | ||||
-rw-r--r-- | drivers/dma/fsldma.h | 198 | ||||
-rw-r--r-- | drivers/dma/ioat.c | 226 | ||||
-rw-r--r-- | drivers/dma/ioat_dca.c | 657 | ||||
-rw-r--r-- | drivers/dma/ioat_dma.c | 1723 | ||||
-rw-r--r-- | drivers/dma/ioatdma.h | 163 | ||||
-rw-r--r-- | drivers/dma/ioatdma_hw.h | 70 | ||||
-rw-r--r-- | drivers/dma/ioatdma_registers.h | 226 | ||||
-rw-r--r-- | drivers/dma/iop-adma.c | 1448 | ||||
-rw-r--r-- | drivers/dma/iovlock.c | 269 | ||||
-rw-r--r-- | drivers/dma/mv_xor.c | 1384 | ||||
-rw-r--r-- | drivers/dma/mv_xor.h | 183 |
18 files changed, 10114 insertions, 0 deletions
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig new file mode 100644 index 0000000..904e575 --- /dev/null +++ b/drivers/dma/Kconfig @@ -0,0 +1,91 @@ +# +# DMA engine configuration +# + +menuconfig DMADEVICES + bool "DMA Engine support" + depends on !HIGHMEM64G && HAS_DMA + help + DMA engines can do asynchronous data transfers without + involving the host CPU. Currently, this framework can be + used to offload memory copies in the network stack and + RAID operations in the MD driver. This menu only presents + DMA Device drivers supported by the configured arch, it may + be empty in some cases. + +if DMADEVICES + +comment "DMA Devices" + +config INTEL_IOATDMA + tristate "Intel I/OAT DMA support" + depends on PCI && X86 + select DMA_ENGINE + select DCA + help + Enable support for the Intel(R) I/OAT DMA engine present + in recent Intel Xeon chipsets. + + Say Y here if you have such a chipset. + + If unsure, say N. + +config INTEL_IOP_ADMA + tristate "Intel IOP ADMA support" + depends on ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX + select ASYNC_CORE + select DMA_ENGINE + help + Enable support for the Intel(R) IOP Series RAID engines. + +config DW_DMAC + tristate "Synopsys DesignWare AHB DMA support" + depends on AVR32 + select DMA_ENGINE + default y if CPU_AT32AP7000 + help + Support the Synopsys DesignWare AHB DMA controller. This + can be integrated in chips such as the Atmel AT32ap7000. + +config FSL_DMA + tristate "Freescale Elo and Elo Plus DMA support" + depends on FSL_SOC + select DMA_ENGINE + ---help--- + Enable support for the Freescale Elo and Elo Plus DMA controllers. + The Elo is the DMA controller on some 82xx and 83xx parts, and the + Elo Plus is the DMA controller on 85xx and 86xx parts. + +config MV_XOR + bool "Marvell XOR engine support" + depends on PLAT_ORION + select ASYNC_CORE + select DMA_ENGINE + ---help--- + Enable support for the Marvell XOR engine. + +config DMA_ENGINE + bool + +comment "DMA Clients" + depends on DMA_ENGINE + +config NET_DMA + bool "Network: TCP receive copy offload" + depends on DMA_ENGINE && NET + default (INTEL_IOATDMA || FSL_DMA) + help + This enables the use of DMA engines in the network stack to + offload receive copy-to-user operations, freeing CPU cycles. + + Say Y here if you enabled INTEL_IOATDMA or FSL_DMA, otherwise + say N. + +config DMATEST + tristate "DMA Test client" + depends on DMA_ENGINE + help + Simple DMA test client. Say N unless you're debugging a + DMA Device driver. + +endif diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile new file mode 100644 index 0000000..14f5952 --- /dev/null +++ b/drivers/dma/Makefile @@ -0,0 +1,9 @@ +obj-$(CONFIG_DMA_ENGINE) += dmaengine.o +obj-$(CONFIG_NET_DMA) += iovlock.o +obj-$(CONFIG_DMATEST) += dmatest.o +obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o +ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o +obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o +obj-$(CONFIG_FSL_DMA) += fsldma.o +obj-$(CONFIG_MV_XOR) += mv_xor.o +obj-$(CONFIG_DW_DMAC) += dw_dmac.o diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c new file mode 100644 index 0000000..6579965 --- /dev/null +++ b/drivers/dma/dmaengine.c @@ -0,0 +1,635 @@ +/* + * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ + +/* + * This code implements the DMA subsystem. It provides a HW-neutral interface + * for other kernel code to use asynchronous memory copy capabilities, + * if present, and allows different HW DMA drivers to register as providing + * this capability. + * + * Due to the fact we are accelerating what is already a relatively fast + * operation, the code goes to great lengths to avoid additional overhead, + * such as locking. + * + * LOCKING: + * + * The subsystem keeps two global lists, dma_device_list and dma_client_list. + * Both of these are protected by a mutex, dma_list_mutex. + * + * Each device has a channels list, which runs unlocked but is never modified + * once the device is registered, it's just setup by the driver. + * + * Each client is responsible for keeping track of the channels it uses. See + * the definition of dma_event_callback in dmaengine.h. + * + * Each device has a kref, which is initialized to 1 when the device is + * registered. A kref_get is done for each device registered. When the + * device is released, the corresponding kref_put is done in the release + * method. Every time one of the device's channels is allocated to a client, + * a kref_get occurs. When the channel is freed, the corresponding kref_put + * happens. The device's release function does a completion, so + * unregister_device does a remove event, device_unregister, a kref_put + * for the first reference, then waits on the completion for all other + * references to finish. + * + * Each channel has an open-coded implementation of Rusty Russell's "bigref," + * with a kref and a per_cpu local_t. A dma_chan_get is called when a client + * signals that it wants to use a channel, and dma_chan_put is called when + * a channel is removed or a client using it is unregistered. A client can + * take extra references per outstanding transaction, as is the case with + * the NET DMA client. The release function does a kref_put on the device. + * -ChrisL, DanW + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/device.h> +#include <linux/dmaengine.h> +#include <linux/hardirq.h> +#include <linux/spinlock.h> +#include <linux/percpu.h> +#include <linux/rcupdate.h> +#include <linux/mutex.h> +#include <linux/jiffies.h> + +static DEFINE_MUTEX(dma_list_mutex); +static LIST_HEAD(dma_device_list); +static LIST_HEAD(dma_client_list); + +/* --- sysfs implementation --- */ + +static ssize_t show_memcpy_count(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct dma_chan *chan = to_dma_chan(dev); + unsigned long count = 0; + int i; + + for_each_possible_cpu(i) + count += per_cpu_ptr(chan->local, i)->memcpy_count; + + return sprintf(buf, "%lu\n", count); +} + +static ssize_t show_bytes_transferred(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct dma_chan *chan = to_dma_chan(dev); + unsigned long count = 0; + int i; + + for_each_possible_cpu(i) + count += per_cpu_ptr(chan->local, i)->bytes_transferred; + + return sprintf(buf, "%lu\n", count); +} + +static ssize_t show_in_use(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct dma_chan *chan = to_dma_chan(dev); + int in_use = 0; + + if (unlikely(chan->slow_ref) && + atomic_read(&chan->refcount.refcount) > 1) + in_use = 1; + else { + if (local_read(&(per_cpu_ptr(chan->local, + get_cpu())->refcount)) > 0) + in_use = 1; + put_cpu(); + } + + return sprintf(buf, "%d\n", in_use); +} + +static struct device_attribute dma_attrs[] = { + __ATTR(memcpy_count, S_IRUGO, show_memcpy_count, NULL), + __ATTR(bytes_transferred, S_IRUGO, show_bytes_transferred, NULL), + __ATTR(in_use, S_IRUGO, show_in_use, NULL), + __ATTR_NULL +}; + +static void dma_async_device_cleanup(struct kref *kref); + +static void dma_dev_release(struct device *dev) +{ + struct dma_chan *chan = to_dma_chan(dev); + kref_put(&chan->device->refcount, dma_async_device_cleanup); +} + +static struct class dma_devclass = { + .name = "dma", + .dev_attrs = dma_attrs, + .dev_release = dma_dev_release, +}; + +/* --- client and device registration --- */ + +#define dma_chan_satisfies_mask(chan, mask) \ + __dma_chan_satisfies_mask((chan), &(mask)) +static int +__dma_chan_satisfies_mask(struct dma_chan *chan, dma_cap_mask_t *want) +{ + dma_cap_mask_t has; + + bitmap_and(has.bits, want->bits, chan->device->cap_mask.bits, + DMA_TX_TYPE_END); + return bitmap_equal(want->bits, has.bits, DMA_TX_TYPE_END); +} + +/** + * dma_client_chan_alloc - try to allocate channels to a client + * @client: &dma_client + * + * Called with dma_list_mutex held. + */ +static void dma_client_chan_alloc(struct dma_client *client) +{ + struct dma_device *device; + struct dma_chan *chan; + int desc; /* allocated descriptor count */ + enum dma_state_client ack; + + /* Find a channel */ + list_for_each_entry(device, &dma_device_list, global_node) { + /* Does the client require a specific DMA controller? */ + if (client->slave && client->slave->dma_dev + && client->slave->dma_dev != device->dev) + continue; + + list_for_each_entry(chan, &device->channels, device_node) { + if (!dma_chan_satisfies_mask(chan, client->cap_mask)) + continue; + + desc = chan->device->device_alloc_chan_resources( + chan, client); + if (desc >= 0) { + ack = client->event_callback(client, + chan, + DMA_RESOURCE_AVAILABLE); + + /* we are done once this client rejects + * an available resource + */ + if (ack == DMA_ACK) { + dma_chan_get(chan); + chan->client_count++; + } else if (ack == DMA_NAK) + return; + } + } + } +} + +enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie) +{ + enum dma_status status; + unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000); + + dma_async_issue_pending(chan); + do { + status = dma_async_is_tx_complete(chan, cookie, NULL, NULL); + if (time_after_eq(jiffies, dma_sync_wait_timeout)) { + printk(KERN_ERR "dma_sync_wait_timeout!\n"); + return DMA_ERROR; + } + } while (status == DMA_IN_PROGRESS); + + return status; +} +EXPORT_SYMBOL(dma_sync_wait); + +/** + * dma_chan_cleanup - release a DMA channel's resources + * @kref: kernel reference structure that contains the DMA channel device + */ +void dma_chan_cleanup(struct kref *kref) +{ + struct dma_chan *chan = container_of(kref, struct dma_chan, refcount); + chan->device->device_free_chan_resources(chan); + kref_put(&chan->device->refcount, dma_async_device_cleanup); +} +EXPORT_SYMBOL(dma_chan_cleanup); + +static void dma_chan_free_rcu(struct rcu_head *rcu) +{ + struct dma_chan *chan = container_of(rcu, struct dma_chan, rcu); + int bias = 0x7FFFFFFF; + int i; + for_each_possible_cpu(i) + bias -= local_read(&per_cpu_ptr(chan->local, i)->refcount); + atomic_sub(bias, &chan->refcount.refcount); + kref_put(&chan->refcount, dma_chan_cleanup); +} + +static void dma_chan_release(struct dma_chan *chan) +{ + atomic_add(0x7FFFFFFF, &chan->refcount.refcount); + chan->slow_ref = 1; + call_rcu(&chan->rcu, dma_chan_free_rcu); +} + +/** + * dma_chans_notify_available - broadcast available channels to the clients + */ +static void dma_clients_notify_available(void) +{ + struct dma_client *client; + + mutex_lock(&dma_list_mutex); + + list_for_each_entry(client, &dma_client_list, global_node) + dma_client_chan_alloc(client); + + mutex_unlock(&dma_list_mutex); +} + +/** + * dma_chans_notify_available - tell the clients that a channel is going away + * @chan: channel on its way out + */ +static void dma_clients_notify_removed(struct dma_chan *chan) +{ + struct dma_client *client; + enum dma_state_client ack; + + mutex_lock(&dma_list_mutex); + + list_for_each_entry(client, &dma_client_list, global_node) { + ack = client->event_callback(client, chan, + DMA_RESOURCE_REMOVED); + + /* client was holding resources for this channel so + * free it + */ + if (ack == DMA_ACK) { + dma_chan_put(chan); + chan->client_count--; + } + } + + mutex_unlock(&dma_list_mutex); +} + +/** + * dma_async_client_register - register a &dma_client + * @client: ptr to a client structure with valid 'event_callback' and 'cap_mask' + */ +void dma_async_client_register(struct dma_client *client) +{ + /* validate client data */ + BUG_ON(dma_has_cap(DMA_SLAVE, client->cap_mask) && + !client->slave); + + mutex_lock(&dma_list_mutex); + list_add_tail(&client->global_node, &dma_client_list); + mutex_unlock(&dma_list_mutex); +} +EXPORT_SYMBOL(dma_async_client_register); + +/** + * dma_async_client_unregister - unregister a client and free the &dma_client + * @client: &dma_client to free + * + * Force frees any allocated DMA channels, frees the &dma_client memory + */ +void dma_async_client_unregister(struct dma_client *client) +{ + struct dma_device *device; + struct dma_chan *chan; + enum dma_state_client ack; + + if (!client) + return; + + mutex_lock(&dma_list_mutex); + /* free all channels the client is holding */ + list_for_each_entry(device, &dma_device_list, global_node) + list_for_each_entry(chan, &device->channels, device_node) { + ack = client->event_callback(client, chan, + DMA_RESOURCE_REMOVED); + + if (ack == DMA_ACK) { + dma_chan_put(chan); + chan->client_count--; + } + } + + list_del(&client->global_node); + mutex_unlock(&dma_list_mutex); +} +EXPORT_SYMBOL(dma_async_client_unregister); + +/** + * dma_async_client_chan_request - send all available channels to the + * client that satisfy the capability mask + * @client - requester + */ +void dma_async_client_chan_request(struct dma_client *client) +{ + mutex_lock(&dma_list_mutex); + dma_client_chan_alloc(client); + mutex_unlock(&dma_list_mutex); +} +EXPORT_SYMBOL(dma_async_client_chan_request); + +/** + * dma_async_device_register - registers DMA devices found + * @device: &dma_device + */ +int dma_async_device_register(struct dma_device *device) +{ + static int id; + int chancnt = 0, rc; + struct dma_chan* chan; + + if (!device) + return -ENODEV; + + /* validate device routines */ + BUG_ON(dma_has_cap(DMA_MEMCPY, device->cap_mask) && + !device->device_prep_dma_memcpy); + BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) && + !device->device_prep_dma_xor); + BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) && + !device->device_prep_dma_zero_sum); + BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) && + !device->device_prep_dma_memset); + BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) && + !device->device_prep_dma_interrupt); + BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) && + !device->device_prep_slave_sg); + BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) && + !device->device_terminate_all); + + BUG_ON(!device->device_alloc_chan_resources); + BUG_ON(!device->device_free_chan_resources); + BUG_ON(!device->device_is_tx_complete); + BUG_ON(!device->device_issue_pending); + BUG_ON(!device->dev); + + init_completion(&device->done); + kref_init(&device->refcount); + + mutex_lock(&dma_list_mutex); + device->dev_id = id++; + mutex_unlock(&dma_list_mutex); + + /* represent channels in sysfs. Probably want devs too */ + list_for_each_entry(chan, &device->channels, device_node) { + chan->local = alloc_percpu(typeof(*chan->local)); + if (chan->local == NULL) + continue; + + chan->chan_id = chancnt++; + chan->dev.class = &dma_devclass; + chan->dev.parent = device->dev; + dev_set_name(&chan->dev, "dma%dchan%d", + device->dev_id, chan->chan_id); + + rc = device_register(&chan->dev); + if (rc) { + chancnt--; + free_percpu(chan->local); + chan->local = NULL; + goto err_out; + } + + /* One for the channel, one of the class device */ + kref_get(&device->refcount); + kref_get(&device->refcount); + kref_init(&chan->refcount); + chan->client_count = 0; + chan->slow_ref = 0; + INIT_RCU_HEAD(&chan->rcu); + } + + mutex_lock(&dma_list_mutex); + list_add_tail(&device->global_node, &dma_device_list); + mutex_unlock(&dma_list_mutex); + + dma_clients_notify_available(); + + return 0; + +err_out: + list_for_each_entry(chan, &device->channels, device_node) { + if (chan->local == NULL) + continue; + kref_put(&device->refcount, dma_async_device_cleanup); + device_unregister(&chan->dev); + chancnt--; + free_percpu(chan->local); + } + return rc; +} +EXPORT_SYMBOL(dma_async_device_register); + +/** + * dma_async_device_cleanup - function called when all references are released + * @kref: kernel reference object + */ +static void dma_async_device_cleanup(struct kref *kref) +{ + struct dma_device *device; + + device = container_of(kref, struct dma_device, refcount); + complete(&device->done); +} + +/** + * dma_async_device_unregister - unregisters DMA devices + * @device: &dma_device + */ +void dma_async_device_unregister(struct dma_device *device) +{ + struct dma_chan *chan; + + mutex_lock(&dma_list_mutex); + list_del(&device->global_node); + mutex_unlock(&dma_list_mutex); + + list_for_each_entry(chan, &device->channels, device_node) { + dma_clients_notify_removed(chan); + device_unregister(&chan->dev); + dma_chan_release(chan); + } + + kref_put(&device->refcount, dma_async_device_cleanup); + wait_for_completion(&device->done); +} +EXPORT_SYMBOL(dma_async_device_unregister); + +/** + * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses + * @chan: DMA channel to offload copy to + * @dest: destination address (virtual) + * @src: source address (virtual) + * @len: length + * + * Both @dest and @src must be mappable to a bus address according to the + * DMA mapping API rules for streaming mappings. + * Both @dest and @src must stay memory resident (kernel memory or locked + * user space pages). + */ +dma_cookie_t +dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest, + void *src, size_t len) +{ + struct dma_device *dev = chan->device; + struct dma_async_tx_descriptor *tx; + dma_addr_t dma_dest, dma_src; + dma_cookie_t cookie; + int cpu; + + dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE); + dma_dest = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE); + tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, + DMA_CTRL_ACK); + + if (!tx) { + dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE); + dma_unmap_single(dev->dev, dma_dest, len, DMA_FROM_DEVICE); + return -ENOMEM; + } + + tx->callback = NULL; + cookie = tx->tx_submit(tx); + + cpu = get_cpu(); + per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; + per_cpu_ptr(chan->local, cpu)->memcpy_count++; + put_cpu(); + + return cookie; +} +EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf); + +/** + * dma_async_memcpy_buf_to_pg - offloaded copy from address to page + * @chan: DMA channel to offload copy to + * @page: destination page + * @offset: offset in page to copy to + * @kdata: source address (virtual) + * @len: length + * + * Both @page/@offset and @kdata must be mappable to a bus address according + * to the DMA mapping API rules for streaming mappings. + * Both @page/@offset and @kdata must stay memory resident (kernel memory or + * locked user space pages) + */ +dma_cookie_t +dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page, + unsigned int offset, void *kdata, size_t len) +{ + struct dma_device *dev = chan->device; + struct dma_async_tx_descriptor *tx; + dma_addr_t dma_dest, dma_src; + dma_cookie_t cookie; + int cpu; + + dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE); + dma_dest = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE); + tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, + DMA_CTRL_ACK); + + if (!tx) { + dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE); + dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE); + return -ENOMEM; + } + + tx->callback = NULL; + cookie = tx->tx_submit(tx); + + cpu = get_cpu(); + per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; + per_cpu_ptr(chan->local, cpu)->memcpy_count++; + put_cpu(); + + return cookie; +} +EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg); + +/** + * dma_async_memcpy_pg_to_pg - offloaded copy from page to page + * @chan: DMA channel to offload copy to + * @dest_pg: destination page + * @dest_off: offset in page to copy to + * @src_pg: source page + * @src_off: offset in page to copy from + * @len: length + * + * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus + * address according to the DMA mapping API rules for streaming mappings. + * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident + * (kernel memory or locked user space pages). + */ +dma_cookie_t +dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg, + unsigned int dest_off, struct page *src_pg, unsigned int src_off, + size_t len) +{ + struct dma_device *dev = chan->device; + struct dma_async_tx_descriptor *tx; + dma_addr_t dma_dest, dma_src; + dma_cookie_t cookie; + int cpu; + + dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE); + dma_dest = dma_map_page(dev->dev, dest_pg, dest_off, len, + DMA_FROM_DEVICE); + tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len, + DMA_CTRL_ACK); + + if (!tx) { + dma_unmap_page(dev->dev, dma_src, len, DMA_TO_DEVICE); + dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE); + return -ENOMEM; + } + + tx->callback = NULL; + cookie = tx->tx_submit(tx); + + cpu = get_cpu(); + per_cpu_ptr(chan->local, cpu)->bytes_transferred += len; + per_cpu_ptr(chan->local, cpu)->memcpy_count++; + put_cpu(); + + return cookie; +} +EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg); + +void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx, + struct dma_chan *chan) +{ + tx->chan = chan; + spin_lock_init(&tx->lock); +} +EXPORT_SYMBOL(dma_async_tx_descriptor_init); + +static int __init dma_bus_init(void) +{ + mutex_init(&dma_list_mutex); + return class_register(&dma_devclass); +} +subsys_initcall(dma_bus_init); + diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c new file mode 100644 index 0000000..ed9636b --- /dev/null +++ b/drivers/dma/dmatest.c @@ -0,0 +1,449 @@ +/* + * DMA Engine test module + * + * Copyright (C) 2007 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/delay.h> +#include <linux/dmaengine.h> +#include <linux/init.h> +#include <linux/kthread.h> +#include <linux/module.h> +#include <linux/moduleparam.h> +#include <linux/random.h> +#include <linux/wait.h> + +static unsigned int test_buf_size = 16384; +module_param(test_buf_size, uint, S_IRUGO); +MODULE_PARM_DESC(test_buf_size, "Size of the memcpy test buffer"); + +static char test_channel[20]; +module_param_string(channel, test_channel, sizeof(test_channel), S_IRUGO); +MODULE_PARM_DESC(channel, "Bus ID of the channel to test (default: any)"); + +static char test_device[20]; +module_param_string(device, test_device, sizeof(test_device), S_IRUGO); +MODULE_PARM_DESC(device, "Bus ID of the DMA Engine to test (default: any)"); + +static unsigned int threads_per_chan = 1; +module_param(threads_per_chan, uint, S_IRUGO); +MODULE_PARM_DESC(threads_per_chan, + "Number of threads to start per channel (default: 1)"); + +static unsigned int max_channels; +module_param(max_channels, uint, S_IRUGO); +MODULE_PARM_DESC(nr_channels, + "Maximum number of channels to use (default: all)"); + +/* + * Initialization patterns. All bytes in the source buffer has bit 7 + * set, all bytes in the destination buffer has bit 7 cleared. + * + * Bit 6 is set for all bytes which are to be copied by the DMA + * engine. Bit 5 is set for all bytes which are to be overwritten by + * the DMA engine. + * + * The remaining bits are the inverse of a counter which increments by + * one for each byte address. + */ +#define PATTERN_SRC 0x80 +#define PATTERN_DST 0x00 +#define PATTERN_COPY 0x40 +#define PATTERN_OVERWRITE 0x20 +#define PATTERN_COUNT_MASK 0x1f + +struct dmatest_thread { + struct list_head node; + struct task_struct *task; + struct dma_chan *chan; + u8 *srcbuf; + u8 *dstbuf; +}; + +struct dmatest_chan { + struct list_head node; + struct dma_chan *chan; + struct list_head threads; +}; + +/* + * These are protected by dma_list_mutex since they're only used by + * the DMA client event callback + */ +static LIST_HEAD(dmatest_channels); +static unsigned int nr_channels; + +static bool dmatest_match_channel(struct dma_chan *chan) +{ + if (test_channel[0] == '\0') + return true; + return strcmp(dev_name(&chan->dev), test_channel) == 0; +} + +static bool dmatest_match_device(struct dma_device *device) +{ + if (test_device[0] == '\0') + return true; + return strcmp(dev_name(device->dev), test_device) == 0; +} + +static unsigned long dmatest_random(void) +{ + unsigned long buf; + + get_random_bytes(&buf, sizeof(buf)); + return buf; +} + +static void dmatest_init_srcbuf(u8 *buf, unsigned int start, unsigned int len) +{ + unsigned int i; + + for (i = 0; i < start; i++) + buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK); + for ( ; i < start + len; i++) + buf[i] = PATTERN_SRC | PATTERN_COPY + | (~i & PATTERN_COUNT_MASK);; + for ( ; i < test_buf_size; i++) + buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK); +} + +static void dmatest_init_dstbuf(u8 *buf, unsigned int start, unsigned int len) +{ + unsigned int i; + + for (i = 0; i < start; i++) + buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK); + for ( ; i < start + len; i++) + buf[i] = PATTERN_DST | PATTERN_OVERWRITE + | (~i & PATTERN_COUNT_MASK); + for ( ; i < test_buf_size; i++) + buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK); +} + +static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index, + unsigned int counter, bool is_srcbuf) +{ + u8 diff = actual ^ pattern; + u8 expected = pattern | (~counter & PATTERN_COUNT_MASK); + const char *thread_name = current->comm; + + if (is_srcbuf) + pr_warning("%s: srcbuf[0x%x] overwritten!" + " Expected %02x, got %02x\n", + thread_name, index, expected, actual); + else if ((pattern & PATTERN_COPY) + && (diff & (PATTERN_COPY | PATTERN_OVERWRITE))) + pr_warning("%s: dstbuf[0x%x] not copied!" + " Expected %02x, got %02x\n", + thread_name, index, expected, actual); + else if (diff & PATTERN_SRC) + pr_warning("%s: dstbuf[0x%x] was copied!" + " Expected %02x, got %02x\n", + thread_name, index, expected, actual); + else + pr_warning("%s: dstbuf[0x%x] mismatch!" + " Expected %02x, got %02x\n", + thread_name, index, expected, actual); +} + +static unsigned int dmatest_verify(u8 *buf, unsigned int start, + unsigned int end, unsigned int counter, u8 pattern, + bool is_srcbuf) +{ + unsigned int i; + unsigned int error_count = 0; + u8 actual; + + for (i = start; i < end; i++) { + actual = buf[i]; + if (actual != (pattern | (~counter & PATTERN_COUNT_MASK))) { + if (error_count < 32) + dmatest_mismatch(actual, pattern, i, counter, + is_srcbuf); + error_count++; + } + counter++; + } + + if (error_count > 32) + pr_warning("%s: %u errors suppressed\n", + current->comm, error_count - 32); + + return error_count; +} + +/* + * This function repeatedly tests DMA transfers of various lengths and + * offsets until it is told to exit by kthread_stop(). There may be + * multiple threads running this function in parallel for a single + * channel, and there may be multiple channels being tested in + * parallel. + * + * Before each test, the source and destination buffer is initialized + * with a known pattern. This pattern is different depending on + * whether it's in an area which is supposed to be copied or + * overwritten, and different in the source and destination buffers. + * So if the DMA engine doesn't copy exactly what we tell it to copy, + * we'll notice. + */ +static int dmatest_func(void *data) +{ + struct dmatest_thread *thread = data; + struct dma_chan *chan; + const char *thread_name; + unsigned int src_off, dst_off, len; + unsigned int error_count; + unsigned int failed_tests = 0; + unsigned int total_tests = 0; + dma_cookie_t cookie; + enum dma_status status; + int ret; + + thread_name = current->comm; + + ret = -ENOMEM; + thread->srcbuf = kmalloc(test_buf_size, GFP_KERNEL); + if (!thread->srcbuf) + goto err_srcbuf; + thread->dstbuf = kmalloc(test_buf_size, GFP_KERNEL); + if (!thread->dstbuf) + goto err_dstbuf; + + smp_rmb(); + chan = thread->chan; + dma_chan_get(chan); + + while (!kthread_should_stop()) { + total_tests++; + + len = dmatest_random() % test_buf_size + 1; + src_off = dmatest_random() % (test_buf_size - len + 1); + dst_off = dmatest_random() % (test_buf_size - len + 1); + + dmatest_init_srcbuf(thread->srcbuf, src_off, len); + dmatest_init_dstbuf(thread->dstbuf, dst_off, len); + + cookie = dma_async_memcpy_buf_to_buf(chan, + thread->dstbuf + dst_off, + thread->srcbuf + src_off, + len); + if (dma_submit_error(cookie)) { + pr_warning("%s: #%u: submit error %d with src_off=0x%x " + "dst_off=0x%x len=0x%x\n", + thread_name, total_tests - 1, cookie, + src_off, dst_off, len); + msleep(100); + failed_tests++; + continue; + } + dma_async_memcpy_issue_pending(chan); + + do { + msleep(1); + status = dma_async_memcpy_complete( + chan, cookie, NULL, NULL); + } while (status == DMA_IN_PROGRESS); + + if (status == DMA_ERROR) { + pr_warning("%s: #%u: error during copy\n", + thread_name, total_tests - 1); + failed_tests++; + continue; + } + + error_count = 0; + + pr_debug("%s: verifying source buffer...\n", thread_name); + error_count += dmatest_verify(thread->srcbuf, 0, src_off, + 0, PATTERN_SRC, true); + error_count += dmatest_verify(thread->srcbuf, src_off, + src_off + len, src_off, + PATTERN_SRC | PATTERN_COPY, true); + error_count += dmatest_verify(thread->srcbuf, src_off + len, + test_buf_size, src_off + len, + PATTERN_SRC, true); + + pr_debug("%s: verifying dest buffer...\n", + thread->task->comm); + error_count += dmatest_verify(thread->dstbuf, 0, dst_off, + 0, PATTERN_DST, false); + error_count += dmatest_verify(thread->dstbuf, dst_off, + dst_off + len, src_off, + PATTERN_SRC | PATTERN_COPY, false); + error_count += dmatest_verify(thread->dstbuf, dst_off + len, + test_buf_size, dst_off + len, + PATTERN_DST, false); + + if (error_count) { + pr_warning("%s: #%u: %u errors with " + "src_off=0x%x dst_off=0x%x len=0x%x\n", + thread_name, total_tests - 1, error_count, + src_off, dst_off, len); + failed_tests++; + } else { + pr_debug("%s: #%u: No errors with " + "src_off=0x%x dst_off=0x%x len=0x%x\n", + thread_name, total_tests - 1, + src_off, dst_off, len); + } + } + + ret = 0; + dma_chan_put(chan); + kfree(thread->dstbuf); +err_dstbuf: + kfree(thread->srcbuf); +err_srcbuf: + pr_notice("%s: terminating after %u tests, %u failures (status %d)\n", + thread_name, total_tests, failed_tests, ret); + return ret; +} + +static void dmatest_cleanup_channel(struct dmatest_chan *dtc) +{ + struct dmatest_thread *thread; + struct dmatest_thread *_thread; + int ret; + + list_for_each_entry_safe(thread, _thread, &dtc->threads, node) { + ret = kthread_stop(thread->task); + pr_debug("dmatest: thread %s exited with status %d\n", + thread->task->comm, ret); + list_del(&thread->node); + kfree(thread); + } + kfree(dtc); +} + +static enum dma_state_client dmatest_add_channel(struct dma_chan *chan) +{ + struct dmatest_chan *dtc; + struct dmatest_thread *thread; + unsigned int i; + + /* Have we already been told about this channel? */ + list_for_each_entry(dtc, &dmatest_channels, node) + if (dtc->chan == chan) + return DMA_DUP; + + dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL); + if (!dtc) { + pr_warning("dmatest: No memory for %s\n", dev_name(&chan->dev)); + return DMA_NAK; + } + + dtc->chan = chan; + INIT_LIST_HEAD(&dtc->threads); + + for (i = 0; i < threads_per_chan; i++) { + thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL); + if (!thread) { + pr_warning("dmatest: No memory for %s-test%u\n", + dev_name(&chan->dev), i); + break; + } + thread->chan = dtc->chan; + smp_wmb(); + thread->task = kthread_run(dmatest_func, thread, "%s-test%u", + dev_name(&chan->dev), i); + if (IS_ERR(thread->task)) { + pr_warning("dmatest: Failed to run thread %s-test%u\n", + dev_name(&chan->dev), i); + kfree(thread); + break; + } + + /* srcbuf and dstbuf are allocated by the thread itself */ + + list_add_tail(&thread->node, &dtc->threads); + } + + pr_info("dmatest: Started %u threads using %s\n", i, dev_name(&chan->dev)); + + list_add_tail(&dtc->node, &dmatest_channels); + nr_channels++; + + return DMA_ACK; +} + +static enum dma_state_client dmatest_remove_channel(struct dma_chan *chan) +{ + struct dmatest_chan *dtc, *_dtc; + + list_for_each_entry_safe(dtc, _dtc, &dmatest_channels, node) { + if (dtc->chan == chan) { + list_del(&dtc->node); + dmatest_cleanup_channel(dtc); + pr_debug("dmatest: lost channel %s\n", + dev_name(&chan->dev)); + return DMA_ACK; + } + } + + return DMA_DUP; +} + +/* + * Start testing threads as new channels are assigned to us, and kill + * them when the channels go away. + * + * When we unregister the client, all channels are removed so this + * will also take care of cleaning things up when the module is + * unloaded. + */ +static enum dma_state_client +dmatest_event(struct dma_client *client, struct dma_chan *chan, + enum dma_state state) +{ + enum dma_state_client ack = DMA_NAK; + + switch (state) { + case DMA_RESOURCE_AVAILABLE: + if (!dmatest_match_channel(chan) + || !dmatest_match_device(chan->device)) + ack = DMA_DUP; + else if (max_channels && nr_channels >= max_channels) + ack = DMA_NAK; + else + ack = dmatest_add_channel(chan); + break; + + case DMA_RESOURCE_REMOVED: + ack = dmatest_remove_channel(chan); + break; + + default: + pr_info("dmatest: Unhandled event %u (%s)\n", + state, dev_name(&chan->dev)); + break; + } + + return ack; +} + +static struct dma_client dmatest_client = { + .event_callback = dmatest_event, +}; + +static int __init dmatest_init(void) +{ + dma_cap_set(DMA_MEMCPY, dmatest_client.cap_mask); + dma_async_client_register(&dmatest_client); + dma_async_client_chan_request(&dmatest_client); + + return 0; +} +module_init(dmatest_init); + +static void __exit dmatest_exit(void) +{ + dma_async_client_unregister(&dmatest_client); +} +module_exit(dmatest_exit); + +MODULE_AUTHOR("Haavard Skinnemoen <hskinnemoen@atmel.com>"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c new file mode 100644 index 0000000..0778d99 --- /dev/null +++ b/drivers/dma/dw_dmac.c @@ -0,0 +1,1122 @@ +/* + * Driver for the Synopsys DesignWare DMA Controller (aka DMACA on + * AVR32 systems.) + * + * Copyright (C) 2007-2008 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#include <linux/clk.h> +#include <linux/delay.h> +#include <linux/dmaengine.h> +#include <linux/dma-mapping.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/io.h> +#include <linux/mm.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/slab.h> + +#include "dw_dmac_regs.h" + +/* + * This supports the Synopsys "DesignWare AHB Central DMA Controller", + * (DW_ahb_dmac) which is used with various AMBA 2.0 systems (not all + * of which use ARM any more). See the "Databook" from Synopsys for + * information beyond what licensees probably provide. + * + * The driver has currently been tested only with the Atmel AT32AP7000, + * which does not support descriptor writeback. + */ + +/* NOTE: DMS+SMS is system-specific. We should get this information + * from the platform code somehow. + */ +#define DWC_DEFAULT_CTLLO (DWC_CTLL_DST_MSIZE(0) \ + | DWC_CTLL_SRC_MSIZE(0) \ + | DWC_CTLL_DMS(0) \ + | DWC_CTLL_SMS(1) \ + | DWC_CTLL_LLP_D_EN \ + | DWC_CTLL_LLP_S_EN) + +/* + * This is configuration-dependent and usually a funny size like 4095. + * Let's round it down to the nearest power of two. + * + * Note that this is a transfer count, i.e. if we transfer 32-bit + * words, we can do 8192 bytes per descriptor. + * + * This parameter is also system-specific. + */ +#define DWC_MAX_COUNT 2048U + +/* + * Number of descriptors to allocate for each channel. This should be + * made configurable somehow; preferably, the clients (at least the + * ones using slave transfers) should be able to give us a hint. + */ +#define NR_DESCS_PER_CHANNEL 64 + +/*----------------------------------------------------------------------*/ + +/* + * Because we're not relying on writeback from the controller (it may not + * even be configured into the core!) we don't need to use dma_pool. These + * descriptors -- and associated data -- are cacheable. We do need to make + * sure their dcache entries are written back before handing them off to + * the controller, though. + */ + +static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc) +{ + return list_entry(dwc->active_list.next, struct dw_desc, desc_node); +} + +static struct dw_desc *dwc_first_queued(struct dw_dma_chan *dwc) +{ + return list_entry(dwc->queue.next, struct dw_desc, desc_node); +} + +static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc) +{ + struct dw_desc *desc, *_desc; + struct dw_desc *ret = NULL; + unsigned int i = 0; + + spin_lock_bh(&dwc->lock); + list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) { + if (async_tx_test_ack(&desc->txd)) { + list_del(&desc->desc_node); + ret = desc; + break; + } + dev_dbg(&dwc->chan.dev, "desc %p not ACKed\n", desc); + i++; + } + spin_unlock_bh(&dwc->lock); + + dev_vdbg(&dwc->chan.dev, "scanned %u descriptors on freelist\n", i); + + return ret; +} + +static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc) +{ + struct dw_desc *child; + + list_for_each_entry(child, &desc->txd.tx_list, desc_node) + dma_sync_single_for_cpu(dwc->chan.dev.parent, + child->txd.phys, sizeof(child->lli), + DMA_TO_DEVICE); + dma_sync_single_for_cpu(dwc->chan.dev.parent, + desc->txd.phys, sizeof(desc->lli), + DMA_TO_DEVICE); +} + +/* + * Move a descriptor, including any children, to the free list. + * `desc' must not be on any lists. + */ +static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc) +{ + if (desc) { + struct dw_desc *child; + + dwc_sync_desc_for_cpu(dwc, desc); + + spin_lock_bh(&dwc->lock); + list_for_each_entry(child, &desc->txd.tx_list, desc_node) + dev_vdbg(&dwc->chan.dev, + "moving child desc %p to freelist\n", + child); + list_splice_init(&desc->txd.tx_list, &dwc->free_list); + dev_vdbg(&dwc->chan.dev, "moving desc %p to freelist\n", desc); + list_add(&desc->desc_node, &dwc->free_list); + spin_unlock_bh(&dwc->lock); + } +} + +/* Called with dwc->lock held and bh disabled */ +static dma_cookie_t +dwc_assign_cookie(struct dw_dma_chan *dwc, struct dw_desc *desc) +{ + dma_cookie_t cookie = dwc->chan.cookie; + + if (++cookie < 0) + cookie = 1; + + dwc->chan.cookie = cookie; + desc->txd.cookie = cookie; + + return cookie; +} + +/*----------------------------------------------------------------------*/ + +/* Called with dwc->lock held and bh disabled */ +static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first) +{ + struct dw_dma *dw = to_dw_dma(dwc->chan.device); + + /* ASSERT: channel is idle */ + if (dma_readl(dw, CH_EN) & dwc->mask) { + dev_err(&dwc->chan.dev, + "BUG: Attempted to start non-idle channel\n"); + dev_err(&dwc->chan.dev, + " SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n", + channel_readl(dwc, SAR), + channel_readl(dwc, DAR), + channel_readl(dwc, LLP), + channel_readl(dwc, CTL_HI), + channel_readl(dwc, CTL_LO)); + + /* The tasklet will hopefully advance the queue... */ + return; + } + + channel_writel(dwc, LLP, first->txd.phys); + channel_writel(dwc, CTL_LO, + DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN); + channel_writel(dwc, CTL_HI, 0); + channel_set_bit(dw, CH_EN, dwc->mask); +} + +/*----------------------------------------------------------------------*/ + +static void +dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc) +{ + dma_async_tx_callback callback; + void *param; + struct dma_async_tx_descriptor *txd = &desc->txd; + + dev_vdbg(&dwc->chan.dev, "descriptor %u complete\n", txd->cookie); + + dwc->completed = txd->cookie; + callback = txd->callback; + param = txd->callback_param; + + dwc_sync_desc_for_cpu(dwc, desc); + list_splice_init(&txd->tx_list, &dwc->free_list); + list_move(&desc->desc_node, &dwc->free_list); + + /* + * We use dma_unmap_page() regardless of how the buffers were + * mapped before they were submitted... + */ + if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP)) + dma_unmap_page(dwc->chan.dev.parent, desc->lli.dar, desc->len, + DMA_FROM_DEVICE); + if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP)) + dma_unmap_page(dwc->chan.dev.parent, desc->lli.sar, desc->len, + DMA_TO_DEVICE); + + /* + * The API requires that no submissions are done from a + * callback, so we don't need to drop the lock here + */ + if (callback) + callback(param); +} + +static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc) +{ + struct dw_desc *desc, *_desc; + LIST_HEAD(list); + + if (dma_readl(dw, CH_EN) & dwc->mask) { + dev_err(&dwc->chan.dev, + "BUG: XFER bit set, but channel not idle!\n"); + + /* Try to continue after resetting the channel... */ + channel_clear_bit(dw, CH_EN, dwc->mask); + while (dma_readl(dw, CH_EN) & dwc->mask) + cpu_relax(); + } + + /* + * Submit queued descriptors ASAP, i.e. before we go through + * the completed ones. + */ + if (!list_empty(&dwc->queue)) + dwc_dostart(dwc, dwc_first_queued(dwc)); + list_splice_init(&dwc->active_list, &list); + list_splice_init(&dwc->queue, &dwc->active_list); + + list_for_each_entry_safe(desc, _desc, &list, desc_node) + dwc_descriptor_complete(dwc, desc); +} + +static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc) +{ + dma_addr_t llp; + struct dw_desc *desc, *_desc; + struct dw_desc *child; + u32 status_xfer; + + /* + * Clear block interrupt flag before scanning so that we don't + * miss any, and read LLP before RAW_XFER to ensure it is + * valid if we decide to scan the list. + */ + dma_writel(dw, CLEAR.BLOCK, dwc->mask); + llp = channel_readl(dwc, LLP); + status_xfer = dma_readl(dw, RAW.XFER); + + if (status_xfer & dwc->mask) { + /* Everything we've submitted is done */ + dma_writel(dw, CLEAR.XFER, dwc->mask); + dwc_complete_all(dw, dwc); + return; + } + + dev_vdbg(&dwc->chan.dev, "scan_descriptors: llp=0x%x\n", llp); + + list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) { + if (desc->lli.llp == llp) + /* This one is currently in progress */ + return; + + list_for_each_entry(child, &desc->txd.tx_list, desc_node) + if (child->lli.llp == llp) + /* Currently in progress */ + return; + + /* + * No descriptors so far seem to be in progress, i.e. + * this one must be done. + */ + dwc_descriptor_complete(dwc, desc); + } + + dev_err(&dwc->chan.dev, + "BUG: All descriptors done, but channel not idle!\n"); + + /* Try to continue after resetting the channel... */ + channel_clear_bit(dw, CH_EN, dwc->mask); + while (dma_readl(dw, CH_EN) & dwc->mask) + cpu_relax(); + + if (!list_empty(&dwc->queue)) { + dwc_dostart(dwc, dwc_first_queued(dwc)); + list_splice_init(&dwc->queue, &dwc->active_list); + } +} + +static void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli) +{ + dev_printk(KERN_CRIT, &dwc->chan.dev, + " desc: s0x%x d0x%x l0x%x c0x%x:%x\n", + lli->sar, lli->dar, lli->llp, + lli->ctlhi, lli->ctllo); +} + +static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc) +{ + struct dw_desc *bad_desc; + struct dw_desc *child; + + dwc_scan_descriptors(dw, dwc); + + /* + * The descriptor currently at the head of the active list is + * borked. Since we don't have any way to report errors, we'll + * just have to scream loudly and try to carry on. + */ + bad_desc = dwc_first_active(dwc); + list_del_init(&bad_desc->desc_node); + list_splice_init(&dwc->queue, dwc->active_list.prev); + + /* Clear the error flag and try to restart the controller */ + dma_writel(dw, CLEAR.ERROR, dwc->mask); + if (!list_empty(&dwc->active_list)) + dwc_dostart(dwc, dwc_first_active(dwc)); + + /* + * KERN_CRITICAL may seem harsh, but since this only happens + * when someone submits a bad physical address in a + * descriptor, we should consider ourselves lucky that the + * controller flagged an error instead of scribbling over + * random memory locations. + */ + dev_printk(KERN_CRIT, &dwc->chan.dev, + "Bad descriptor submitted for DMA!\n"); + dev_printk(KERN_CRIT, &dwc->chan.dev, + " cookie: %d\n", bad_desc->txd.cookie); + dwc_dump_lli(dwc, &bad_desc->lli); + list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node) + dwc_dump_lli(dwc, &child->lli); + + /* Pretend the descriptor completed successfully */ + dwc_descriptor_complete(dwc, bad_desc); +} + +static void dw_dma_tasklet(unsigned long data) +{ + struct dw_dma *dw = (struct dw_dma *)data; + struct dw_dma_chan *dwc; + u32 status_block; + u32 status_xfer; + u32 status_err; + int i; + + status_block = dma_readl(dw, RAW.BLOCK); + status_xfer = dma_readl(dw, RAW.XFER); + status_err = dma_readl(dw, RAW.ERROR); + + dev_vdbg(dw->dma.dev, "tasklet: status_block=%x status_err=%x\n", + status_block, status_err); + + for (i = 0; i < dw->dma.chancnt; i++) { + dwc = &dw->chan[i]; + spin_lock(&dwc->lock); + if (status_err & (1 << i)) + dwc_handle_error(dw, dwc); + else if ((status_block | status_xfer) & (1 << i)) + dwc_scan_descriptors(dw, dwc); + spin_unlock(&dwc->lock); + } + + /* + * Re-enable interrupts. Block Complete interrupts are only + * enabled if the INT_EN bit in the descriptor is set. This + * will trigger a scan before the whole list is done. + */ + channel_set_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_set_bit(dw, MASK.BLOCK, dw->all_chan_mask); + channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask); +} + +static irqreturn_t dw_dma_interrupt(int irq, void *dev_id) +{ + struct dw_dma *dw = dev_id; + u32 status; + + dev_vdbg(dw->dma.dev, "interrupt: status=0x%x\n", + dma_readl(dw, STATUS_INT)); + + /* + * Just disable the interrupts. We'll turn them back on in the + * softirq handler. + */ + channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); + channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); + + status = dma_readl(dw, STATUS_INT); + if (status) { + dev_err(dw->dma.dev, + "BUG: Unexpected interrupts pending: 0x%x\n", + status); + + /* Try to recover */ + channel_clear_bit(dw, MASK.XFER, (1 << 8) - 1); + channel_clear_bit(dw, MASK.BLOCK, (1 << 8) - 1); + channel_clear_bit(dw, MASK.SRC_TRAN, (1 << 8) - 1); + channel_clear_bit(dw, MASK.DST_TRAN, (1 << 8) - 1); + channel_clear_bit(dw, MASK.ERROR, (1 << 8) - 1); + } + + tasklet_schedule(&dw->tasklet); + + return IRQ_HANDLED; +} + +/*----------------------------------------------------------------------*/ + +static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct dw_desc *desc = txd_to_dw_desc(tx); + struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan); + dma_cookie_t cookie; + + spin_lock_bh(&dwc->lock); + cookie = dwc_assign_cookie(dwc, desc); + + /* + * REVISIT: We should attempt to chain as many descriptors as + * possible, perhaps even appending to those already submitted + * for DMA. But this is hard to do in a race-free manner. + */ + if (list_empty(&dwc->active_list)) { + dev_vdbg(&tx->chan->dev, "tx_submit: started %u\n", + desc->txd.cookie); + dwc_dostart(dwc, desc); + list_add_tail(&desc->desc_node, &dwc->active_list); + } else { + dev_vdbg(&tx->chan->dev, "tx_submit: queued %u\n", + desc->txd.cookie); + + list_add_tail(&desc->desc_node, &dwc->queue); + } + + spin_unlock_bh(&dwc->lock); + + return cookie; +} + +static struct dma_async_tx_descriptor * +dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_desc *desc; + struct dw_desc *first; + struct dw_desc *prev; + size_t xfer_count; + size_t offset; + unsigned int src_width; + unsigned int dst_width; + u32 ctllo; + + dev_vdbg(&chan->dev, "prep_dma_memcpy d0x%x s0x%x l0x%zx f0x%lx\n", + dest, src, len, flags); + + if (unlikely(!len)) { + dev_dbg(&chan->dev, "prep_dma_memcpy: length is zero!\n"); + return NULL; + } + + /* + * We can be a lot more clever here, but this should take care + * of the most common optimization. + */ + if (!((src | dest | len) & 3)) + src_width = dst_width = 2; + else if (!((src | dest | len) & 1)) + src_width = dst_width = 1; + else + src_width = dst_width = 0; + + ctllo = DWC_DEFAULT_CTLLO + | DWC_CTLL_DST_WIDTH(dst_width) + | DWC_CTLL_SRC_WIDTH(src_width) + | DWC_CTLL_DST_INC + | DWC_CTLL_SRC_INC + | DWC_CTLL_FC_M2M; + prev = first = NULL; + + for (offset = 0; offset < len; offset += xfer_count << src_width) { + xfer_count = min_t(size_t, (len - offset) >> src_width, + DWC_MAX_COUNT); + + desc = dwc_desc_get(dwc); + if (!desc) + goto err_desc_get; + + desc->lli.sar = src + offset; + desc->lli.dar = dest + offset; + desc->lli.ctllo = ctllo; + desc->lli.ctlhi = xfer_count; + + if (!first) { + first = desc; + } else { + prev->lli.llp = desc->txd.phys; + dma_sync_single_for_device(chan->dev.parent, + prev->txd.phys, sizeof(prev->lli), + DMA_TO_DEVICE); + list_add_tail(&desc->desc_node, + &first->txd.tx_list); + } + prev = desc; + } + + + if (flags & DMA_PREP_INTERRUPT) + /* Trigger interrupt after last block */ + prev->lli.ctllo |= DWC_CTLL_INT_EN; + + prev->lli.llp = 0; + dma_sync_single_for_device(chan->dev.parent, + prev->txd.phys, sizeof(prev->lli), + DMA_TO_DEVICE); + + first->txd.flags = flags; + first->len = len; + + return &first->txd; + +err_desc_get: + dwc_desc_put(dwc, first); + return NULL; +} + +static struct dma_async_tx_descriptor * +dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl, + unsigned int sg_len, enum dma_data_direction direction, + unsigned long flags) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma_slave *dws = dwc->dws; + struct dw_desc *prev; + struct dw_desc *first; + u32 ctllo; + dma_addr_t reg; + unsigned int reg_width; + unsigned int mem_width; + unsigned int i; + struct scatterlist *sg; + size_t total_len = 0; + + dev_vdbg(&chan->dev, "prep_dma_slave\n"); + + if (unlikely(!dws || !sg_len)) + return NULL; + + reg_width = dws->slave.reg_width; + prev = first = NULL; + + sg_len = dma_map_sg(chan->dev.parent, sgl, sg_len, direction); + + switch (direction) { + case DMA_TO_DEVICE: + ctllo = (DWC_DEFAULT_CTLLO + | DWC_CTLL_DST_WIDTH(reg_width) + | DWC_CTLL_DST_FIX + | DWC_CTLL_SRC_INC + | DWC_CTLL_FC_M2P); + reg = dws->slave.tx_reg; + for_each_sg(sgl, sg, sg_len, i) { + struct dw_desc *desc; + u32 len; + u32 mem; + + desc = dwc_desc_get(dwc); + if (!desc) { + dev_err(&chan->dev, + "not enough descriptors available\n"); + goto err_desc_get; + } + + mem = sg_phys(sg); + len = sg_dma_len(sg); + mem_width = 2; + if (unlikely(mem & 3 || len & 3)) + mem_width = 0; + + desc->lli.sar = mem; + desc->lli.dar = reg; + desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width); + desc->lli.ctlhi = len >> mem_width; + + if (!first) { + first = desc; + } else { + prev->lli.llp = desc->txd.phys; + dma_sync_single_for_device(chan->dev.parent, + prev->txd.phys, + sizeof(prev->lli), + DMA_TO_DEVICE); + list_add_tail(&desc->desc_node, + &first->txd.tx_list); + } + prev = desc; + total_len += len; + } + break; + case DMA_FROM_DEVICE: + ctllo = (DWC_DEFAULT_CTLLO + | DWC_CTLL_SRC_WIDTH(reg_width) + | DWC_CTLL_DST_INC + | DWC_CTLL_SRC_FIX + | DWC_CTLL_FC_P2M); + + reg = dws->slave.rx_reg; + for_each_sg(sgl, sg, sg_len, i) { + struct dw_desc *desc; + u32 len; + u32 mem; + + desc = dwc_desc_get(dwc); + if (!desc) { + dev_err(&chan->dev, + "not enough descriptors available\n"); + goto err_desc_get; + } + + mem = sg_phys(sg); + len = sg_dma_len(sg); + mem_width = 2; + if (unlikely(mem & 3 || len & 3)) + mem_width = 0; + + desc->lli.sar = reg; + desc->lli.dar = mem; + desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width); + desc->lli.ctlhi = len >> reg_width; + + if (!first) { + first = desc; + } else { + prev->lli.llp = desc->txd.phys; + dma_sync_single_for_device(chan->dev.parent, + prev->txd.phys, + sizeof(prev->lli), + DMA_TO_DEVICE); + list_add_tail(&desc->desc_node, + &first->txd.tx_list); + } + prev = desc; + total_len += len; + } + break; + default: + return NULL; + } + + if (flags & DMA_PREP_INTERRUPT) + /* Trigger interrupt after last block */ + prev->lli.ctllo |= DWC_CTLL_INT_EN; + + prev->lli.llp = 0; + dma_sync_single_for_device(chan->dev.parent, + prev->txd.phys, sizeof(prev->lli), + DMA_TO_DEVICE); + + first->len = total_len; + + return &first->txd; + +err_desc_get: + dwc_desc_put(dwc, first); + return NULL; +} + +static void dwc_terminate_all(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); + struct dw_desc *desc, *_desc; + LIST_HEAD(list); + + /* + * This is only called when something went wrong elsewhere, so + * we don't really care about the data. Just disable the + * channel. We still have to poll the channel enable bit due + * to AHB/HSB limitations. + */ + spin_lock_bh(&dwc->lock); + + channel_clear_bit(dw, CH_EN, dwc->mask); + + while (dma_readl(dw, CH_EN) & dwc->mask) + cpu_relax(); + + /* active_list entries will end up before queued entries */ + list_splice_init(&dwc->queue, &list); + list_splice_init(&dwc->active_list, &list); + + spin_unlock_bh(&dwc->lock); + + /* Flush all pending and queued descriptors */ + list_for_each_entry_safe(desc, _desc, &list, desc_node) + dwc_descriptor_complete(dwc, desc); +} + +static enum dma_status +dwc_is_tx_complete(struct dma_chan *chan, + dma_cookie_t cookie, + dma_cookie_t *done, dma_cookie_t *used) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + dma_cookie_t last_used; + dma_cookie_t last_complete; + int ret; + + last_complete = dwc->completed; + last_used = chan->cookie; + + ret = dma_async_is_complete(cookie, last_complete, last_used); + if (ret != DMA_SUCCESS) { + dwc_scan_descriptors(to_dw_dma(chan->device), dwc); + + last_complete = dwc->completed; + last_used = chan->cookie; + + ret = dma_async_is_complete(cookie, last_complete, last_used); + } + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + return ret; +} + +static void dwc_issue_pending(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + + spin_lock_bh(&dwc->lock); + if (!list_empty(&dwc->queue)) + dwc_scan_descriptors(to_dw_dma(chan->device), dwc); + spin_unlock_bh(&dwc->lock); +} + +static int dwc_alloc_chan_resources(struct dma_chan *chan, + struct dma_client *client) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); + struct dw_desc *desc; + struct dma_slave *slave; + struct dw_dma_slave *dws; + int i; + u32 cfghi; + u32 cfglo; + + dev_vdbg(&chan->dev, "alloc_chan_resources\n"); + + /* Channels doing slave DMA can only handle one client. */ + if (dwc->dws || client->slave) { + if (chan->client_count) + return -EBUSY; + } + + /* ASSERT: channel is idle */ + if (dma_readl(dw, CH_EN) & dwc->mask) { + dev_dbg(&chan->dev, "DMA channel not idle?\n"); + return -EIO; + } + + dwc->completed = chan->cookie = 1; + + cfghi = DWC_CFGH_FIFO_MODE; + cfglo = 0; + + slave = client->slave; + if (slave) { + /* + * We need controller-specific data to set up slave + * transfers. + */ + BUG_ON(!slave->dma_dev || slave->dma_dev != dw->dma.dev); + + dws = container_of(slave, struct dw_dma_slave, slave); + + dwc->dws = dws; + cfghi = dws->cfg_hi; + cfglo = dws->cfg_lo; + } else { + dwc->dws = NULL; + } + + channel_writel(dwc, CFG_LO, cfglo); + channel_writel(dwc, CFG_HI, cfghi); + + /* + * NOTE: some controllers may have additional features that we + * need to initialize here, like "scatter-gather" (which + * doesn't mean what you think it means), and status writeback. + */ + + spin_lock_bh(&dwc->lock); + i = dwc->descs_allocated; + while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) { + spin_unlock_bh(&dwc->lock); + + desc = kzalloc(sizeof(struct dw_desc), GFP_KERNEL); + if (!desc) { + dev_info(&chan->dev, + "only allocated %d descriptors\n", i); + spin_lock_bh(&dwc->lock); + break; + } + + dma_async_tx_descriptor_init(&desc->txd, chan); + desc->txd.tx_submit = dwc_tx_submit; + desc->txd.flags = DMA_CTRL_ACK; + INIT_LIST_HEAD(&desc->txd.tx_list); + desc->txd.phys = dma_map_single(chan->dev.parent, &desc->lli, + sizeof(desc->lli), DMA_TO_DEVICE); + dwc_desc_put(dwc, desc); + + spin_lock_bh(&dwc->lock); + i = ++dwc->descs_allocated; + } + + /* Enable interrupts */ + channel_set_bit(dw, MASK.XFER, dwc->mask); + channel_set_bit(dw, MASK.BLOCK, dwc->mask); + channel_set_bit(dw, MASK.ERROR, dwc->mask); + + spin_unlock_bh(&dwc->lock); + + dev_dbg(&chan->dev, + "alloc_chan_resources allocated %d descriptors\n", i); + + return i; +} + +static void dwc_free_chan_resources(struct dma_chan *chan) +{ + struct dw_dma_chan *dwc = to_dw_dma_chan(chan); + struct dw_dma *dw = to_dw_dma(chan->device); + struct dw_desc *desc, *_desc; + LIST_HEAD(list); + + dev_dbg(&chan->dev, "free_chan_resources (descs allocated=%u)\n", + dwc->descs_allocated); + + /* ASSERT: channel is idle */ + BUG_ON(!list_empty(&dwc->active_list)); + BUG_ON(!list_empty(&dwc->queue)); + BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask); + + spin_lock_bh(&dwc->lock); + list_splice_init(&dwc->free_list, &list); + dwc->descs_allocated = 0; + dwc->dws = NULL; + + /* Disable interrupts */ + channel_clear_bit(dw, MASK.XFER, dwc->mask); + channel_clear_bit(dw, MASK.BLOCK, dwc->mask); + channel_clear_bit(dw, MASK.ERROR, dwc->mask); + + spin_unlock_bh(&dwc->lock); + + list_for_each_entry_safe(desc, _desc, &list, desc_node) { + dev_vdbg(&chan->dev, " freeing descriptor %p\n", desc); + dma_unmap_single(chan->dev.parent, desc->txd.phys, + sizeof(desc->lli), DMA_TO_DEVICE); + kfree(desc); + } + + dev_vdbg(&chan->dev, "free_chan_resources done\n"); +} + +/*----------------------------------------------------------------------*/ + +static void dw_dma_off(struct dw_dma *dw) +{ + dma_writel(dw, CFG, 0); + + channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); + channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask); + channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask); + channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); + + while (dma_readl(dw, CFG) & DW_CFG_DMA_EN) + cpu_relax(); +} + +static int __init dw_probe(struct platform_device *pdev) +{ + struct dw_dma_platform_data *pdata; + struct resource *io; + struct dw_dma *dw; + size_t size; + int irq; + int err; + int i; + + pdata = pdev->dev.platform_data; + if (!pdata || pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS) + return -EINVAL; + + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!io) + return -EINVAL; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + size = sizeof(struct dw_dma); + size += pdata->nr_channels * sizeof(struct dw_dma_chan); + dw = kzalloc(size, GFP_KERNEL); + if (!dw) + return -ENOMEM; + + if (!request_mem_region(io->start, DW_REGLEN, pdev->dev.driver->name)) { + err = -EBUSY; + goto err_kfree; + } + + memset(dw, 0, sizeof *dw); + + dw->regs = ioremap(io->start, DW_REGLEN); + if (!dw->regs) { + err = -ENOMEM; + goto err_release_r; + } + + dw->clk = clk_get(&pdev->dev, "hclk"); + if (IS_ERR(dw->clk)) { + err = PTR_ERR(dw->clk); + goto err_clk; + } + clk_enable(dw->clk); + + /* force dma off, just in case */ + dw_dma_off(dw); + + err = request_irq(irq, dw_dma_interrupt, 0, "dw_dmac", dw); + if (err) + goto err_irq; + + platform_set_drvdata(pdev, dw); + + tasklet_init(&dw->tasklet, dw_dma_tasklet, (unsigned long)dw); + + dw->all_chan_mask = (1 << pdata->nr_channels) - 1; + + INIT_LIST_HEAD(&dw->dma.channels); + for (i = 0; i < pdata->nr_channels; i++, dw->dma.chancnt++) { + struct dw_dma_chan *dwc = &dw->chan[i]; + + dwc->chan.device = &dw->dma; + dwc->chan.cookie = dwc->completed = 1; + dwc->chan.chan_id = i; + list_add_tail(&dwc->chan.device_node, &dw->dma.channels); + + dwc->ch_regs = &__dw_regs(dw)->CHAN[i]; + spin_lock_init(&dwc->lock); + dwc->mask = 1 << i; + + INIT_LIST_HEAD(&dwc->active_list); + INIT_LIST_HEAD(&dwc->queue); + INIT_LIST_HEAD(&dwc->free_list); + + channel_clear_bit(dw, CH_EN, dwc->mask); + } + + /* Clear/disable all interrupts on all channels. */ + dma_writel(dw, CLEAR.XFER, dw->all_chan_mask); + dma_writel(dw, CLEAR.BLOCK, dw->all_chan_mask); + dma_writel(dw, CLEAR.SRC_TRAN, dw->all_chan_mask); + dma_writel(dw, CLEAR.DST_TRAN, dw->all_chan_mask); + dma_writel(dw, CLEAR.ERROR, dw->all_chan_mask); + + channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask); + channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask); + channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask); + channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask); + channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask); + + dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask); + dma_cap_set(DMA_SLAVE, dw->dma.cap_mask); + dw->dma.dev = &pdev->dev; + dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources; + dw->dma.device_free_chan_resources = dwc_free_chan_resources; + + dw->dma.device_prep_dma_memcpy = dwc_prep_dma_memcpy; + + dw->dma.device_prep_slave_sg = dwc_prep_slave_sg; + dw->dma.device_terminate_all = dwc_terminate_all; + + dw->dma.device_is_tx_complete = dwc_is_tx_complete; + dw->dma.device_issue_pending = dwc_issue_pending; + + dma_writel(dw, CFG, DW_CFG_DMA_EN); + + printk(KERN_INFO "%s: DesignWare DMA Controller, %d channels\n", + pdev->dev.bus_id, dw->dma.chancnt); + + dma_async_device_register(&dw->dma); + + return 0; + +err_irq: + clk_disable(dw->clk); + clk_put(dw->clk); +err_clk: + iounmap(dw->regs); + dw->regs = NULL; +err_release_r: + release_resource(io); +err_kfree: + kfree(dw); + return err; +} + +static int __exit dw_remove(struct platform_device *pdev) +{ + struct dw_dma *dw = platform_get_drvdata(pdev); + struct dw_dma_chan *dwc, *_dwc; + struct resource *io; + + dw_dma_off(dw); + dma_async_device_unregister(&dw->dma); + + free_irq(platform_get_irq(pdev, 0), dw); + tasklet_kill(&dw->tasklet); + + list_for_each_entry_safe(dwc, _dwc, &dw->dma.channels, + chan.device_node) { + list_del(&dwc->chan.device_node); + channel_clear_bit(dw, CH_EN, dwc->mask); + } + + clk_disable(dw->clk); + clk_put(dw->clk); + + iounmap(dw->regs); + dw->regs = NULL; + + io = platform_get_resource(pdev, IORESOURCE_MEM, 0); + release_mem_region(io->start, DW_REGLEN); + + kfree(dw); + + return 0; +} + +static void dw_shutdown(struct platform_device *pdev) +{ + struct dw_dma *dw = platform_get_drvdata(pdev); + + dw_dma_off(platform_get_drvdata(pdev)); + clk_disable(dw->clk); +} + +static int dw_suspend_late(struct platform_device *pdev, pm_message_t mesg) +{ + struct dw_dma *dw = platform_get_drvdata(pdev); + + dw_dma_off(platform_get_drvdata(pdev)); + clk_disable(dw->clk); + return 0; +} + +static int dw_resume_early(struct platform_device *pdev) +{ + struct dw_dma *dw = platform_get_drvdata(pdev); + + clk_enable(dw->clk); + dma_writel(dw, CFG, DW_CFG_DMA_EN); + return 0; + +} + +static struct platform_driver dw_driver = { + .remove = __exit_p(dw_remove), + .shutdown = dw_shutdown, + .suspend_late = dw_suspend_late, + .resume_early = dw_resume_early, + .driver = { + .name = "dw_dmac", + }, +}; + +static int __init dw_init(void) +{ + return platform_driver_probe(&dw_driver, dw_probe); +} +module_init(dw_init); + +static void __exit dw_exit(void) +{ + platform_driver_unregister(&dw_driver); +} +module_exit(dw_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller driver"); +MODULE_AUTHOR("Haavard Skinnemoen <haavard.skinnemoen@atmel.com>"); diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h new file mode 100644 index 0000000..00fdd18 --- /dev/null +++ b/drivers/dma/dw_dmac_regs.h @@ -0,0 +1,225 @@ +/* + * Driver for the Synopsys DesignWare AHB DMA Controller + * + * Copyright (C) 2005-2007 Atmel Corporation + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/dw_dmac.h> + +#define DW_DMA_MAX_NR_CHANNELS 8 + +/* + * Redefine this macro to handle differences between 32- and 64-bit + * addressing, big vs. little endian, etc. + */ +#define DW_REG(name) u32 name; u32 __pad_##name + +/* Hardware register definitions. */ +struct dw_dma_chan_regs { + DW_REG(SAR); /* Source Address Register */ + DW_REG(DAR); /* Destination Address Register */ + DW_REG(LLP); /* Linked List Pointer */ + u32 CTL_LO; /* Control Register Low */ + u32 CTL_HI; /* Control Register High */ + DW_REG(SSTAT); + DW_REG(DSTAT); + DW_REG(SSTATAR); + DW_REG(DSTATAR); + u32 CFG_LO; /* Configuration Register Low */ + u32 CFG_HI; /* Configuration Register High */ + DW_REG(SGR); + DW_REG(DSR); +}; + +struct dw_dma_irq_regs { + DW_REG(XFER); + DW_REG(BLOCK); + DW_REG(SRC_TRAN); + DW_REG(DST_TRAN); + DW_REG(ERROR); +}; + +struct dw_dma_regs { + /* per-channel registers */ + struct dw_dma_chan_regs CHAN[DW_DMA_MAX_NR_CHANNELS]; + + /* irq handling */ + struct dw_dma_irq_regs RAW; /* r */ + struct dw_dma_irq_regs STATUS; /* r (raw & mask) */ + struct dw_dma_irq_regs MASK; /* rw (set = irq enabled) */ + struct dw_dma_irq_regs CLEAR; /* w (ack, affects "raw") */ + + DW_REG(STATUS_INT); /* r */ + + /* software handshaking */ + DW_REG(REQ_SRC); + DW_REG(REQ_DST); + DW_REG(SGL_REQ_SRC); + DW_REG(SGL_REQ_DST); + DW_REG(LAST_SRC); + DW_REG(LAST_DST); + + /* miscellaneous */ + DW_REG(CFG); + DW_REG(CH_EN); + DW_REG(ID); + DW_REG(TEST); + + /* optional encoded params, 0x3c8..0x3 */ +}; + +/* Bitfields in CTL_LO */ +#define DWC_CTLL_INT_EN (1 << 0) /* irqs enabled? */ +#define DWC_CTLL_DST_WIDTH(n) ((n)<<1) /* bytes per element */ +#define DWC_CTLL_SRC_WIDTH(n) ((n)<<4) +#define DWC_CTLL_DST_INC (0<<7) /* DAR update/not */ +#define DWC_CTLL_DST_DEC (1<<7) +#define DWC_CTLL_DST_FIX (2<<7) +#define DWC_CTLL_SRC_INC (0<<7) /* SAR update/not */ +#define DWC_CTLL_SRC_DEC (1<<9) +#define DWC_CTLL_SRC_FIX (2<<9) +#define DWC_CTLL_DST_MSIZE(n) ((n)<<11) /* burst, #elements */ +#define DWC_CTLL_SRC_MSIZE(n) ((n)<<14) +#define DWC_CTLL_S_GATH_EN (1 << 17) /* src gather, !FIX */ +#define DWC_CTLL_D_SCAT_EN (1 << 18) /* dst scatter, !FIX */ +#define DWC_CTLL_FC_M2M (0 << 20) /* mem-to-mem */ +#define DWC_CTLL_FC_M2P (1 << 20) /* mem-to-periph */ +#define DWC_CTLL_FC_P2M (2 << 20) /* periph-to-mem */ +#define DWC_CTLL_FC_P2P (3 << 20) /* periph-to-periph */ +/* plus 4 transfer types for peripheral-as-flow-controller */ +#define DWC_CTLL_DMS(n) ((n)<<23) /* dst master select */ +#define DWC_CTLL_SMS(n) ((n)<<25) /* src master select */ +#define DWC_CTLL_LLP_D_EN (1 << 27) /* dest block chain */ +#define DWC_CTLL_LLP_S_EN (1 << 28) /* src block chain */ + +/* Bitfields in CTL_HI */ +#define DWC_CTLH_DONE 0x00001000 +#define DWC_CTLH_BLOCK_TS_MASK 0x00000fff + +/* Bitfields in CFG_LO. Platform-configurable bits are in <linux/dw_dmac.h> */ +#define DWC_CFGL_CH_SUSP (1 << 8) /* pause xfer */ +#define DWC_CFGL_FIFO_EMPTY (1 << 9) /* pause xfer */ +#define DWC_CFGL_HS_DST (1 << 10) /* handshake w/dst */ +#define DWC_CFGL_HS_SRC (1 << 11) /* handshake w/src */ +#define DWC_CFGL_MAX_BURST(x) ((x) << 20) +#define DWC_CFGL_RELOAD_SAR (1 << 30) +#define DWC_CFGL_RELOAD_DAR (1 << 31) + +/* Bitfields in CFG_HI. Platform-configurable bits are in <linux/dw_dmac.h> */ +#define DWC_CFGH_DS_UPD_EN (1 << 5) +#define DWC_CFGH_SS_UPD_EN (1 << 6) + +/* Bitfields in SGR */ +#define DWC_SGR_SGI(x) ((x) << 0) +#define DWC_SGR_SGC(x) ((x) << 20) + +/* Bitfields in DSR */ +#define DWC_DSR_DSI(x) ((x) << 0) +#define DWC_DSR_DSC(x) ((x) << 20) + +/* Bitfields in CFG */ +#define DW_CFG_DMA_EN (1 << 0) + +#define DW_REGLEN 0x400 + +struct dw_dma_chan { + struct dma_chan chan; + void __iomem *ch_regs; + u8 mask; + + spinlock_t lock; + + /* these other elements are all protected by lock */ + dma_cookie_t completed; + struct list_head active_list; + struct list_head queue; + struct list_head free_list; + + struct dw_dma_slave *dws; + + unsigned int descs_allocated; +}; + +static inline struct dw_dma_chan_regs __iomem * +__dwc_regs(struct dw_dma_chan *dwc) +{ + return dwc->ch_regs; +} + +#define channel_readl(dwc, name) \ + __raw_readl(&(__dwc_regs(dwc)->name)) +#define channel_writel(dwc, name, val) \ + __raw_writel((val), &(__dwc_regs(dwc)->name)) + +static inline struct dw_dma_chan *to_dw_dma_chan(struct dma_chan *chan) +{ + return container_of(chan, struct dw_dma_chan, chan); +} + + +struct dw_dma { + struct dma_device dma; + void __iomem *regs; + struct tasklet_struct tasklet; + struct clk *clk; + + u8 all_chan_mask; + + struct dw_dma_chan chan[0]; +}; + +static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw) +{ + return dw->regs; +} + +#define dma_readl(dw, name) \ + __raw_readl(&(__dw_regs(dw)->name)) +#define dma_writel(dw, name, val) \ + __raw_writel((val), &(__dw_regs(dw)->name)) + +#define channel_set_bit(dw, reg, mask) \ + dma_writel(dw, reg, ((mask) << 8) | (mask)) +#define channel_clear_bit(dw, reg, mask) \ + dma_writel(dw, reg, ((mask) << 8) | 0) + +static inline struct dw_dma *to_dw_dma(struct dma_device *ddev) +{ + return container_of(ddev, struct dw_dma, dma); +} + +/* LLI == Linked List Item; a.k.a. DMA block descriptor */ +struct dw_lli { + /* values that are not changed by hardware */ + dma_addr_t sar; + dma_addr_t dar; + dma_addr_t llp; /* chain to next lli */ + u32 ctllo; + /* values that may get written back: */ + u32 ctlhi; + /* sstat and dstat can snapshot peripheral register state. + * silicon config may discard either or both... + */ + u32 sstat; + u32 dstat; +}; + +struct dw_desc { + /* FIRST values the hardware uses */ + struct dw_lli lli; + + /* THEN values for driver housekeeping */ + struct list_head desc_node; + struct dma_async_tx_descriptor txd; + size_t len; +}; + +static inline struct dw_desc * +txd_to_dw_desc(struct dma_async_tx_descriptor *txd) +{ + return container_of(txd, struct dw_desc, txd); +} diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c new file mode 100644 index 0000000..0b95dcc --- /dev/null +++ b/drivers/dma/fsldma.c @@ -0,0 +1,1036 @@ +/* + * Freescale MPC85xx, MPC83xx DMA Engine support + * + * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: + * Zhang Wei <wei.zhang@freescale.com>, Jul 2007 + * Ebony Zhu <ebony.zhu@freescale.com>, May 2007 + * + * Description: + * DMA engine driver for Freescale MPC8540 DMA controller, which is + * also fit for MPC8560, MPC8555, MPC8548, MPC8641, and etc. + * The support for MPC8349 DMA contorller is also added. + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/dmaengine.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/dmapool.h> +#include <linux/of_platform.h> + +#include "fsldma.h" + +static void dma_init(struct fsl_dma_chan *fsl_chan) +{ + /* Reset the channel */ + DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, 0, 32); + + switch (fsl_chan->feature & FSL_DMA_IP_MASK) { + case FSL_DMA_IP_85XX: + /* Set the channel to below modes: + * EIE - Error interrupt enable + * EOSIE - End of segments interrupt enable (basic mode) + * EOLNIE - End of links interrupt enable + */ + DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, FSL_DMA_MR_EIE + | FSL_DMA_MR_EOLNIE | FSL_DMA_MR_EOSIE, 32); + break; + case FSL_DMA_IP_83XX: + /* Set the channel to below modes: + * EOTIE - End-of-transfer interrupt enable + */ + DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, FSL_DMA_MR_EOTIE, + 32); + break; + } + +} + +static void set_sr(struct fsl_dma_chan *fsl_chan, u32 val) +{ + DMA_OUT(fsl_chan, &fsl_chan->reg_base->sr, val, 32); +} + +static u32 get_sr(struct fsl_dma_chan *fsl_chan) +{ + return DMA_IN(fsl_chan, &fsl_chan->reg_base->sr, 32); +} + +static void set_desc_cnt(struct fsl_dma_chan *fsl_chan, + struct fsl_dma_ld_hw *hw, u32 count) +{ + hw->count = CPU_TO_DMA(fsl_chan, count, 32); +} + +static void set_desc_src(struct fsl_dma_chan *fsl_chan, + struct fsl_dma_ld_hw *hw, dma_addr_t src) +{ + u64 snoop_bits; + + snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) + ? ((u64)FSL_DMA_SATR_SREADTYPE_SNOOP_READ << 32) : 0; + hw->src_addr = CPU_TO_DMA(fsl_chan, snoop_bits | src, 64); +} + +static void set_desc_dest(struct fsl_dma_chan *fsl_chan, + struct fsl_dma_ld_hw *hw, dma_addr_t dest) +{ + u64 snoop_bits; + + snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX) + ? ((u64)FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE << 32) : 0; + hw->dst_addr = CPU_TO_DMA(fsl_chan, snoop_bits | dest, 64); +} + +static void set_desc_next(struct fsl_dma_chan *fsl_chan, + struct fsl_dma_ld_hw *hw, dma_addr_t next) +{ + u64 snoop_bits; + + snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX) + ? FSL_DMA_SNEN : 0; + hw->next_ln_addr = CPU_TO_DMA(fsl_chan, snoop_bits | next, 64); +} + +static void set_cdar(struct fsl_dma_chan *fsl_chan, dma_addr_t addr) +{ + DMA_OUT(fsl_chan, &fsl_chan->reg_base->cdar, addr | FSL_DMA_SNEN, 64); +} + +static dma_addr_t get_cdar(struct fsl_dma_chan *fsl_chan) +{ + return DMA_IN(fsl_chan, &fsl_chan->reg_base->cdar, 64) & ~FSL_DMA_SNEN; +} + +static void set_ndar(struct fsl_dma_chan *fsl_chan, dma_addr_t addr) +{ + DMA_OUT(fsl_chan, &fsl_chan->reg_base->ndar, addr, 64); +} + +static dma_addr_t get_ndar(struct fsl_dma_chan *fsl_chan) +{ + return DMA_IN(fsl_chan, &fsl_chan->reg_base->ndar, 64); +} + +static u32 get_bcr(struct fsl_dma_chan *fsl_chan) +{ + return DMA_IN(fsl_chan, &fsl_chan->reg_base->bcr, 32); +} + +static int dma_is_idle(struct fsl_dma_chan *fsl_chan) +{ + u32 sr = get_sr(fsl_chan); + return (!(sr & FSL_DMA_SR_CB)) || (sr & FSL_DMA_SR_CH); +} + +static void dma_start(struct fsl_dma_chan *fsl_chan) +{ + u32 mr_set = 0;; + + if (fsl_chan->feature & FSL_DMA_CHAN_PAUSE_EXT) { + DMA_OUT(fsl_chan, &fsl_chan->reg_base->bcr, 0, 32); + mr_set |= FSL_DMA_MR_EMP_EN; + } else + DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, + DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) + & ~FSL_DMA_MR_EMP_EN, 32); + + if (fsl_chan->feature & FSL_DMA_CHAN_START_EXT) + mr_set |= FSL_DMA_MR_EMS_EN; + else + mr_set |= FSL_DMA_MR_CS; + + DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, + DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) + | mr_set, 32); +} + +static void dma_halt(struct fsl_dma_chan *fsl_chan) +{ + int i = 0; + DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, + DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) | FSL_DMA_MR_CA, + 32); + DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, + DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) & ~(FSL_DMA_MR_CS + | FSL_DMA_MR_EMS_EN | FSL_DMA_MR_CA), 32); + + while (!dma_is_idle(fsl_chan) && (i++ < 100)) + udelay(10); + if (i >= 100 && !dma_is_idle(fsl_chan)) + dev_err(fsl_chan->dev, "DMA halt timeout!\n"); +} + +static void set_ld_eol(struct fsl_dma_chan *fsl_chan, + struct fsl_desc_sw *desc) +{ + desc->hw.next_ln_addr = CPU_TO_DMA(fsl_chan, + DMA_TO_CPU(fsl_chan, desc->hw.next_ln_addr, 64) | FSL_DMA_EOL, + 64); +} + +static void append_ld_queue(struct fsl_dma_chan *fsl_chan, + struct fsl_desc_sw *new_desc) +{ + struct fsl_desc_sw *queue_tail = to_fsl_desc(fsl_chan->ld_queue.prev); + + if (list_empty(&fsl_chan->ld_queue)) + return; + + /* Link to the new descriptor physical address and + * Enable End-of-segment interrupt for + * the last link descriptor. + * (the previous node's next link descriptor) + * + * For FSL_DMA_IP_83xx, the snoop enable bit need be set. + */ + queue_tail->hw.next_ln_addr = CPU_TO_DMA(fsl_chan, + new_desc->async_tx.phys | FSL_DMA_EOSIE | + (((fsl_chan->feature & FSL_DMA_IP_MASK) + == FSL_DMA_IP_83XX) ? FSL_DMA_SNEN : 0), 64); +} + +/** + * fsl_chan_set_src_loop_size - Set source address hold transfer size + * @fsl_chan : Freescale DMA channel + * @size : Address loop size, 0 for disable loop + * + * The set source address hold transfer size. The source + * address hold or loop transfer size is when the DMA transfer + * data from source address (SA), if the loop size is 4, the DMA will + * read data from SA, SA + 1, SA + 2, SA + 3, then loop back to SA, + * SA + 1 ... and so on. + */ +static void fsl_chan_set_src_loop_size(struct fsl_dma_chan *fsl_chan, int size) +{ + switch (size) { + case 0: + DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, + DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) & + (~FSL_DMA_MR_SAHE), 32); + break; + case 1: + case 2: + case 4: + case 8: + DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, + DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) | + FSL_DMA_MR_SAHE | (__ilog2(size) << 14), + 32); + break; + } +} + +/** + * fsl_chan_set_dest_loop_size - Set destination address hold transfer size + * @fsl_chan : Freescale DMA channel + * @size : Address loop size, 0 for disable loop + * + * The set destination address hold transfer size. The destination + * address hold or loop transfer size is when the DMA transfer + * data to destination address (TA), if the loop size is 4, the DMA will + * write data to TA, TA + 1, TA + 2, TA + 3, then loop back to TA, + * TA + 1 ... and so on. + */ +static void fsl_chan_set_dest_loop_size(struct fsl_dma_chan *fsl_chan, int size) +{ + switch (size) { + case 0: + DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, + DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) & + (~FSL_DMA_MR_DAHE), 32); + break; + case 1: + case 2: + case 4: + case 8: + DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, + DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) | + FSL_DMA_MR_DAHE | (__ilog2(size) << 16), + 32); + break; + } +} + +/** + * fsl_chan_toggle_ext_pause - Toggle channel external pause status + * @fsl_chan : Freescale DMA channel + * @size : Pause control size, 0 for disable external pause control. + * The maximum is 1024. + * + * The Freescale DMA channel can be controlled by the external + * signal DREQ#. The pause control size is how many bytes are allowed + * to transfer before pausing the channel, after which a new assertion + * of DREQ# resumes channel operation. + */ +static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int size) +{ + if (size > 1024) + return; + + if (size) { + DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, + DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) + | ((__ilog2(size) << 24) & 0x0f000000), + 32); + fsl_chan->feature |= FSL_DMA_CHAN_PAUSE_EXT; + } else + fsl_chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT; +} + +/** + * fsl_chan_toggle_ext_start - Toggle channel external start status + * @fsl_chan : Freescale DMA channel + * @enable : 0 is disabled, 1 is enabled. + * + * If enable the external start, the channel can be started by an + * external DMA start pin. So the dma_start() does not start the + * transfer immediately. The DMA channel will wait for the + * control pin asserted. + */ +static void fsl_chan_toggle_ext_start(struct fsl_dma_chan *fsl_chan, int enable) +{ + if (enable) + fsl_chan->feature |= FSL_DMA_CHAN_START_EXT; + else + fsl_chan->feature &= ~FSL_DMA_CHAN_START_EXT; +} + +static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct fsl_desc_sw *desc = tx_to_fsl_desc(tx); + struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan); + unsigned long flags; + dma_cookie_t cookie; + + /* cookie increment and adding to ld_queue must be atomic */ + spin_lock_irqsave(&fsl_chan->desc_lock, flags); + + cookie = fsl_chan->common.cookie; + cookie++; + if (cookie < 0) + cookie = 1; + desc->async_tx.cookie = cookie; + fsl_chan->common.cookie = desc->async_tx.cookie; + + append_ld_queue(fsl_chan, desc); + list_splice_init(&desc->async_tx.tx_list, fsl_chan->ld_queue.prev); + + spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); + + return cookie; +} + +/** + * fsl_dma_alloc_descriptor - Allocate descriptor from channel's DMA pool. + * @fsl_chan : Freescale DMA channel + * + * Return - The descriptor allocated. NULL for failed. + */ +static struct fsl_desc_sw *fsl_dma_alloc_descriptor( + struct fsl_dma_chan *fsl_chan) +{ + dma_addr_t pdesc; + struct fsl_desc_sw *desc_sw; + + desc_sw = dma_pool_alloc(fsl_chan->desc_pool, GFP_ATOMIC, &pdesc); + if (desc_sw) { + memset(desc_sw, 0, sizeof(struct fsl_desc_sw)); + dma_async_tx_descriptor_init(&desc_sw->async_tx, + &fsl_chan->common); + desc_sw->async_tx.tx_submit = fsl_dma_tx_submit; + INIT_LIST_HEAD(&desc_sw->async_tx.tx_list); + desc_sw->async_tx.phys = pdesc; + } + + return desc_sw; +} + + +/** + * fsl_dma_alloc_chan_resources - Allocate resources for DMA channel. + * @fsl_chan : Freescale DMA channel + * + * This function will create a dma pool for descriptor allocation. + * + * Return - The number of descriptors allocated. + */ +static int fsl_dma_alloc_chan_resources(struct dma_chan *chan, + struct dma_client *client) +{ + struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan); + + /* Has this channel already been allocated? */ + if (fsl_chan->desc_pool) + return 1; + + /* We need the descriptor to be aligned to 32bytes + * for meeting FSL DMA specification requirement. + */ + fsl_chan->desc_pool = dma_pool_create("fsl_dma_engine_desc_pool", + fsl_chan->dev, sizeof(struct fsl_desc_sw), + 32, 0); + if (!fsl_chan->desc_pool) { + dev_err(fsl_chan->dev, "No memory for channel %d " + "descriptor dma pool.\n", fsl_chan->id); + return 0; + } + + return 1; +} + +/** + * fsl_dma_free_chan_resources - Free all resources of the channel. + * @fsl_chan : Freescale DMA channel + */ +static void fsl_dma_free_chan_resources(struct dma_chan *chan) +{ + struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan); + struct fsl_desc_sw *desc, *_desc; + unsigned long flags; + + dev_dbg(fsl_chan->dev, "Free all channel resources.\n"); + spin_lock_irqsave(&fsl_chan->desc_lock, flags); + list_for_each_entry_safe(desc, _desc, &fsl_chan->ld_queue, node) { +#ifdef FSL_DMA_LD_DEBUG + dev_dbg(fsl_chan->dev, + "LD %p will be released.\n", desc); +#endif + list_del(&desc->node); + /* free link descriptor */ + dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys); + } + spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); + dma_pool_destroy(fsl_chan->desc_pool); + + fsl_chan->desc_pool = NULL; +} + +static struct dma_async_tx_descriptor * +fsl_dma_prep_interrupt(struct dma_chan *chan, unsigned long flags) +{ + struct fsl_dma_chan *fsl_chan; + struct fsl_desc_sw *new; + + if (!chan) + return NULL; + + fsl_chan = to_fsl_chan(chan); + + new = fsl_dma_alloc_descriptor(fsl_chan); + if (!new) { + dev_err(fsl_chan->dev, "No free memory for link descriptor\n"); + return NULL; + } + + new->async_tx.cookie = -EBUSY; + new->async_tx.flags = flags; + + /* Insert the link descriptor to the LD ring */ + list_add_tail(&new->node, &new->async_tx.tx_list); + + /* Set End-of-link to the last link descriptor of new list*/ + set_ld_eol(fsl_chan, new); + + return &new->async_tx; +} + +static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy( + struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src, + size_t len, unsigned long flags) +{ + struct fsl_dma_chan *fsl_chan; + struct fsl_desc_sw *first = NULL, *prev = NULL, *new; + size_t copy; + LIST_HEAD(link_chain); + + if (!chan) + return NULL; + + if (!len) + return NULL; + + fsl_chan = to_fsl_chan(chan); + + do { + + /* Allocate the link descriptor from DMA pool */ + new = fsl_dma_alloc_descriptor(fsl_chan); + if (!new) { + dev_err(fsl_chan->dev, + "No free memory for link descriptor\n"); + return NULL; + } +#ifdef FSL_DMA_LD_DEBUG + dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new); +#endif + + copy = min(len, (size_t)FSL_DMA_BCR_MAX_CNT); + + set_desc_cnt(fsl_chan, &new->hw, copy); + set_desc_src(fsl_chan, &new->hw, dma_src); + set_desc_dest(fsl_chan, &new->hw, dma_dest); + + if (!first) + first = new; + else + set_desc_next(fsl_chan, &prev->hw, new->async_tx.phys); + + new->async_tx.cookie = 0; + async_tx_ack(&new->async_tx); + + prev = new; + len -= copy; + dma_src += copy; + dma_dest += copy; + + /* Insert the link descriptor to the LD ring */ + list_add_tail(&new->node, &first->async_tx.tx_list); + } while (len); + + new->async_tx.flags = flags; /* client is in control of this ack */ + new->async_tx.cookie = -EBUSY; + + /* Set End-of-link to the last link descriptor of new list*/ + set_ld_eol(fsl_chan, new); + + return first ? &first->async_tx : NULL; +} + +/** + * fsl_dma_update_completed_cookie - Update the completed cookie. + * @fsl_chan : Freescale DMA channel + */ +static void fsl_dma_update_completed_cookie(struct fsl_dma_chan *fsl_chan) +{ + struct fsl_desc_sw *cur_desc, *desc; + dma_addr_t ld_phy; + + ld_phy = get_cdar(fsl_chan) & FSL_DMA_NLDA_MASK; + + if (ld_phy) { + cur_desc = NULL; + list_for_each_entry(desc, &fsl_chan->ld_queue, node) + if (desc->async_tx.phys == ld_phy) { + cur_desc = desc; + break; + } + + if (cur_desc && cur_desc->async_tx.cookie) { + if (dma_is_idle(fsl_chan)) + fsl_chan->completed_cookie = + cur_desc->async_tx.cookie; + else + fsl_chan->completed_cookie = + cur_desc->async_tx.cookie - 1; + } + } +} + +/** + * fsl_chan_ld_cleanup - Clean up link descriptors + * @fsl_chan : Freescale DMA channel + * + * This function clean up the ld_queue of DMA channel. + * If 'in_intr' is set, the function will move the link descriptor to + * the recycle list. Otherwise, free it directly. + */ +static void fsl_chan_ld_cleanup(struct fsl_dma_chan *fsl_chan) +{ + struct fsl_desc_sw *desc, *_desc; + unsigned long flags; + + spin_lock_irqsave(&fsl_chan->desc_lock, flags); + + dev_dbg(fsl_chan->dev, "chan completed_cookie = %d\n", + fsl_chan->completed_cookie); + list_for_each_entry_safe(desc, _desc, &fsl_chan->ld_queue, node) { + dma_async_tx_callback callback; + void *callback_param; + + if (dma_async_is_complete(desc->async_tx.cookie, + fsl_chan->completed_cookie, fsl_chan->common.cookie) + == DMA_IN_PROGRESS) + break; + + callback = desc->async_tx.callback; + callback_param = desc->async_tx.callback_param; + + /* Remove from ld_queue list */ + list_del(&desc->node); + + dev_dbg(fsl_chan->dev, "link descriptor %p will be recycle.\n", + desc); + dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys); + + /* Run the link descriptor callback function */ + if (callback) { + spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); + dev_dbg(fsl_chan->dev, "link descriptor %p callback\n", + desc); + callback(callback_param); + spin_lock_irqsave(&fsl_chan->desc_lock, flags); + } + } + spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); +} + +/** + * fsl_chan_xfer_ld_queue - Transfer link descriptors in channel ld_queue. + * @fsl_chan : Freescale DMA channel + */ +static void fsl_chan_xfer_ld_queue(struct fsl_dma_chan *fsl_chan) +{ + struct list_head *ld_node; + dma_addr_t next_dest_addr; + unsigned long flags; + + if (!dma_is_idle(fsl_chan)) + return; + + dma_halt(fsl_chan); + + /* If there are some link descriptors + * not transfered in queue. We need to start it. + */ + spin_lock_irqsave(&fsl_chan->desc_lock, flags); + + /* Find the first un-transfer desciptor */ + for (ld_node = fsl_chan->ld_queue.next; + (ld_node != &fsl_chan->ld_queue) + && (dma_async_is_complete( + to_fsl_desc(ld_node)->async_tx.cookie, + fsl_chan->completed_cookie, + fsl_chan->common.cookie) == DMA_SUCCESS); + ld_node = ld_node->next); + + spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); + + if (ld_node != &fsl_chan->ld_queue) { + /* Get the ld start address from ld_queue */ + next_dest_addr = to_fsl_desc(ld_node)->async_tx.phys; + dev_dbg(fsl_chan->dev, "xfer LDs staring from %p\n", + (void *)next_dest_addr); + set_cdar(fsl_chan, next_dest_addr); + dma_start(fsl_chan); + } else { + set_cdar(fsl_chan, 0); + set_ndar(fsl_chan, 0); + } +} + +/** + * fsl_dma_memcpy_issue_pending - Issue the DMA start command + * @fsl_chan : Freescale DMA channel + */ +static void fsl_dma_memcpy_issue_pending(struct dma_chan *chan) +{ + struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan); + +#ifdef FSL_DMA_LD_DEBUG + struct fsl_desc_sw *ld; + unsigned long flags; + + spin_lock_irqsave(&fsl_chan->desc_lock, flags); + if (list_empty(&fsl_chan->ld_queue)) { + spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); + return; + } + + dev_dbg(fsl_chan->dev, "--memcpy issue--\n"); + list_for_each_entry(ld, &fsl_chan->ld_queue, node) { + int i; + dev_dbg(fsl_chan->dev, "Ch %d, LD %08x\n", + fsl_chan->id, ld->async_tx.phys); + for (i = 0; i < 8; i++) + dev_dbg(fsl_chan->dev, "LD offset %d: %08x\n", + i, *(((u32 *)&ld->hw) + i)); + } + dev_dbg(fsl_chan->dev, "----------------\n"); + spin_unlock_irqrestore(&fsl_chan->desc_lock, flags); +#endif + + fsl_chan_xfer_ld_queue(fsl_chan); +} + +/** + * fsl_dma_is_complete - Determine the DMA status + * @fsl_chan : Freescale DMA channel + */ +static enum dma_status fsl_dma_is_complete(struct dma_chan *chan, + dma_cookie_t cookie, + dma_cookie_t *done, + dma_cookie_t *used) +{ + struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan); + dma_cookie_t last_used; + dma_cookie_t last_complete; + + fsl_chan_ld_cleanup(fsl_chan); + + last_used = chan->cookie; + last_complete = fsl_chan->completed_cookie; + + if (done) + *done = last_complete; + + if (used) + *used = last_used; + + return dma_async_is_complete(cookie, last_complete, last_used); +} + +static irqreturn_t fsl_dma_chan_do_interrupt(int irq, void *data) +{ + struct fsl_dma_chan *fsl_chan = (struct fsl_dma_chan *)data; + u32 stat; + int update_cookie = 0; + int xfer_ld_q = 0; + + stat = get_sr(fsl_chan); + dev_dbg(fsl_chan->dev, "event: channel %d, stat = 0x%x\n", + fsl_chan->id, stat); + set_sr(fsl_chan, stat); /* Clear the event register */ + + stat &= ~(FSL_DMA_SR_CB | FSL_DMA_SR_CH); + if (!stat) + return IRQ_NONE; + + if (stat & FSL_DMA_SR_TE) + dev_err(fsl_chan->dev, "Transfer Error!\n"); + + /* Programming Error + * The DMA_INTERRUPT async_tx is a NULL transfer, which will + * triger a PE interrupt. + */ + if (stat & FSL_DMA_SR_PE) { + dev_dbg(fsl_chan->dev, "event: Programming Error INT\n"); + if (get_bcr(fsl_chan) == 0) { + /* BCR register is 0, this is a DMA_INTERRUPT async_tx. + * Now, update the completed cookie, and continue the + * next uncompleted transfer. + */ + update_cookie = 1; + xfer_ld_q = 1; + } + stat &= ~FSL_DMA_SR_PE; + } + + /* If the link descriptor segment transfer finishes, + * we will recycle the used descriptor. + */ + if (stat & FSL_DMA_SR_EOSI) { + dev_dbg(fsl_chan->dev, "event: End-of-segments INT\n"); + dev_dbg(fsl_chan->dev, "event: clndar %p, nlndar %p\n", + (void *)get_cdar(fsl_chan), (void *)get_ndar(fsl_chan)); + stat &= ~FSL_DMA_SR_EOSI; + update_cookie = 1; + } + + /* For MPC8349, EOCDI event need to update cookie + * and start the next transfer if it exist. + */ + if (stat & FSL_DMA_SR_EOCDI) { + dev_dbg(fsl_chan->dev, "event: End-of-Chain link INT\n"); + stat &= ~FSL_DMA_SR_EOCDI; + update_cookie = 1; + xfer_ld_q = 1; + } + + /* If it current transfer is the end-of-transfer, + * we should clear the Channel Start bit for + * prepare next transfer. + */ + if (stat & FSL_DMA_SR_EOLNI) { + dev_dbg(fsl_chan->dev, "event: End-of-link INT\n"); + stat &= ~FSL_DMA_SR_EOLNI; + xfer_ld_q = 1; + } + + if (update_cookie) + fsl_dma_update_completed_cookie(fsl_chan); + if (xfer_ld_q) + fsl_chan_xfer_ld_queue(fsl_chan); + if (stat) + dev_dbg(fsl_chan->dev, "event: unhandled sr 0x%02x\n", + stat); + + dev_dbg(fsl_chan->dev, "event: Exit\n"); + tasklet_schedule(&fsl_chan->tasklet); + return IRQ_HANDLED; +} + +static irqreturn_t fsl_dma_do_interrupt(int irq, void *data) +{ + struct fsl_dma_device *fdev = (struct fsl_dma_device *)data; + u32 gsr; + int ch_nr; + + gsr = (fdev->feature & FSL_DMA_BIG_ENDIAN) ? in_be32(fdev->reg_base) + : in_le32(fdev->reg_base); + ch_nr = (32 - ffs(gsr)) / 8; + + return fdev->chan[ch_nr] ? fsl_dma_chan_do_interrupt(irq, + fdev->chan[ch_nr]) : IRQ_NONE; +} + +static void dma_do_tasklet(unsigned long data) +{ + struct fsl_dma_chan *fsl_chan = (struct fsl_dma_chan *)data; + fsl_chan_ld_cleanup(fsl_chan); +} + +static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev, + struct device_node *node, u32 feature, const char *compatible) +{ + struct fsl_dma_chan *new_fsl_chan; + int err; + + /* alloc channel */ + new_fsl_chan = kzalloc(sizeof(struct fsl_dma_chan), GFP_KERNEL); + if (!new_fsl_chan) { + dev_err(fdev->dev, "No free memory for allocating " + "dma channels!\n"); + return -ENOMEM; + } + + /* get dma channel register base */ + err = of_address_to_resource(node, 0, &new_fsl_chan->reg); + if (err) { + dev_err(fdev->dev, "Can't get %s property 'reg'\n", + node->full_name); + goto err_no_reg; + } + + new_fsl_chan->feature = feature; + + if (!fdev->feature) + fdev->feature = new_fsl_chan->feature; + + /* If the DMA device's feature is different than its channels', + * report the bug. + */ + WARN_ON(fdev->feature != new_fsl_chan->feature); + + new_fsl_chan->dev = &new_fsl_chan->common.dev; + new_fsl_chan->reg_base = ioremap(new_fsl_chan->reg.start, + new_fsl_chan->reg.end - new_fsl_chan->reg.start + 1); + + new_fsl_chan->id = ((new_fsl_chan->reg.start - 0x100) & 0xfff) >> 7; + if (new_fsl_chan->id > FSL_DMA_MAX_CHANS_PER_DEVICE) { + dev_err(fdev->dev, "There is no %d channel!\n", + new_fsl_chan->id); + err = -EINVAL; + goto err_no_chan; + } + fdev->chan[new_fsl_chan->id] = new_fsl_chan; + tasklet_init(&new_fsl_chan->tasklet, dma_do_tasklet, + (unsigned long)new_fsl_chan); + + /* Init the channel */ + dma_init(new_fsl_chan); + + /* Clear cdar registers */ + set_cdar(new_fsl_chan, 0); + + switch (new_fsl_chan->feature & FSL_DMA_IP_MASK) { + case FSL_DMA_IP_85XX: + new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start; + new_fsl_chan->toggle_ext_pause = fsl_chan_toggle_ext_pause; + case FSL_DMA_IP_83XX: + new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size; + new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size; + } + + spin_lock_init(&new_fsl_chan->desc_lock); + INIT_LIST_HEAD(&new_fsl_chan->ld_queue); + + new_fsl_chan->common.device = &fdev->common; + + /* Add the channel to DMA device channel list */ + list_add_tail(&new_fsl_chan->common.device_node, + &fdev->common.channels); + fdev->common.chancnt++; + + new_fsl_chan->irq = irq_of_parse_and_map(node, 0); + if (new_fsl_chan->irq != NO_IRQ) { + err = request_irq(new_fsl_chan->irq, + &fsl_dma_chan_do_interrupt, IRQF_SHARED, + "fsldma-channel", new_fsl_chan); + if (err) { + dev_err(fdev->dev, "DMA channel %s request_irq error " + "with return %d\n", node->full_name, err); + goto err_no_irq; + } + } + + dev_info(fdev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id, + compatible, new_fsl_chan->irq); + + return 0; + +err_no_irq: + list_del(&new_fsl_chan->common.device_node); +err_no_chan: + iounmap(new_fsl_chan->reg_base); +err_no_reg: + kfree(new_fsl_chan); + return err; +} + +static void fsl_dma_chan_remove(struct fsl_dma_chan *fchan) +{ + free_irq(fchan->irq, fchan); + list_del(&fchan->common.device_node); + iounmap(fchan->reg_base); + kfree(fchan); +} + +static int __devinit of_fsl_dma_probe(struct of_device *dev, + const struct of_device_id *match) +{ + int err; + struct fsl_dma_device *fdev; + struct device_node *child; + + fdev = kzalloc(sizeof(struct fsl_dma_device), GFP_KERNEL); + if (!fdev) { + dev_err(&dev->dev, "No enough memory for 'priv'\n"); + return -ENOMEM; + } + fdev->dev = &dev->dev; + INIT_LIST_HEAD(&fdev->common.channels); + + /* get DMA controller register base */ + err = of_address_to_resource(dev->node, 0, &fdev->reg); + if (err) { + dev_err(&dev->dev, "Can't get %s property 'reg'\n", + dev->node->full_name); + goto err_no_reg; + } + + dev_info(&dev->dev, "Probe the Freescale DMA driver for %s " + "controller at %p...\n", + match->compatible, (void *)fdev->reg.start); + fdev->reg_base = ioremap(fdev->reg.start, fdev->reg.end + - fdev->reg.start + 1); + + dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask); + dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask); + fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources; + fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources; + fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt; + fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy; + fdev->common.device_is_tx_complete = fsl_dma_is_complete; + fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending; + fdev->common.dev = &dev->dev; + + fdev->irq = irq_of_parse_and_map(dev->node, 0); + if (fdev->irq != NO_IRQ) { + err = request_irq(fdev->irq, &fsl_dma_do_interrupt, IRQF_SHARED, + "fsldma-device", fdev); + if (err) { + dev_err(&dev->dev, "DMA device request_irq error " + "with return %d\n", err); + goto err; + } + } + + dev_set_drvdata(&(dev->dev), fdev); + + /* We cannot use of_platform_bus_probe() because there is no + * of_platform_bus_remove. Instead, we manually instantiate every DMA + * channel object. + */ + for_each_child_of_node(dev->node, child) { + if (of_device_is_compatible(child, "fsl,eloplus-dma-channel")) + fsl_dma_chan_probe(fdev, child, + FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN, + "fsl,eloplus-dma-channel"); + if (of_device_is_compatible(child, "fsl,elo-dma-channel")) + fsl_dma_chan_probe(fdev, child, + FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN, + "fsl,elo-dma-channel"); + } + + dma_async_device_register(&fdev->common); + return 0; + +err: + iounmap(fdev->reg_base); +err_no_reg: + kfree(fdev); + return err; +} + +static int of_fsl_dma_remove(struct of_device *of_dev) +{ + struct fsl_dma_device *fdev; + unsigned int i; + + fdev = dev_get_drvdata(&of_dev->dev); + + dma_async_device_unregister(&fdev->common); + + for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) + if (fdev->chan[i]) + fsl_dma_chan_remove(fdev->chan[i]); + + if (fdev->irq != NO_IRQ) + free_irq(fdev->irq, fdev); + + iounmap(fdev->reg_base); + + kfree(fdev); + dev_set_drvdata(&of_dev->dev, NULL); + + return 0; +} + +static struct of_device_id of_fsl_dma_ids[] = { + { .compatible = "fsl,eloplus-dma", }, + { .compatible = "fsl,elo-dma", }, + {} +}; + +static struct of_platform_driver of_fsl_dma_driver = { + .name = "fsl-elo-dma", + .match_table = of_fsl_dma_ids, + .probe = of_fsl_dma_probe, + .remove = of_fsl_dma_remove, +}; + +static __init int of_fsl_dma_init(void) +{ + int ret; + + pr_info("Freescale Elo / Elo Plus DMA driver\n"); + + ret = of_register_platform_driver(&of_fsl_dma_driver); + if (ret) + pr_err("fsldma: failed to register platform driver\n"); + + return ret; +} + +static void __exit of_fsl_dma_exit(void) +{ + of_unregister_platform_driver(&of_fsl_dma_driver); +} + +subsys_initcall(of_fsl_dma_init); +module_exit(of_fsl_dma_exit); + +MODULE_DESCRIPTION("Freescale Elo / Elo Plus DMA driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h new file mode 100644 index 0000000..4f21a51 --- /dev/null +++ b/drivers/dma/fsldma.h @@ -0,0 +1,198 @@ +/* + * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved. + * + * Author: + * Zhang Wei <wei.zhang@freescale.com>, Jul 2007 + * Ebony Zhu <ebony.zhu@freescale.com>, May 2007 + * + * This is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + */ +#ifndef __DMA_FSLDMA_H +#define __DMA_FSLDMA_H + +#include <linux/device.h> +#include <linux/dmapool.h> +#include <linux/dmaengine.h> + +/* Define data structures needed by Freescale + * MPC8540 and MPC8349 DMA controller. + */ +#define FSL_DMA_MR_CS 0x00000001 +#define FSL_DMA_MR_CC 0x00000002 +#define FSL_DMA_MR_CA 0x00000008 +#define FSL_DMA_MR_EIE 0x00000040 +#define FSL_DMA_MR_XFE 0x00000020 +#define FSL_DMA_MR_EOLNIE 0x00000100 +#define FSL_DMA_MR_EOLSIE 0x00000080 +#define FSL_DMA_MR_EOSIE 0x00000200 +#define FSL_DMA_MR_CDSM 0x00000010 +#define FSL_DMA_MR_CTM 0x00000004 +#define FSL_DMA_MR_EMP_EN 0x00200000 +#define FSL_DMA_MR_EMS_EN 0x00040000 +#define FSL_DMA_MR_DAHE 0x00002000 +#define FSL_DMA_MR_SAHE 0x00001000 + +/* Special MR definition for MPC8349 */ +#define FSL_DMA_MR_EOTIE 0x00000080 + +#define FSL_DMA_SR_CH 0x00000020 +#define FSL_DMA_SR_PE 0x00000010 +#define FSL_DMA_SR_CB 0x00000004 +#define FSL_DMA_SR_TE 0x00000080 +#define FSL_DMA_SR_EOSI 0x00000002 +#define FSL_DMA_SR_EOLSI 0x00000001 +#define FSL_DMA_SR_EOCDI 0x00000001 +#define FSL_DMA_SR_EOLNI 0x00000008 + +#define FSL_DMA_SATR_SBPATMU 0x20000000 +#define FSL_DMA_SATR_STRANSINT_RIO 0x00c00000 +#define FSL_DMA_SATR_SREADTYPE_SNOOP_READ 0x00050000 +#define FSL_DMA_SATR_SREADTYPE_BP_IORH 0x00020000 +#define FSL_DMA_SATR_SREADTYPE_BP_NREAD 0x00040000 +#define FSL_DMA_SATR_SREADTYPE_BP_MREAD 0x00070000 + +#define FSL_DMA_DATR_DBPATMU 0x20000000 +#define FSL_DMA_DATR_DTRANSINT_RIO 0x00c00000 +#define FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE 0x00050000 +#define FSL_DMA_DATR_DWRITETYPE_BP_FLUSH 0x00010000 + +#define FSL_DMA_EOL ((u64)0x1) +#define FSL_DMA_SNEN ((u64)0x10) +#define FSL_DMA_EOSIE 0x8 +#define FSL_DMA_NLDA_MASK (~(u64)0x1f) + +#define FSL_DMA_BCR_MAX_CNT 0x03ffffffu + +#define FSL_DMA_DGSR_TE 0x80 +#define FSL_DMA_DGSR_CH 0x20 +#define FSL_DMA_DGSR_PE 0x10 +#define FSL_DMA_DGSR_EOLNI 0x08 +#define FSL_DMA_DGSR_CB 0x04 +#define FSL_DMA_DGSR_EOSI 0x02 +#define FSL_DMA_DGSR_EOLSI 0x01 + +typedef u64 __bitwise v64; +typedef u32 __bitwise v32; + +struct fsl_dma_ld_hw { + v64 src_addr; + v64 dst_addr; + v64 next_ln_addr; + v32 count; + v32 reserve; +} __attribute__((aligned(32))); + +struct fsl_desc_sw { + struct fsl_dma_ld_hw hw; + struct list_head node; + struct dma_async_tx_descriptor async_tx; + struct list_head *ld; + void *priv; +} __attribute__((aligned(32))); + +struct fsl_dma_chan_regs { + u32 mr; /* 0x00 - Mode Register */ + u32 sr; /* 0x04 - Status Register */ + u64 cdar; /* 0x08 - Current descriptor address register */ + u64 sar; /* 0x10 - Source Address Register */ + u64 dar; /* 0x18 - Destination Address Register */ + u32 bcr; /* 0x20 - Byte Count Register */ + u64 ndar; /* 0x24 - Next Descriptor Address Register */ +}; + +struct fsl_dma_chan; +#define FSL_DMA_MAX_CHANS_PER_DEVICE 4 + +struct fsl_dma_device { + void __iomem *reg_base; /* DGSR register base */ + struct resource reg; /* Resource for register */ + struct device *dev; + struct dma_device common; + struct fsl_dma_chan *chan[FSL_DMA_MAX_CHANS_PER_DEVICE]; + u32 feature; /* The same as DMA channels */ + int irq; /* Channel IRQ */ +}; + +/* Define macros for fsl_dma_chan->feature property */ +#define FSL_DMA_LITTLE_ENDIAN 0x00000000 +#define FSL_DMA_BIG_ENDIAN 0x00000001 + +#define FSL_DMA_IP_MASK 0x00000ff0 +#define FSL_DMA_IP_85XX 0x00000010 +#define FSL_DMA_IP_83XX 0x00000020 + +#define FSL_DMA_CHAN_PAUSE_EXT 0x00001000 +#define FSL_DMA_CHAN_START_EXT 0x00002000 + +struct fsl_dma_chan { + struct fsl_dma_chan_regs __iomem *reg_base; + dma_cookie_t completed_cookie; /* The maximum cookie completed */ + spinlock_t desc_lock; /* Descriptor operation lock */ + struct list_head ld_queue; /* Link descriptors queue */ + struct dma_chan common; /* DMA common channel */ + struct dma_pool *desc_pool; /* Descriptors pool */ + struct device *dev; /* Channel device */ + struct resource reg; /* Resource for register */ + int irq; /* Channel IRQ */ + int id; /* Raw id of this channel */ + struct tasklet_struct tasklet; + u32 feature; + + void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int size); + void (*toggle_ext_start)(struct fsl_dma_chan *fsl_chan, int enable); + void (*set_src_loop_size)(struct fsl_dma_chan *fsl_chan, int size); + void (*set_dest_loop_size)(struct fsl_dma_chan *fsl_chan, int size); +}; + +#define to_fsl_chan(chan) container_of(chan, struct fsl_dma_chan, common) +#define to_fsl_desc(lh) container_of(lh, struct fsl_desc_sw, node) +#define tx_to_fsl_desc(tx) container_of(tx, struct fsl_desc_sw, async_tx) + +#ifndef __powerpc64__ +static u64 in_be64(const u64 __iomem *addr) +{ + return ((u64)in_be32((u32 __iomem *)addr) << 32) | + (in_be32((u32 __iomem *)addr + 1)); +} + +static void out_be64(u64 __iomem *addr, u64 val) +{ + out_be32((u32 __iomem *)addr, val >> 32); + out_be32((u32 __iomem *)addr + 1, (u32)val); +} + +/* There is no asm instructions for 64 bits reverse loads and stores */ +static u64 in_le64(const u64 __iomem *addr) +{ + return ((u64)in_le32((u32 __iomem *)addr + 1) << 32) | + (in_le32((u32 __iomem *)addr)); +} + +static void out_le64(u64 __iomem *addr, u64 val) +{ + out_le32((u32 __iomem *)addr + 1, val >> 32); + out_le32((u32 __iomem *)addr, (u32)val); +} +#endif + +#define DMA_IN(fsl_chan, addr, width) \ + (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \ + in_be##width(addr) : in_le##width(addr)) +#define DMA_OUT(fsl_chan, addr, val, width) \ + (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \ + out_be##width(addr, val) : out_le##width(addr, val)) + +#define DMA_TO_CPU(fsl_chan, d, width) \ + (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \ + be##width##_to_cpu((__force __be##width)(v##width)d) : \ + le##width##_to_cpu((__force __le##width)(v##width)d)) +#define CPU_TO_DMA(fsl_chan, c, width) \ + (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \ + (__force v##width)cpu_to_be##width(c) : \ + (__force v##width)cpu_to_le##width(c)) + +#endif /* __DMA_FSLDMA_H */ diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c new file mode 100644 index 0000000..9b16a3a --- /dev/null +++ b/drivers/dma/ioat.c @@ -0,0 +1,226 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2007 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +/* + * This driver supports an Intel I/OAT DMA engine, which does asynchronous + * copy operations. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/dca.h> +#include "ioatdma.h" +#include "ioatdma_registers.h" +#include "ioatdma_hw.h" + +MODULE_VERSION(IOAT_DMA_VERSION); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Intel Corporation"); + +static struct pci_device_id ioat_pci_tbl[] = { + /* I/OAT v1 platforms */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) }, + { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) }, + + /* I/OAT v2 platforms */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) }, + + /* I/OAT v3 platforms */ + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) }, + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) }, + { 0, } +}; + +struct ioat_device { + struct pci_dev *pdev; + void __iomem *iobase; + struct ioatdma_device *dma; + struct dca_provider *dca; +}; + +static int __devinit ioat_probe(struct pci_dev *pdev, + const struct pci_device_id *id); +static void __devexit ioat_remove(struct pci_dev *pdev); + +static int ioat_dca_enabled = 1; +module_param(ioat_dca_enabled, int, 0644); +MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)"); + +static int ioat_setup_functionality(struct pci_dev *pdev, void __iomem *iobase) +{ + struct ioat_device *device = pci_get_drvdata(pdev); + u8 version; + int err = 0; + + version = readb(iobase + IOAT_VER_OFFSET); + switch (version) { + case IOAT_VER_1_2: + device->dma = ioat_dma_probe(pdev, iobase); + if (device->dma && ioat_dca_enabled) + device->dca = ioat_dca_init(pdev, iobase); + break; + case IOAT_VER_2_0: + device->dma = ioat_dma_probe(pdev, iobase); + if (device->dma && ioat_dca_enabled) + device->dca = ioat2_dca_init(pdev, iobase); + break; + case IOAT_VER_3_0: + device->dma = ioat_dma_probe(pdev, iobase); + if (device->dma && ioat_dca_enabled) + device->dca = ioat3_dca_init(pdev, iobase); + break; + default: + err = -ENODEV; + break; + } + if (!device->dma) + err = -ENODEV; + return err; +} + +static void ioat_shutdown_functionality(struct pci_dev *pdev) +{ + struct ioat_device *device = pci_get_drvdata(pdev); + + dev_err(&pdev->dev, "Removing dma and dca services\n"); + if (device->dca) { + unregister_dca_provider(device->dca); + free_dca_provider(device->dca); + device->dca = NULL; + } + + if (device->dma) { + ioat_dma_remove(device->dma); + device->dma = NULL; + } +} + +static struct pci_driver ioat_pci_driver = { + .name = "ioatdma", + .id_table = ioat_pci_tbl, + .probe = ioat_probe, + .shutdown = ioat_shutdown_functionality, + .remove = __devexit_p(ioat_remove), +}; + +static int __devinit ioat_probe(struct pci_dev *pdev, + const struct pci_device_id *id) +{ + void __iomem *iobase; + struct ioat_device *device; + unsigned long mmio_start, mmio_len; + int err; + + err = pci_enable_device(pdev); + if (err) + goto err_enable_device; + + err = pci_request_regions(pdev, ioat_pci_driver.name); + if (err) + goto err_request_regions; + + err = pci_set_dma_mask(pdev, DMA_64BIT_MASK); + if (err) + err = pci_set_dma_mask(pdev, DMA_32BIT_MASK); + if (err) + goto err_set_dma_mask; + + err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK); + if (err) + err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK); + if (err) + goto err_set_dma_mask; + + mmio_start = pci_resource_start(pdev, 0); + mmio_len = pci_resource_len(pdev, 0); + iobase = ioremap(mmio_start, mmio_len); + if (!iobase) { + err = -ENOMEM; + goto err_ioremap; + } + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) { + err = -ENOMEM; + goto err_kzalloc; + } + device->pdev = pdev; + pci_set_drvdata(pdev, device); + device->iobase = iobase; + + pci_set_master(pdev); + + err = ioat_setup_functionality(pdev, iobase); + if (err) + goto err_version; + + return 0; + +err_version: + kfree(device); +err_kzalloc: + iounmap(iobase); +err_ioremap: +err_set_dma_mask: + pci_release_regions(pdev); + pci_disable_device(pdev); +err_request_regions: +err_enable_device: + return err; +} + +/* + * It is unsafe to remove this module: if removed while a requested + * dma is outstanding, esp. from tcp, it is possible to hang while + * waiting for something that will never finish. However, if you're + * feeling lucky, this usually works just fine. + */ +static void __devexit ioat_remove(struct pci_dev *pdev) +{ + struct ioat_device *device = pci_get_drvdata(pdev); + + ioat_shutdown_functionality(pdev); + + kfree(device); +} + +static int __init ioat_init_module(void) +{ + return pci_register_driver(&ioat_pci_driver); +} +module_init(ioat_init_module); + +static void __exit ioat_exit_module(void) +{ + pci_unregister_driver(&ioat_pci_driver); +} +module_exit(ioat_exit_module); diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat_dca.c new file mode 100644 index 0000000..6cf622d --- /dev/null +++ b/drivers/dma/ioat_dca.c @@ -0,0 +1,657 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2007 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/smp.h> +#include <linux/interrupt.h> +#include <linux/dca.h> + +/* either a kernel change is needed, or we need something like this in kernel */ +#ifndef CONFIG_SMP +#include <asm/smp.h> +#undef cpu_physical_id +#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24) +#endif + +#include "ioatdma.h" +#include "ioatdma_registers.h" + +/* + * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6 + * contain the bit number of the APIC ID to map into the DCA tag. If the valid + * bit is not set, then the value must be 0 or 1 and defines the bit in the tag. + */ +#define DCA_TAG_MAP_VALID 0x80 + +#define DCA3_TAG_MAP_BIT_TO_INV 0x80 +#define DCA3_TAG_MAP_BIT_TO_SEL 0x40 +#define DCA3_TAG_MAP_LITERAL_VAL 0x1 + +#define DCA_TAG_MAP_MASK 0xDF + +/* + * "Legacy" DCA systems do not implement the DCA register set in the + * I/OAT device. Software needs direct support for their tag mappings. + */ + +#define APICID_BIT(x) (DCA_TAG_MAP_VALID | (x)) +#define IOAT_TAG_MAP_LEN 8 + +static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = { + 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), }; +static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = { + 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), }; +static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = { + 1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), }; +static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 }; + +/* pack PCI B/D/F into a u16 */ +static inline u16 dcaid_from_pcidev(struct pci_dev *pci) +{ + return (pci->bus->number << 8) | pci->devfn; +} + +static int dca_enabled_in_bios(struct pci_dev *pdev) +{ + /* CPUID level 9 returns DCA configuration */ + /* Bit 0 indicates DCA enabled by the BIOS */ + unsigned long cpuid_level_9; + int res; + + cpuid_level_9 = cpuid_eax(9); + res = test_bit(0, &cpuid_level_9); + if (!res) + dev_err(&pdev->dev, "DCA is disabled in BIOS\n"); + + return res; +} + +static int system_has_dca_enabled(struct pci_dev *pdev) +{ + if (boot_cpu_has(X86_FEATURE_DCA)) + return dca_enabled_in_bios(pdev); + + dev_err(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n"); + return 0; +} + +struct ioat_dca_slot { + struct pci_dev *pdev; /* requester device */ + u16 rid; /* requester id, as used by IOAT */ +}; + +#define IOAT_DCA_MAX_REQ 6 +#define IOAT3_DCA_MAX_REQ 2 + +struct ioat_dca_priv { + void __iomem *iobase; + void __iomem *dca_base; + int max_requesters; + int requester_count; + u8 tag_map[IOAT_TAG_MAP_LEN]; + struct ioat_dca_slot req_slots[0]; +}; + +/* 5000 series chipset DCA Port Requester ID Table Entry Format + * [15:8] PCI-Express Bus Number + * [7:3] PCI-Express Device Number + * [2:0] PCI-Express Function Number + * + * 5000 series chipset DCA control register format + * [7:1] Reserved (0) + * [0] Ignore Function Number + */ + +static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 id; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + id = dcaid_from_pcidev(pdev); + + if (ioatdca->requester_count == ioatdca->max_requesters) + return -ENODEV; + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == NULL) { + /* found an empty slot */ + ioatdca->requester_count++; + ioatdca->req_slots[i].pdev = pdev; + ioatdca->req_slots[i].rid = id; + writew(id, ioatdca->dca_base + (i * 4)); + /* make sure the ignore function bit is off */ + writeb(0, ioatdca->dca_base + (i * 4) + 2); + return i; + } + } + /* Error, ioatdma->requester_count is out of whack */ + return -EFAULT; +} + +static int ioat_dca_remove_requester(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) { + writew(0, ioatdca->dca_base + (i * 4)); + ioatdca->req_slots[i].pdev = NULL; + ioatdca->req_slots[i].rid = 0; + ioatdca->requester_count--; + return i; + } + } + return -ENODEV; +} + +static u8 ioat_dca_get_tag(struct dca_provider *dca, + struct device *dev, + int cpu) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + int i, apic_id, bit, value; + u8 entry, tag; + + tag = 0; + apic_id = cpu_physical_id(cpu); + + for (i = 0; i < IOAT_TAG_MAP_LEN; i++) { + entry = ioatdca->tag_map[i]; + if (entry & DCA_TAG_MAP_VALID) { + bit = entry & ~DCA_TAG_MAP_VALID; + value = (apic_id & (1 << bit)) ? 1 : 0; + } else { + value = entry ? 1 : 0; + } + tag |= (value << i); + } + return tag; +} + +static int ioat_dca_dev_managed(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + + pdev = to_pci_dev(dev); + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) + return 1; + } + return 0; +} + +static struct dca_ops ioat_dca_ops = { + .add_requester = ioat_dca_add_requester, + .remove_requester = ioat_dca_remove_requester, + .get_tag = ioat_dca_get_tag, + .dev_managed = ioat_dca_dev_managed, +}; + + +struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase) +{ + struct dca_provider *dca; + struct ioat_dca_priv *ioatdca; + u8 *tag_map = NULL; + int i; + int err; + u8 version; + u8 max_requesters; + + if (!system_has_dca_enabled(pdev)) + return NULL; + + /* I/OAT v1 systems must have a known tag_map to support DCA */ + switch (pdev->vendor) { + case PCI_VENDOR_ID_INTEL: + switch (pdev->device) { + case PCI_DEVICE_ID_INTEL_IOAT: + tag_map = ioat_tag_map_BNB; + break; + case PCI_DEVICE_ID_INTEL_IOAT_CNB: + tag_map = ioat_tag_map_CNB; + break; + case PCI_DEVICE_ID_INTEL_IOAT_SCNB: + tag_map = ioat_tag_map_SCNB; + break; + } + break; + case PCI_VENDOR_ID_UNISYS: + switch (pdev->device) { + case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR: + tag_map = ioat_tag_map_UNISYS; + break; + } + break; + } + if (tag_map == NULL) + return NULL; + + version = readb(iobase + IOAT_VER_OFFSET); + if (version == IOAT_VER_3_0) + max_requesters = IOAT3_DCA_MAX_REQ; + else + max_requesters = IOAT_DCA_MAX_REQ; + + dca = alloc_dca_provider(&ioat_dca_ops, + sizeof(*ioatdca) + + (sizeof(struct ioat_dca_slot) * max_requesters)); + if (!dca) + return NULL; + + ioatdca = dca_priv(dca); + ioatdca->max_requesters = max_requesters; + ioatdca->dca_base = iobase + 0x54; + + /* copy over the APIC ID to DCA tag mapping */ + for (i = 0; i < IOAT_TAG_MAP_LEN; i++) + ioatdca->tag_map[i] = tag_map[i]; + + err = register_dca_provider(dca, &pdev->dev); + if (err) { + free_dca_provider(dca); + return NULL; + } + + return dca; +} + + +static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 id; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + id = dcaid_from_pcidev(pdev); + + if (ioatdca->requester_count == ioatdca->max_requesters) + return -ENODEV; + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == NULL) { + /* found an empty slot */ + ioatdca->requester_count++; + ioatdca->req_slots[i].pdev = pdev; + ioatdca->req_slots[i].rid = id; + global_req_table = + readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); + writel(id | IOAT_DCA_GREQID_VALID, + ioatdca->iobase + global_req_table + (i * 4)); + return i; + } + } + /* Error, ioatdma->requester_count is out of whack */ + return -EFAULT; +} + +static int ioat2_dca_remove_requester(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) { + global_req_table = + readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET); + writel(0, ioatdca->iobase + global_req_table + (i * 4)); + ioatdca->req_slots[i].pdev = NULL; + ioatdca->req_slots[i].rid = 0; + ioatdca->requester_count--; + return i; + } + } + return -ENODEV; +} + +static u8 ioat2_dca_get_tag(struct dca_provider *dca, + struct device *dev, + int cpu) +{ + u8 tag; + + tag = ioat_dca_get_tag(dca, dev, cpu); + tag = (~tag) & 0x1F; + return tag; +} + +static struct dca_ops ioat2_dca_ops = { + .add_requester = ioat2_dca_add_requester, + .remove_requester = ioat2_dca_remove_requester, + .get_tag = ioat2_dca_get_tag, + .dev_managed = ioat_dca_dev_managed, +}; + +static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset) +{ + int slots = 0; + u32 req; + u16 global_req_table; + + global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET); + if (global_req_table == 0) + return 0; + do { + req = readl(iobase + global_req_table + (slots * sizeof(u32))); + slots++; + } while ((req & IOAT_DCA_GREQID_LASTID) == 0); + + return slots; +} + +struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase) +{ + struct dca_provider *dca; + struct ioat_dca_priv *ioatdca; + int slots; + int i; + int err; + u32 tag_map; + u16 dca_offset; + u16 csi_fsb_control; + u16 pcie_control; + u8 bit; + + if (!system_has_dca_enabled(pdev)) + return NULL; + + dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET); + if (dca_offset == 0) + return NULL; + + slots = ioat2_dca_count_dca_slots(iobase, dca_offset); + if (slots == 0) + return NULL; + + dca = alloc_dca_provider(&ioat2_dca_ops, + sizeof(*ioatdca) + + (sizeof(struct ioat_dca_slot) * slots)); + if (!dca) + return NULL; + + ioatdca = dca_priv(dca); + ioatdca->iobase = iobase; + ioatdca->dca_base = iobase + dca_offset; + ioatdca->max_requesters = slots; + + /* some bios might not know to turn these on */ + csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); + if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) { + csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH; + writew(csi_fsb_control, + ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET); + } + pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); + if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) { + pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR; + writew(pcie_control, + ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET); + } + + + /* TODO version, compatibility and configuration checks */ + + /* copy out the APIC to DCA tag map */ + tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET); + for (i = 0; i < 5; i++) { + bit = (tag_map >> (4 * i)) & 0x0f; + if (bit < 8) + ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID; + else + ioatdca->tag_map[i] = 0; + } + + err = register_dca_provider(dca, &pdev->dev); + if (err) { + free_dca_provider(dca); + return NULL; + } + + return dca; +} + +static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 id; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + id = dcaid_from_pcidev(pdev); + + if (ioatdca->requester_count == ioatdca->max_requesters) + return -ENODEV; + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == NULL) { + /* found an empty slot */ + ioatdca->requester_count++; + ioatdca->req_slots[i].pdev = pdev; + ioatdca->req_slots[i].rid = id; + global_req_table = + readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET); + writel(id | IOAT_DCA_GREQID_VALID, + ioatdca->iobase + global_req_table + (i * 4)); + return i; + } + } + /* Error, ioatdma->requester_count is out of whack */ + return -EFAULT; +} + +static int ioat3_dca_remove_requester(struct dca_provider *dca, + struct device *dev) +{ + struct ioat_dca_priv *ioatdca = dca_priv(dca); + struct pci_dev *pdev; + int i; + u16 global_req_table; + + /* This implementation only supports PCI-Express */ + if (dev->bus != &pci_bus_type) + return -ENODEV; + pdev = to_pci_dev(dev); + + for (i = 0; i < ioatdca->max_requesters; i++) { + if (ioatdca->req_slots[i].pdev == pdev) { + global_req_table = + readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET); + writel(0, ioatdca->iobase + global_req_table + (i * 4)); + ioatdca->req_slots[i].pdev = NULL; + ioatdca->req_slots[i].rid = 0; + ioatdca->requester_count--; + return i; + } + } + return -ENODEV; +} + +static u8 ioat3_dca_get_tag(struct dca_provider *dca, + struct device *dev, + int cpu) +{ + u8 tag; + + struct ioat_dca_priv *ioatdca = dca_priv(dca); + int i, apic_id, bit, value; + u8 entry; + + tag = 0; + apic_id = cpu_physical_id(cpu); + + for (i = 0; i < IOAT_TAG_MAP_LEN; i++) { + entry = ioatdca->tag_map[i]; + if (entry & DCA3_TAG_MAP_BIT_TO_SEL) { + bit = entry & + ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV); + value = (apic_id & (1 << bit)) ? 1 : 0; + } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) { + bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV; + value = (apic_id & (1 << bit)) ? 0 : 1; + } else { + value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0; + } + tag |= (value << i); + } + + return tag; +} + +static struct dca_ops ioat3_dca_ops = { + .add_requester = ioat3_dca_add_requester, + .remove_requester = ioat3_dca_remove_requester, + .get_tag = ioat3_dca_get_tag, + .dev_managed = ioat_dca_dev_managed, +}; + +static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset) +{ + int slots = 0; + u32 req; + u16 global_req_table; + + global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET); + if (global_req_table == 0) + return 0; + + do { + req = readl(iobase + global_req_table + (slots * sizeof(u32))); + slots++; + } while ((req & IOAT_DCA_GREQID_LASTID) == 0); + + return slots; +} + +struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase) +{ + struct dca_provider *dca; + struct ioat_dca_priv *ioatdca; + int slots; + int i; + int err; + u16 dca_offset; + u16 csi_fsb_control; + u16 pcie_control; + u8 bit; + + union { + u64 full; + struct { + u32 low; + u32 high; + }; + } tag_map; + + if (!system_has_dca_enabled(pdev)) + return NULL; + + dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET); + if (dca_offset == 0) + return NULL; + + slots = ioat3_dca_count_dca_slots(iobase, dca_offset); + if (slots == 0) + return NULL; + + dca = alloc_dca_provider(&ioat3_dca_ops, + sizeof(*ioatdca) + + (sizeof(struct ioat_dca_slot) * slots)); + if (!dca) + return NULL; + + ioatdca = dca_priv(dca); + ioatdca->iobase = iobase; + ioatdca->dca_base = iobase + dca_offset; + ioatdca->max_requesters = slots; + + /* some bios might not know to turn these on */ + csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET); + if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) { + csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH; + writew(csi_fsb_control, + ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET); + } + pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET); + if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) { + pcie_control |= IOAT3_PCI_CONTROL_MEMWR; + writew(pcie_control, + ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET); + } + + + /* TODO version, compatibility and configuration checks */ + + /* copy out the APIC to DCA tag map */ + tag_map.low = + readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW); + tag_map.high = + readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH); + for (i = 0; i < 8; i++) { + bit = tag_map.full >> (8 * i); + ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK; + } + + err = register_dca_provider(dca, &pdev->dev); + if (err) { + free_dca_provider(dca); + return NULL; + } + + return dca; +} diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c new file mode 100644 index 0000000..f2ed033 --- /dev/null +++ b/drivers/dma/ioat_dma.c @@ -0,0 +1,1723 @@ +/* + * Intel I/OAT DMA Linux driver + * Copyright(c) 2004 - 2007 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * The full GNU General Public License is included in this distribution in + * the file called "COPYING". + * + */ + +/* + * This driver supports an Intel I/OAT DMA engine, which does asynchronous + * copy operations. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/interrupt.h> +#include <linux/dmaengine.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/workqueue.h> +#include <linux/i7300_idle.h> +#include "ioatdma.h" +#include "ioatdma_registers.h" +#include "ioatdma_hw.h" + +#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common) +#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common) +#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node) +#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx) + +#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80) +static int ioat_pending_level = 4; +module_param(ioat_pending_level, int, 0644); +MODULE_PARM_DESC(ioat_pending_level, + "high-water mark for pushing ioat descriptors (default: 4)"); + +#define RESET_DELAY msecs_to_jiffies(100) +#define WATCHDOG_DELAY round_jiffies(msecs_to_jiffies(2000)) +static void ioat_dma_chan_reset_part2(struct work_struct *work); +static void ioat_dma_chan_watchdog(struct work_struct *work); + +/* + * workaround for IOAT ver.3.0 null descriptor issue + * (channel returns error when size is 0) + */ +#define NULL_DESC_BUFFER_SIZE 1 + +/* internal functions */ +static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan); +static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan); + +static struct ioat_desc_sw * +ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); +static struct ioat_desc_sw * +ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan); + +static inline struct ioat_dma_chan *ioat_lookup_chan_by_index( + struct ioatdma_device *device, + int index) +{ + return device->idx[index]; +} + +/** + * ioat_dma_do_interrupt - handler used for single vector interrupt mode + * @irq: interrupt id + * @data: interrupt data + */ +static irqreturn_t ioat_dma_do_interrupt(int irq, void *data) +{ + struct ioatdma_device *instance = data; + struct ioat_dma_chan *ioat_chan; + unsigned long attnstatus; + int bit; + u8 intrctrl; + + intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET); + + if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN)) + return IRQ_NONE; + + if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) { + writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); + return IRQ_NONE; + } + + attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET); + for_each_bit(bit, &attnstatus, BITS_PER_LONG) { + ioat_chan = ioat_lookup_chan_by_index(instance, bit); + tasklet_schedule(&ioat_chan->cleanup_task); + } + + writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET); + return IRQ_HANDLED; +} + +/** + * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode + * @irq: interrupt id + * @data: interrupt data + */ +static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data) +{ + struct ioat_dma_chan *ioat_chan = data; + + tasklet_schedule(&ioat_chan->cleanup_task); + + return IRQ_HANDLED; +} + +static void ioat_dma_cleanup_tasklet(unsigned long data); + +/** + * ioat_dma_enumerate_channels - find and initialize the device's channels + * @device: the device to be enumerated + */ +static int ioat_dma_enumerate_channels(struct ioatdma_device *device) +{ + u8 xfercap_scale; + u32 xfercap; + int i; + struct ioat_dma_chan *ioat_chan; + + /* + * IOAT ver.3 workarounds + */ + if (device->version == IOAT_VER_3_0) { + u32 chan_err_mask; + u16 dev_id; + u32 dmauncerrsts; + + /* + * Write CHANERRMSK_INT with 3E07h to mask out the errors + * that can cause stability issues for IOAT ver.3 + */ + chan_err_mask = 0x3E07; + pci_write_config_dword(device->pdev, + IOAT_PCI_CHANERRMASK_INT_OFFSET, + chan_err_mask); + + /* + * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit + * (workaround for spurious config parity error after restart) + */ + pci_read_config_word(device->pdev, + IOAT_PCI_DEVICE_ID_OFFSET, + &dev_id); + if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) { + dmauncerrsts = 0x10; + pci_write_config_dword(device->pdev, + IOAT_PCI_DMAUNCERRSTS_OFFSET, + dmauncerrsts); + } + } + + device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET); + xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET); + xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale)); + +#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL + if (i7300_idle_platform_probe(NULL, NULL) == 0) { + device->common.chancnt--; + } +#endif + for (i = 0; i < device->common.chancnt; i++) { + ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL); + if (!ioat_chan) { + device->common.chancnt = i; + break; + } + + ioat_chan->device = device; + ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1)); + ioat_chan->xfercap = xfercap; + ioat_chan->desccount = 0; + INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2); + if (ioat_chan->device->version != IOAT_VER_1_2) { + writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE + | IOAT_DMA_DCA_ANY_CPU, + ioat_chan->reg_base + IOAT_DCACTRL_OFFSET); + } + spin_lock_init(&ioat_chan->cleanup_lock); + spin_lock_init(&ioat_chan->desc_lock); + INIT_LIST_HEAD(&ioat_chan->free_desc); + INIT_LIST_HEAD(&ioat_chan->used_desc); + /* This should be made common somewhere in dmaengine.c */ + ioat_chan->common.device = &device->common; + list_add_tail(&ioat_chan->common.device_node, + &device->common.channels); + device->idx[i] = ioat_chan; + tasklet_init(&ioat_chan->cleanup_task, + ioat_dma_cleanup_tasklet, + (unsigned long) ioat_chan); + tasklet_disable(&ioat_chan->cleanup_task); + } + return device->common.chancnt; +} + +/** + * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended + * descriptors to hw + * @chan: DMA channel handle + */ +static inline void __ioat1_dma_memcpy_issue_pending( + struct ioat_dma_chan *ioat_chan) +{ + ioat_chan->pending = 0; + writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET); +} + +static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + + if (ioat_chan->pending > 0) { + spin_lock_bh(&ioat_chan->desc_lock); + __ioat1_dma_memcpy_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->desc_lock); + } +} + +static inline void __ioat2_dma_memcpy_issue_pending( + struct ioat_dma_chan *ioat_chan) +{ + ioat_chan->pending = 0; + writew(ioat_chan->dmacount, + ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); +} + +static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + + if (ioat_chan->pending > 0) { + spin_lock_bh(&ioat_chan->desc_lock); + __ioat2_dma_memcpy_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->desc_lock); + } +} + + +/** + * ioat_dma_chan_reset_part2 - reinit the channel after a reset + */ +static void ioat_dma_chan_reset_part2(struct work_struct *work) +{ + struct ioat_dma_chan *ioat_chan = + container_of(work, struct ioat_dma_chan, work.work); + struct ioat_desc_sw *desc; + + spin_lock_bh(&ioat_chan->cleanup_lock); + spin_lock_bh(&ioat_chan->desc_lock); + + ioat_chan->completion_virt->low = 0; + ioat_chan->completion_virt->high = 0; + ioat_chan->pending = 0; + + /* + * count the descriptors waiting, and be sure to do it + * right for both the CB1 line and the CB2 ring + */ + ioat_chan->dmacount = 0; + if (ioat_chan->used_desc.prev) { + desc = to_ioat_desc(ioat_chan->used_desc.prev); + do { + ioat_chan->dmacount++; + desc = to_ioat_desc(desc->node.next); + } while (&desc->node != ioat_chan->used_desc.next); + } + + /* + * write the new starting descriptor address + * this puts channel engine into ARMED state + */ + desc = to_ioat_desc(ioat_chan->used_desc.prev); + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->async_tx.phys) >> 32, + ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); + + writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); + break; + case IOAT_VER_2_0: + writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->async_tx.phys) >> 32, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); + + /* tell the engine to go with what's left to be done */ + writew(ioat_chan->dmacount, + ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET); + + break; + } + dev_err(&ioat_chan->device->pdev->dev, + "chan%d reset - %d descs waiting, %d total desc\n", + chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); + + spin_unlock_bh(&ioat_chan->desc_lock); + spin_unlock_bh(&ioat_chan->cleanup_lock); +} + +/** + * ioat_dma_reset_channel - restart a channel + * @ioat_chan: IOAT DMA channel handle + */ +static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan) +{ + u32 chansts, chanerr; + + if (!ioat_chan->used_desc.prev) + return; + + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + chansts = (ioat_chan->completion_virt->low + & IOAT_CHANSTS_DMA_TRANSFER_STATUS); + if (chanerr) { + dev_err(&ioat_chan->device->pdev->dev, + "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n", + chan_num(ioat_chan), chansts, chanerr); + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + } + + /* + * whack it upside the head with a reset + * and wait for things to settle out. + * force the pending count to a really big negative + * to make sure no one forces an issue_pending + * while we're waiting. + */ + + spin_lock_bh(&ioat_chan->desc_lock); + ioat_chan->pending = INT_MIN; + writeb(IOAT_CHANCMD_RESET, + ioat_chan->reg_base + + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); + spin_unlock_bh(&ioat_chan->desc_lock); + + /* schedule the 2nd half instead of sleeping a long time */ + schedule_delayed_work(&ioat_chan->work, RESET_DELAY); +} + +/** + * ioat_dma_chan_watchdog - watch for stuck channels + */ +static void ioat_dma_chan_watchdog(struct work_struct *work) +{ + struct ioatdma_device *device = + container_of(work, struct ioatdma_device, work.work); + struct ioat_dma_chan *ioat_chan; + int i; + + union { + u64 full; + struct { + u32 low; + u32 high; + }; + } completion_hw; + unsigned long compl_desc_addr_hw; + + for (i = 0; i < device->common.chancnt; i++) { + ioat_chan = ioat_lookup_chan_by_index(device, i); + + if (ioat_chan->device->version == IOAT_VER_1_2 + /* have we started processing anything yet */ + && ioat_chan->last_completion + /* have we completed any since last watchdog cycle? */ + && (ioat_chan->last_completion == + ioat_chan->watchdog_completion) + /* has TCP stuck on one cookie since last watchdog? */ + && (ioat_chan->watchdog_tcp_cookie == + ioat_chan->watchdog_last_tcp_cookie) + && (ioat_chan->watchdog_tcp_cookie != + ioat_chan->completed_cookie) + /* is there something in the chain to be processed? */ + /* CB1 chain always has at least the last one processed */ + && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next) + && ioat_chan->pending == 0) { + + /* + * check CHANSTS register for completed + * descriptor address. + * if it is different than completion writeback, + * it is not zero + * and it has changed since the last watchdog + * we can assume that channel + * is still working correctly + * and the problem is in completion writeback. + * update completion writeback + * with actual CHANSTS value + * else + * try resetting the channel + */ + + completion_hw.low = readl(ioat_chan->reg_base + + IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version)); + completion_hw.high = readl(ioat_chan->reg_base + + IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version)); +#if (BITS_PER_LONG == 64) + compl_desc_addr_hw = + completion_hw.full + & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; +#else + compl_desc_addr_hw = + completion_hw.low & IOAT_LOW_COMPLETION_MASK; +#endif + + if ((compl_desc_addr_hw != 0) + && (compl_desc_addr_hw != ioat_chan->watchdog_completion) + && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) { + ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw; + ioat_chan->completion_virt->low = completion_hw.low; + ioat_chan->completion_virt->high = completion_hw.high; + } else { + ioat_dma_reset_channel(ioat_chan); + ioat_chan->watchdog_completion = 0; + ioat_chan->last_compl_desc_addr_hw = 0; + } + + /* + * for version 2.0 if there are descriptors yet to be processed + * and the last completed hasn't changed since the last watchdog + * if they haven't hit the pending level + * issue the pending to push them through + * else + * try resetting the channel + */ + } else if (ioat_chan->device->version == IOAT_VER_2_0 + && ioat_chan->used_desc.prev + && ioat_chan->last_completion + && ioat_chan->last_completion == ioat_chan->watchdog_completion) { + + if (ioat_chan->pending < ioat_pending_level) + ioat2_dma_memcpy_issue_pending(&ioat_chan->common); + else { + ioat_dma_reset_channel(ioat_chan); + ioat_chan->watchdog_completion = 0; + } + } else { + ioat_chan->last_compl_desc_addr_hw = 0; + ioat_chan->watchdog_completion + = ioat_chan->last_completion; + } + + ioat_chan->watchdog_last_tcp_cookie = + ioat_chan->watchdog_tcp_cookie; + } + + schedule_delayed_work(&device->work, WATCHDOG_DELAY); +} + +static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); + struct ioat_desc_sw *first = tx_to_ioat_desc(tx); + struct ioat_desc_sw *prev, *new; + struct ioat_dma_descriptor *hw; + dma_cookie_t cookie; + LIST_HEAD(new_chain); + u32 copy; + size_t len; + dma_addr_t src, dst; + unsigned long orig_flags; + unsigned int desc_count = 0; + + /* src and dest and len are stored in the initial descriptor */ + len = first->len; + src = first->src; + dst = first->dst; + orig_flags = first->async_tx.flags; + new = first; + + spin_lock_bh(&ioat_chan->desc_lock); + prev = to_ioat_desc(ioat_chan->used_desc.prev); + prefetch(prev->hw); + do { + copy = min_t(size_t, len, ioat_chan->xfercap); + + async_tx_ack(&new->async_tx); + + hw = new->hw; + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dst; + hw->next = 0; + + /* chain together the physical address list for the HW */ + wmb(); + prev->hw->next = (u64) new->async_tx.phys; + + len -= copy; + dst += copy; + src += copy; + + list_add_tail(&new->node, &new_chain); + desc_count++; + prev = new; + } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan))); + + if (!new) { + dev_err(&ioat_chan->device->pdev->dev, + "tx submit failed\n"); + spin_unlock_bh(&ioat_chan->desc_lock); + return -ENOMEM; + } + + hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS; + if (first->async_tx.callback) { + hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; + if (first != new) { + /* move callback into to last desc */ + new->async_tx.callback = first->async_tx.callback; + new->async_tx.callback_param + = first->async_tx.callback_param; + first->async_tx.callback = NULL; + first->async_tx.callback_param = NULL; + } + } + + new->tx_cnt = desc_count; + new->async_tx.flags = orig_flags; /* client is in control of this ack */ + + /* store the original values for use in later cleanup */ + if (new != first) { + new->src = first->src; + new->dst = first->dst; + new->len = first->len; + } + + /* cookie incr and addition to used_list must be atomic */ + cookie = ioat_chan->common.cookie; + cookie++; + if (cookie < 0) + cookie = 1; + ioat_chan->common.cookie = new->async_tx.cookie = cookie; + + /* write address into NextDescriptor field of last desc in chain */ + to_ioat_desc(ioat_chan->used_desc.prev)->hw->next = + first->async_tx.phys; + list_splice_tail(&new_chain, &ioat_chan->used_desc); + + ioat_chan->dmacount += desc_count; + ioat_chan->pending += desc_count; + if (ioat_chan->pending >= ioat_pending_level) + __ioat1_dma_memcpy_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->desc_lock); + + return cookie; +} + +static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan); + struct ioat_desc_sw *first = tx_to_ioat_desc(tx); + struct ioat_desc_sw *new; + struct ioat_dma_descriptor *hw; + dma_cookie_t cookie; + u32 copy; + size_t len; + dma_addr_t src, dst; + unsigned long orig_flags; + unsigned int desc_count = 0; + + /* src and dest and len are stored in the initial descriptor */ + len = first->len; + src = first->src; + dst = first->dst; + orig_flags = first->async_tx.flags; + new = first; + + /* + * ioat_chan->desc_lock is still in force in version 2 path + * it gets unlocked at end of this function + */ + do { + copy = min_t(size_t, len, ioat_chan->xfercap); + + async_tx_ack(&new->async_tx); + + hw = new->hw; + hw->size = copy; + hw->ctl = 0; + hw->src_addr = src; + hw->dst_addr = dst; + + len -= copy; + dst += copy; + src += copy; + desc_count++; + } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan))); + + if (!new) { + dev_err(&ioat_chan->device->pdev->dev, + "tx submit failed\n"); + spin_unlock_bh(&ioat_chan->desc_lock); + return -ENOMEM; + } + + hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS; + if (first->async_tx.callback) { + hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN; + if (first != new) { + /* move callback into to last desc */ + new->async_tx.callback = first->async_tx.callback; + new->async_tx.callback_param + = first->async_tx.callback_param; + first->async_tx.callback = NULL; + first->async_tx.callback_param = NULL; + } + } + + new->tx_cnt = desc_count; + new->async_tx.flags = orig_flags; /* client is in control of this ack */ + + /* store the original values for use in later cleanup */ + if (new != first) { + new->src = first->src; + new->dst = first->dst; + new->len = first->len; + } + + /* cookie incr and addition to used_list must be atomic */ + cookie = ioat_chan->common.cookie; + cookie++; + if (cookie < 0) + cookie = 1; + ioat_chan->common.cookie = new->async_tx.cookie = cookie; + + ioat_chan->dmacount += desc_count; + ioat_chan->pending += desc_count; + if (ioat_chan->pending >= ioat_pending_level) + __ioat2_dma_memcpy_issue_pending(ioat_chan); + spin_unlock_bh(&ioat_chan->desc_lock); + + return cookie; +} + +/** + * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair + * @ioat_chan: the channel supplying the memory pool for the descriptors + * @flags: allocation flags + */ +static struct ioat_desc_sw *ioat_dma_alloc_descriptor( + struct ioat_dma_chan *ioat_chan, + gfp_t flags) +{ + struct ioat_dma_descriptor *desc; + struct ioat_desc_sw *desc_sw; + struct ioatdma_device *ioatdma_device; + dma_addr_t phys; + + ioatdma_device = to_ioatdma_device(ioat_chan->common.device); + desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys); + if (unlikely(!desc)) + return NULL; + + desc_sw = kzalloc(sizeof(*desc_sw), flags); + if (unlikely(!desc_sw)) { + pci_pool_free(ioatdma_device->dma_pool, desc, phys); + return NULL; + } + + memset(desc, 0, sizeof(*desc)); + dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common); + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + desc_sw->async_tx.tx_submit = ioat1_tx_submit; + break; + case IOAT_VER_2_0: + case IOAT_VER_3_0: + desc_sw->async_tx.tx_submit = ioat2_tx_submit; + break; + } + INIT_LIST_HEAD(&desc_sw->async_tx.tx_list); + + desc_sw->hw = desc; + desc_sw->async_tx.phys = phys; + + return desc_sw; +} + +static int ioat_initial_desc_count = 256; +module_param(ioat_initial_desc_count, int, 0644); +MODULE_PARM_DESC(ioat_initial_desc_count, + "initial descriptors per channel (default: 256)"); + +/** + * ioat2_dma_massage_chan_desc - link the descriptors into a circle + * @ioat_chan: the channel to be massaged + */ +static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan) +{ + struct ioat_desc_sw *desc, *_desc; + + /* setup used_desc */ + ioat_chan->used_desc.next = ioat_chan->free_desc.next; + ioat_chan->used_desc.prev = NULL; + + /* pull free_desc out of the circle so that every node is a hw + * descriptor, but leave it pointing to the list + */ + ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next; + ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev; + + /* circle link the hw descriptors */ + desc = to_ioat_desc(ioat_chan->free_desc.next); + desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys; + list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) { + desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys; + } +} + +/** + * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors + * @chan: the channel to be filled out + */ +static int ioat_dma_alloc_chan_resources(struct dma_chan *chan, + struct dma_client *client) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + struct ioat_desc_sw *desc; + u16 chanctrl; + u32 chanerr; + int i; + LIST_HEAD(tmp_list); + + /* have we already been set up? */ + if (!list_empty(&ioat_chan->free_desc)) + return ioat_chan->desccount; + + /* Setup register to interrupt and write completion status on error */ + chanctrl = IOAT_CHANCTRL_ERR_INT_EN | + IOAT_CHANCTRL_ANY_ERR_ABORT_EN | + IOAT_CHANCTRL_ERR_COMPLETION_EN; + writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET); + + chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + if (chanerr) { + dev_err(&ioat_chan->device->pdev->dev, + "CHANERR = %x, clearing\n", chanerr); + writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET); + } + + /* Allocate descriptors */ + for (i = 0; i < ioat_initial_desc_count; i++) { + desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL); + if (!desc) { + dev_err(&ioat_chan->device->pdev->dev, + "Only %d initial descriptors\n", i); + break; + } + list_add_tail(&desc->node, &tmp_list); + } + spin_lock_bh(&ioat_chan->desc_lock); + ioat_chan->desccount = i; + list_splice(&tmp_list, &ioat_chan->free_desc); + if (ioat_chan->device->version != IOAT_VER_1_2) + ioat2_dma_massage_chan_desc(ioat_chan); + spin_unlock_bh(&ioat_chan->desc_lock); + + /* allocate a completion writeback area */ + /* doing 2 32bit writes to mmio since 1 64b write doesn't work */ + ioat_chan->completion_virt = + pci_pool_alloc(ioat_chan->device->completion_pool, + GFP_KERNEL, + &ioat_chan->completion_addr); + memset(ioat_chan->completion_virt, 0, + sizeof(*ioat_chan->completion_virt)); + writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW); + writel(((u64) ioat_chan->completion_addr) >> 32, + ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH); + + tasklet_enable(&ioat_chan->cleanup_task); + ioat_dma_start_null_desc(ioat_chan); /* give chain to dma device */ + return ioat_chan->desccount; +} + +/** + * ioat_dma_free_chan_resources - release all the descriptors + * @chan: the channel to be cleaned + */ +static void ioat_dma_free_chan_resources(struct dma_chan *chan) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device); + struct ioat_desc_sw *desc, *_desc; + int in_use_descs = 0; + + /* Before freeing channel resources first check + * if they have been previously allocated for this channel. + */ + if (ioat_chan->desccount == 0) + return; + + tasklet_disable(&ioat_chan->cleanup_task); + ioat_dma_memcpy_cleanup(ioat_chan); + + /* Delay 100ms after reset to allow internal DMA logic to quiesce + * before removing DMA descriptor resources. + */ + writeb(IOAT_CHANCMD_RESET, + ioat_chan->reg_base + + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); + mdelay(100); + + spin_lock_bh(&ioat_chan->desc_lock); + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + list_for_each_entry_safe(desc, _desc, + &ioat_chan->used_desc, node) { + in_use_descs++; + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->async_tx.phys); + kfree(desc); + } + list_for_each_entry_safe(desc, _desc, + &ioat_chan->free_desc, node) { + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->async_tx.phys); + kfree(desc); + } + break; + case IOAT_VER_2_0: + case IOAT_VER_3_0: + list_for_each_entry_safe(desc, _desc, + ioat_chan->free_desc.next, node) { + list_del(&desc->node); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->async_tx.phys); + kfree(desc); + } + desc = to_ioat_desc(ioat_chan->free_desc.next); + pci_pool_free(ioatdma_device->dma_pool, desc->hw, + desc->async_tx.phys); + kfree(desc); + INIT_LIST_HEAD(&ioat_chan->free_desc); + INIT_LIST_HEAD(&ioat_chan->used_desc); + break; + } + spin_unlock_bh(&ioat_chan->desc_lock); + + pci_pool_free(ioatdma_device->completion_pool, + ioat_chan->completion_virt, + ioat_chan->completion_addr); + + /* one is ok since we left it on there on purpose */ + if (in_use_descs > 1) + dev_err(&ioat_chan->device->pdev->dev, + "Freeing %d in use descriptors!\n", + in_use_descs - 1); + + ioat_chan->last_completion = ioat_chan->completion_addr = 0; + ioat_chan->pending = 0; + ioat_chan->dmacount = 0; + ioat_chan->desccount = 0; + ioat_chan->watchdog_completion = 0; + ioat_chan->last_compl_desc_addr_hw = 0; + ioat_chan->watchdog_tcp_cookie = + ioat_chan->watchdog_last_tcp_cookie = 0; +} + +/** + * ioat_dma_get_next_descriptor - return the next available descriptor + * @ioat_chan: IOAT DMA channel handle + * + * Gets the next descriptor from the chain, and must be called with the + * channel's desc_lock held. Allocates more descriptors if the channel + * has run out. + */ +static struct ioat_desc_sw * +ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) +{ + struct ioat_desc_sw *new; + + if (!list_empty(&ioat_chan->free_desc)) { + new = to_ioat_desc(ioat_chan->free_desc.next); + list_del(&new->node); + } else { + /* try to get another desc */ + new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC); + if (!new) { + dev_err(&ioat_chan->device->pdev->dev, + "alloc failed\n"); + return NULL; + } + } + + prefetch(new->hw); + return new; +} + +static struct ioat_desc_sw * +ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan) +{ + struct ioat_desc_sw *new; + + /* + * used.prev points to where to start processing + * used.next points to next free descriptor + * if used.prev == NULL, there are none waiting to be processed + * if used.next == used.prev.prev, there is only one free descriptor, + * and we need to use it to as a noop descriptor before + * linking in a new set of descriptors, since the device + * has probably already read the pointer to it + */ + if (ioat_chan->used_desc.prev && + ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) { + + struct ioat_desc_sw *desc; + struct ioat_desc_sw *noop_desc; + int i; + + /* set up the noop descriptor */ + noop_desc = to_ioat_desc(ioat_chan->used_desc.next); + /* set size to non-zero value (channel returns error when size is 0) */ + noop_desc->hw->size = NULL_DESC_BUFFER_SIZE; + noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL; + noop_desc->hw->src_addr = 0; + noop_desc->hw->dst_addr = 0; + + ioat_chan->used_desc.next = ioat_chan->used_desc.next->next; + ioat_chan->pending++; + ioat_chan->dmacount++; + + /* try to get a few more descriptors */ + for (i = 16; i; i--) { + desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC); + if (!desc) { + dev_err(&ioat_chan->device->pdev->dev, + "alloc failed\n"); + break; + } + list_add_tail(&desc->node, ioat_chan->used_desc.next); + + desc->hw->next + = to_ioat_desc(desc->node.next)->async_tx.phys; + to_ioat_desc(desc->node.prev)->hw->next + = desc->async_tx.phys; + ioat_chan->desccount++; + } + + ioat_chan->used_desc.next = noop_desc->node.next; + } + new = to_ioat_desc(ioat_chan->used_desc.next); + prefetch(new); + ioat_chan->used_desc.next = new->node.next; + + if (ioat_chan->used_desc.prev == NULL) + ioat_chan->used_desc.prev = &new->node; + + prefetch(new->hw); + return new; +} + +static struct ioat_desc_sw *ioat_dma_get_next_descriptor( + struct ioat_dma_chan *ioat_chan) +{ + if (!ioat_chan) + return NULL; + + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + return ioat1_dma_get_next_descriptor(ioat_chan); + case IOAT_VER_2_0: + case IOAT_VER_3_0: + return ioat2_dma_get_next_descriptor(ioat_chan); + } + return NULL; +} + +static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy( + struct dma_chan *chan, + dma_addr_t dma_dest, + dma_addr_t dma_src, + size_t len, + unsigned long flags) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + struct ioat_desc_sw *new; + + spin_lock_bh(&ioat_chan->desc_lock); + new = ioat_dma_get_next_descriptor(ioat_chan); + spin_unlock_bh(&ioat_chan->desc_lock); + + if (new) { + new->len = len; + new->dst = dma_dest; + new->src = dma_src; + new->async_tx.flags = flags; + return &new->async_tx; + } else { + dev_err(&ioat_chan->device->pdev->dev, + "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", + chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); + return NULL; + } +} + +static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy( + struct dma_chan *chan, + dma_addr_t dma_dest, + dma_addr_t dma_src, + size_t len, + unsigned long flags) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + struct ioat_desc_sw *new; + + spin_lock_bh(&ioat_chan->desc_lock); + new = ioat2_dma_get_next_descriptor(ioat_chan); + + /* + * leave ioat_chan->desc_lock set in ioat 2 path + * it will get unlocked at end of tx_submit + */ + + if (new) { + new->len = len; + new->dst = dma_dest; + new->src = dma_src; + new->async_tx.flags = flags; + return &new->async_tx; + } else { + spin_unlock_bh(&ioat_chan->desc_lock); + dev_err(&ioat_chan->device->pdev->dev, + "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n", + chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount); + return NULL; + } +} + +static void ioat_dma_cleanup_tasklet(unsigned long data) +{ + struct ioat_dma_chan *chan = (void *)data; + ioat_dma_memcpy_cleanup(chan); + writew(IOAT_CHANCTRL_INT_DISABLE, + chan->reg_base + IOAT_CHANCTRL_OFFSET); +} + +static void +ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc) +{ + /* + * yes we are unmapping both _page and _single + * alloc'd regions with unmap_page. Is this + * *really* that bad? + */ + if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP)) + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, dst), + pci_unmap_len(desc, len), + PCI_DMA_FROMDEVICE); + + if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP)) + pci_unmap_page(ioat_chan->device->pdev, + pci_unmap_addr(desc, src), + pci_unmap_len(desc, len), + PCI_DMA_TODEVICE); +} + +/** + * ioat_dma_memcpy_cleanup - cleanup up finished descriptors + * @chan: ioat channel to be cleaned up + */ +static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan) +{ + unsigned long phys_complete; + struct ioat_desc_sw *desc, *_desc; + dma_cookie_t cookie = 0; + unsigned long desc_phys; + struct ioat_desc_sw *latest_desc; + + prefetch(ioat_chan->completion_virt); + + if (!spin_trylock_bh(&ioat_chan->cleanup_lock)) + return; + + /* The completion writeback can happen at any time, + so reads by the driver need to be atomic operations + The descriptor physical addresses are limited to 32-bits + when the CPU can only do a 32-bit mov */ + +#if (BITS_PER_LONG == 64) + phys_complete = + ioat_chan->completion_virt->full + & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; +#else + phys_complete = + ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK; +#endif + + if ((ioat_chan->completion_virt->full + & IOAT_CHANSTS_DMA_TRANSFER_STATUS) == + IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) { + dev_err(&ioat_chan->device->pdev->dev, + "Channel halted, chanerr = %x\n", + readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET)); + + /* TODO do something to salvage the situation */ + } + + if (phys_complete == ioat_chan->last_completion) { + spin_unlock_bh(&ioat_chan->cleanup_lock); + /* + * perhaps we're stuck so hard that the watchdog can't go off? + * try to catch it after 2 seconds + */ + if (ioat_chan->device->version != IOAT_VER_3_0) { + if (time_after(jiffies, + ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) { + ioat_dma_chan_watchdog(&(ioat_chan->device->work.work)); + ioat_chan->last_completion_time = jiffies; + } + } + return; + } + ioat_chan->last_completion_time = jiffies; + + cookie = 0; + if (!spin_trylock_bh(&ioat_chan->desc_lock)) { + spin_unlock_bh(&ioat_chan->cleanup_lock); + return; + } + + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + list_for_each_entry_safe(desc, _desc, + &ioat_chan->used_desc, node) { + + /* + * Incoming DMA requests may use multiple descriptors, + * due to exceeding xfercap, perhaps. If so, only the + * last one will have a cookie, and require unmapping. + */ + if (desc->async_tx.cookie) { + cookie = desc->async_tx.cookie; + ioat_dma_unmap(ioat_chan, desc); + if (desc->async_tx.callback) { + desc->async_tx.callback(desc->async_tx.callback_param); + desc->async_tx.callback = NULL; + } + } + + if (desc->async_tx.phys != phys_complete) { + /* + * a completed entry, but not the last, so clean + * up if the client is done with the descriptor + */ + if (async_tx_test_ack(&desc->async_tx)) { + list_del(&desc->node); + list_add_tail(&desc->node, + &ioat_chan->free_desc); + } else + desc->async_tx.cookie = 0; + } else { + /* + * last used desc. Do not remove, so we can + * append from it, but don't look at it next + * time, either + */ + desc->async_tx.cookie = 0; + + /* TODO check status bits? */ + break; + } + } + break; + case IOAT_VER_2_0: + case IOAT_VER_3_0: + /* has some other thread has already cleaned up? */ + if (ioat_chan->used_desc.prev == NULL) + break; + + /* work backwards to find latest finished desc */ + desc = to_ioat_desc(ioat_chan->used_desc.next); + latest_desc = NULL; + do { + desc = to_ioat_desc(desc->node.prev); + desc_phys = (unsigned long)desc->async_tx.phys + & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR; + if (desc_phys == phys_complete) { + latest_desc = desc; + break; + } + } while (&desc->node != ioat_chan->used_desc.prev); + + if (latest_desc != NULL) { + + /* work forwards to clear finished descriptors */ + for (desc = to_ioat_desc(ioat_chan->used_desc.prev); + &desc->node != latest_desc->node.next && + &desc->node != ioat_chan->used_desc.next; + desc = to_ioat_desc(desc->node.next)) { + if (desc->async_tx.cookie) { + cookie = desc->async_tx.cookie; + desc->async_tx.cookie = 0; + ioat_dma_unmap(ioat_chan, desc); + if (desc->async_tx.callback) { + desc->async_tx.callback(desc->async_tx.callback_param); + desc->async_tx.callback = NULL; + } + } + } + + /* move used.prev up beyond those that are finished */ + if (&desc->node == ioat_chan->used_desc.next) + ioat_chan->used_desc.prev = NULL; + else + ioat_chan->used_desc.prev = &desc->node; + } + break; + } + + spin_unlock_bh(&ioat_chan->desc_lock); + + ioat_chan->last_completion = phys_complete; + if (cookie != 0) + ioat_chan->completed_cookie = cookie; + + spin_unlock_bh(&ioat_chan->cleanup_lock); +} + +/** + * ioat_dma_is_complete - poll the status of a IOAT DMA transaction + * @chan: IOAT DMA channel handle + * @cookie: DMA transaction identifier + * @done: if not %NULL, updated with last completed transaction + * @used: if not %NULL, updated with last used transaction + */ +static enum dma_status ioat_dma_is_complete(struct dma_chan *chan, + dma_cookie_t cookie, + dma_cookie_t *done, + dma_cookie_t *used) +{ + struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan); + dma_cookie_t last_used; + dma_cookie_t last_complete; + enum dma_status ret; + + last_used = chan->cookie; + last_complete = ioat_chan->completed_cookie; + ioat_chan->watchdog_tcp_cookie = cookie; + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + ret = dma_async_is_complete(cookie, last_complete, last_used); + if (ret == DMA_SUCCESS) + return ret; + + ioat_dma_memcpy_cleanup(ioat_chan); + + last_used = chan->cookie; + last_complete = ioat_chan->completed_cookie; + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + return dma_async_is_complete(cookie, last_complete, last_used); +} + +static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan) +{ + struct ioat_desc_sw *desc; + + spin_lock_bh(&ioat_chan->desc_lock); + + desc = ioat_dma_get_next_descriptor(ioat_chan); + + if (!desc) { + dev_err(&ioat_chan->device->pdev->dev, + "Unable to start null desc - get next desc failed\n"); + spin_unlock_bh(&ioat_chan->desc_lock); + return; + } + + desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL + | IOAT_DMA_DESCRIPTOR_CTL_INT_GN + | IOAT_DMA_DESCRIPTOR_CTL_CP_STS; + /* set size to non-zero value (channel returns error when size is 0) */ + desc->hw->size = NULL_DESC_BUFFER_SIZE; + desc->hw->src_addr = 0; + desc->hw->dst_addr = 0; + async_tx_ack(&desc->async_tx); + switch (ioat_chan->device->version) { + case IOAT_VER_1_2: + desc->hw->next = 0; + list_add_tail(&desc->node, &ioat_chan->used_desc); + + writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->async_tx.phys) >> 32, + ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH); + + writeb(IOAT_CHANCMD_START, ioat_chan->reg_base + + IOAT_CHANCMD_OFFSET(ioat_chan->device->version)); + break; + case IOAT_VER_2_0: + case IOAT_VER_3_0: + writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW); + writel(((u64) desc->async_tx.phys) >> 32, + ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH); + + ioat_chan->dmacount++; + __ioat2_dma_memcpy_issue_pending(ioat_chan); + break; + } + spin_unlock_bh(&ioat_chan->desc_lock); +} + +/* + * Perform a IOAT transaction to verify the HW works. + */ +#define IOAT_TEST_SIZE 2000 + +static void ioat_dma_test_callback(void *dma_async_param) +{ + struct completion *cmp = dma_async_param; + + complete(cmp); +} + +/** + * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works. + * @device: device to be tested + */ +static int ioat_dma_self_test(struct ioatdma_device *device) +{ + int i; + u8 *src; + u8 *dest; + struct dma_chan *dma_chan; + struct dma_async_tx_descriptor *tx; + dma_addr_t dma_dest, dma_src; + dma_cookie_t cookie; + int err = 0; + struct completion cmp; + + src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); + if (!src) + return -ENOMEM; + dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL); + if (!dest) { + kfree(src); + return -ENOMEM; + } + + /* Fill in src buffer */ + for (i = 0; i < IOAT_TEST_SIZE; i++) + src[i] = (u8)i; + + /* Start copy, using first DMA channel */ + dma_chan = container_of(device->common.channels.next, + struct dma_chan, + device_node); + if (device->common.device_alloc_chan_resources(dma_chan, NULL) < 1) { + dev_err(&device->pdev->dev, + "selftest cannot allocate chan resource\n"); + err = -ENODEV; + goto out; + } + + dma_src = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE, + DMA_TO_DEVICE); + dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE, + DMA_FROM_DEVICE); + tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src, + IOAT_TEST_SIZE, 0); + if (!tx) { + dev_err(&device->pdev->dev, + "Self-test prep failed, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + async_tx_ack(tx); + init_completion(&cmp); + tx->callback = ioat_dma_test_callback; + tx->callback_param = &cmp; + cookie = tx->tx_submit(tx); + if (cookie < 0) { + dev_err(&device->pdev->dev, + "Self-test setup failed, disabling\n"); + err = -ENODEV; + goto free_resources; + } + device->common.device_issue_pending(dma_chan); + + wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000)); + + if (device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL) + != DMA_SUCCESS) { + dev_err(&device->pdev->dev, + "Self-test copy timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + if (memcmp(src, dest, IOAT_TEST_SIZE)) { + dev_err(&device->pdev->dev, + "Self-test copy failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + +free_resources: + device->common.device_free_chan_resources(dma_chan); +out: + kfree(src); + kfree(dest); + return err; +} + +static char ioat_interrupt_style[32] = "msix"; +module_param_string(ioat_interrupt_style, ioat_interrupt_style, + sizeof(ioat_interrupt_style), 0644); +MODULE_PARM_DESC(ioat_interrupt_style, + "set ioat interrupt style: msix (default), " + "msix-single-vector, msi, intx)"); + +/** + * ioat_dma_setup_interrupts - setup interrupt handler + * @device: ioat device + */ +static int ioat_dma_setup_interrupts(struct ioatdma_device *device) +{ + struct ioat_dma_chan *ioat_chan; + int err, i, j, msixcnt; + u8 intrctrl = 0; + + if (!strcmp(ioat_interrupt_style, "msix")) + goto msix; + if (!strcmp(ioat_interrupt_style, "msix-single-vector")) + goto msix_single_vector; + if (!strcmp(ioat_interrupt_style, "msi")) + goto msi; + if (!strcmp(ioat_interrupt_style, "intx")) + goto intx; + dev_err(&device->pdev->dev, "invalid ioat_interrupt_style %s\n", + ioat_interrupt_style); + goto err_no_irq; + +msix: + /* The number of MSI-X vectors should equal the number of channels */ + msixcnt = device->common.chancnt; + for (i = 0; i < msixcnt; i++) + device->msix_entries[i].entry = i; + + err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt); + if (err < 0) + goto msi; + if (err > 0) + goto msix_single_vector; + + for (i = 0; i < msixcnt; i++) { + ioat_chan = ioat_lookup_chan_by_index(device, i); + err = request_irq(device->msix_entries[i].vector, + ioat_dma_do_interrupt_msix, + 0, "ioat-msix", ioat_chan); + if (err) { + for (j = 0; j < i; j++) { + ioat_chan = + ioat_lookup_chan_by_index(device, j); + free_irq(device->msix_entries[j].vector, + ioat_chan); + } + goto msix_single_vector; + } + } + intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL; + device->irq_mode = msix_multi_vector; + goto done; + +msix_single_vector: + device->msix_entries[0].entry = 0; + err = pci_enable_msix(device->pdev, device->msix_entries, 1); + if (err) + goto msi; + + err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt, + 0, "ioat-msix", device); + if (err) { + pci_disable_msix(device->pdev); + goto msi; + } + device->irq_mode = msix_single_vector; + goto done; + +msi: + err = pci_enable_msi(device->pdev); + if (err) + goto intx; + + err = request_irq(device->pdev->irq, ioat_dma_do_interrupt, + 0, "ioat-msi", device); + if (err) { + pci_disable_msi(device->pdev); + goto intx; + } + /* + * CB 1.2 devices need a bit set in configuration space to enable MSI + */ + if (device->version == IOAT_VER_1_2) { + u32 dmactrl; + pci_read_config_dword(device->pdev, + IOAT_PCI_DMACTRL_OFFSET, &dmactrl); + dmactrl |= IOAT_PCI_DMACTRL_MSI_EN; + pci_write_config_dword(device->pdev, + IOAT_PCI_DMACTRL_OFFSET, dmactrl); + } + device->irq_mode = msi; + goto done; + +intx: + err = request_irq(device->pdev->irq, ioat_dma_do_interrupt, + IRQF_SHARED, "ioat-intx", device); + if (err) + goto err_no_irq; + device->irq_mode = intx; + +done: + intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN; + writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET); + return 0; + +err_no_irq: + /* Disable all interrupt generation */ + writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); + dev_err(&device->pdev->dev, "no usable interrupts\n"); + device->irq_mode = none; + return -1; +} + +/** + * ioat_dma_remove_interrupts - remove whatever interrupts were set + * @device: ioat device + */ +static void ioat_dma_remove_interrupts(struct ioatdma_device *device) +{ + struct ioat_dma_chan *ioat_chan; + int i; + + /* Disable all interrupt generation */ + writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET); + + switch (device->irq_mode) { + case msix_multi_vector: + for (i = 0; i < device->common.chancnt; i++) { + ioat_chan = ioat_lookup_chan_by_index(device, i); + free_irq(device->msix_entries[i].vector, ioat_chan); + } + pci_disable_msix(device->pdev); + break; + case msix_single_vector: + free_irq(device->msix_entries[0].vector, device); + pci_disable_msix(device->pdev); + break; + case msi: + free_irq(device->pdev->irq, device); + pci_disable_msi(device->pdev); + break; + case intx: + free_irq(device->pdev->irq, device); + break; + case none: + dev_warn(&device->pdev->dev, + "call to %s without interrupts setup\n", __func__); + } + device->irq_mode = none; +} + +struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, + void __iomem *iobase) +{ + int err; + struct ioatdma_device *device; + + device = kzalloc(sizeof(*device), GFP_KERNEL); + if (!device) { + err = -ENOMEM; + goto err_kzalloc; + } + device->pdev = pdev; + device->reg_base = iobase; + device->version = readb(device->reg_base + IOAT_VER_OFFSET); + + /* DMA coherent memory pool for DMA descriptor allocations */ + device->dma_pool = pci_pool_create("dma_desc_pool", pdev, + sizeof(struct ioat_dma_descriptor), + 64, 0); + if (!device->dma_pool) { + err = -ENOMEM; + goto err_dma_pool; + } + + device->completion_pool = pci_pool_create("completion_pool", pdev, + sizeof(u64), SMP_CACHE_BYTES, + SMP_CACHE_BYTES); + if (!device->completion_pool) { + err = -ENOMEM; + goto err_completion_pool; + } + + INIT_LIST_HEAD(&device->common.channels); + ioat_dma_enumerate_channels(device); + + device->common.device_alloc_chan_resources = + ioat_dma_alloc_chan_resources; + device->common.device_free_chan_resources = + ioat_dma_free_chan_resources; + device->common.dev = &pdev->dev; + + dma_cap_set(DMA_MEMCPY, device->common.cap_mask); + device->common.device_is_tx_complete = ioat_dma_is_complete; + switch (device->version) { + case IOAT_VER_1_2: + device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy; + device->common.device_issue_pending = + ioat1_dma_memcpy_issue_pending; + break; + case IOAT_VER_2_0: + case IOAT_VER_3_0: + device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy; + device->common.device_issue_pending = + ioat2_dma_memcpy_issue_pending; + break; + } + + dev_err(&device->pdev->dev, + "Intel(R) I/OAT DMA Engine found," + " %d channels, device version 0x%02x, driver version %s\n", + device->common.chancnt, device->version, IOAT_DMA_VERSION); + + err = ioat_dma_setup_interrupts(device); + if (err) + goto err_setup_interrupts; + + err = ioat_dma_self_test(device); + if (err) + goto err_self_test; + + ioat_set_tcp_copy_break(device); + + dma_async_device_register(&device->common); + + if (device->version != IOAT_VER_3_0) { + INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog); + schedule_delayed_work(&device->work, + WATCHDOG_DELAY); + } + + return device; + +err_self_test: + ioat_dma_remove_interrupts(device); +err_setup_interrupts: + pci_pool_destroy(device->completion_pool); +err_completion_pool: + pci_pool_destroy(device->dma_pool); +err_dma_pool: + kfree(device); +err_kzalloc: + dev_err(&pdev->dev, + "Intel(R) I/OAT DMA Engine initialization failed\n"); + return NULL; +} + +void ioat_dma_remove(struct ioatdma_device *device) +{ + struct dma_chan *chan, *_chan; + struct ioat_dma_chan *ioat_chan; + + ioat_dma_remove_interrupts(device); + + dma_async_device_unregister(&device->common); + + pci_pool_destroy(device->dma_pool); + pci_pool_destroy(device->completion_pool); + + iounmap(device->reg_base); + pci_release_regions(device->pdev); + pci_disable_device(device->pdev); + + if (device->version != IOAT_VER_3_0) { + cancel_delayed_work(&device->work); + } + + list_for_each_entry_safe(chan, _chan, + &device->common.channels, device_node) { + ioat_chan = to_ioat_chan(chan); + list_del(&chan->device_node); + kfree(ioat_chan); + } + kfree(device); +} + diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h new file mode 100644 index 0000000..a3306d0 --- /dev/null +++ b/drivers/dma/ioatdma.h @@ -0,0 +1,163 @@ +/* + * Copyright(c) 2004 - 2007 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef IOATDMA_H +#define IOATDMA_H + +#include <linux/dmaengine.h> +#include "ioatdma_hw.h" +#include <linux/init.h> +#include <linux/dmapool.h> +#include <linux/cache.h> +#include <linux/pci_ids.h> +#include <net/tcp.h> + +#define IOAT_DMA_VERSION "3.30" + +enum ioat_interrupt { + none = 0, + msix_multi_vector = 1, + msix_single_vector = 2, + msi = 3, + intx = 4, +}; + +#define IOAT_LOW_COMPLETION_MASK 0xffffffc0 +#define IOAT_DMA_DCA_ANY_CPU ~0 +#define IOAT_WATCHDOG_PERIOD (2 * HZ) + + +/** + * struct ioatdma_device - internal representation of a IOAT device + * @pdev: PCI-Express device + * @reg_base: MMIO register space base address + * @dma_pool: for allocating DMA descriptors + * @common: embedded struct dma_device + * @version: version of ioatdma device + * @irq_mode: which style irq to use + * @msix_entries: irq handlers + * @idx: per channel data + */ + +struct ioatdma_device { + struct pci_dev *pdev; + void __iomem *reg_base; + struct pci_pool *dma_pool; + struct pci_pool *completion_pool; + struct dma_device common; + u8 version; + enum ioat_interrupt irq_mode; + struct delayed_work work; + struct msix_entry msix_entries[4]; + struct ioat_dma_chan *idx[4]; +}; + +/** + * struct ioat_dma_chan - internal representation of a DMA channel + */ +struct ioat_dma_chan { + + void __iomem *reg_base; + + dma_cookie_t completed_cookie; + unsigned long last_completion; + unsigned long last_completion_time; + + size_t xfercap; /* XFERCAP register value expanded out */ + + spinlock_t cleanup_lock; + spinlock_t desc_lock; + struct list_head free_desc; + struct list_head used_desc; + unsigned long watchdog_completion; + int watchdog_tcp_cookie; + u32 watchdog_last_tcp_cookie; + struct delayed_work work; + + int pending; + int dmacount; + int desccount; + + struct ioatdma_device *device; + struct dma_chan common; + + dma_addr_t completion_addr; + union { + u64 full; /* HW completion writeback */ + struct { + u32 low; + u32 high; + }; + } *completion_virt; + unsigned long last_compl_desc_addr_hw; + struct tasklet_struct cleanup_task; +}; + +/* wrapper around hardware descriptor format + additional software fields */ + +/** + * struct ioat_desc_sw - wrapper around hardware descriptor + * @hw: hardware DMA descriptor + * @node: this descriptor will either be on the free list, + * or attached to a transaction list (async_tx.tx_list) + * @tx_cnt: number of descriptors required to complete the transaction + * @async_tx: the generic software descriptor for all engines + */ +struct ioat_desc_sw { + struct ioat_dma_descriptor *hw; + struct list_head node; + int tx_cnt; + size_t len; + dma_addr_t src; + dma_addr_t dst; + struct dma_async_tx_descriptor async_tx; +}; + +static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev) +{ + #ifdef CONFIG_NET_DMA + switch (dev->version) { + case IOAT_VER_1_2: + case IOAT_VER_3_0: + sysctl_tcp_dma_copybreak = 4096; + break; + case IOAT_VER_2_0: + sysctl_tcp_dma_copybreak = 2048; + break; + } + #endif +} + +#if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE) +struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev, + void __iomem *iobase); +void ioat_dma_remove(struct ioatdma_device *device); +struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase); +struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase); +struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase); +#else +#define ioat_dma_probe(pdev, iobase) NULL +#define ioat_dma_remove(device) do { } while (0) +#define ioat_dca_init(pdev, iobase) NULL +#define ioat2_dca_init(pdev, iobase) NULL +#define ioat3_dca_init(pdev, iobase) NULL +#endif + +#endif /* IOATDMA_H */ diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h new file mode 100644 index 0000000..f1ae2c7 --- /dev/null +++ b/drivers/dma/ioatdma_hw.h @@ -0,0 +1,70 @@ +/* + * Copyright(c) 2004 - 2007 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef _IOAT_HW_H_ +#define _IOAT_HW_H_ + +/* PCI Configuration Space Values */ +#define IOAT_PCI_VID 0x8086 + +/* CB device ID's */ +#define IOAT_PCI_DID_5000 0x1A38 +#define IOAT_PCI_DID_CNB 0x360B +#define IOAT_PCI_DID_SCNB 0x65FF +#define IOAT_PCI_DID_SNB 0x402F + +#define IOAT_PCI_RID 0x00 +#define IOAT_PCI_SVID 0x8086 +#define IOAT_PCI_SID 0x8086 +#define IOAT_VER_1_2 0x12 /* Version 1.2 */ +#define IOAT_VER_2_0 0x20 /* Version 2.0 */ +#define IOAT_VER_3_0 0x30 /* Version 3.0 */ + +struct ioat_dma_descriptor { + uint32_t size; + uint32_t ctl; + uint64_t src_addr; + uint64_t dst_addr; + uint64_t next; + uint64_t rsv1; + uint64_t rsv2; + uint64_t user1; + uint64_t user2; +}; + +#define IOAT_DMA_DESCRIPTOR_CTL_INT_GN 0x00000001 +#define IOAT_DMA_DESCRIPTOR_CTL_SRC_SN 0x00000002 +#define IOAT_DMA_DESCRIPTOR_CTL_DST_SN 0x00000004 +#define IOAT_DMA_DESCRIPTOR_CTL_CP_STS 0x00000008 +#define IOAT_DMA_DESCRIPTOR_CTL_FRAME 0x00000010 +#define IOAT_DMA_DESCRIPTOR_NUL 0x00000020 +#define IOAT_DMA_DESCRIPTOR_CTL_SP_BRK 0x00000040 +#define IOAT_DMA_DESCRIPTOR_CTL_DP_BRK 0x00000080 +#define IOAT_DMA_DESCRIPTOR_CTL_BNDL 0x00000100 +#define IOAT_DMA_DESCRIPTOR_CTL_DCA 0x00000200 +#define IOAT_DMA_DESCRIPTOR_CTL_BUFHINT 0x00000400 + +#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_CONTEXT 0xFF000000 +#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_DMA 0x00000000 + +#define IOAT_DMA_DESCRIPTOR_CTL_CONTEXT_DCA 0x00000001 +#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_MASK 0xFF000000 + +#endif diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioatdma_registers.h new file mode 100644 index 0000000..827cb50 --- /dev/null +++ b/drivers/dma/ioatdma_registers.h @@ -0,0 +1,226 @@ +/* + * Copyright(c) 2004 - 2007 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ +#ifndef _IOAT_REGISTERS_H_ +#define _IOAT_REGISTERS_H_ + +#define IOAT_PCI_DMACTRL_OFFSET 0x48 +#define IOAT_PCI_DMACTRL_DMA_EN 0x00000001 +#define IOAT_PCI_DMACTRL_MSI_EN 0x00000002 + +#define IOAT_PCI_DEVICE_ID_OFFSET 0x02 +#define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148 +#define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184 + +/* MMIO Device Registers */ +#define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */ + +#define IOAT_XFERCAP_OFFSET 0x01 /* 8-bit */ +#define IOAT_XFERCAP_4KB 12 +#define IOAT_XFERCAP_8KB 13 +#define IOAT_XFERCAP_16KB 14 +#define IOAT_XFERCAP_32KB 15 +#define IOAT_XFERCAP_32GB 0 + +#define IOAT_GENCTRL_OFFSET 0x02 /* 8-bit */ +#define IOAT_GENCTRL_DEBUG_EN 0x01 + +#define IOAT_INTRCTRL_OFFSET 0x03 /* 8-bit */ +#define IOAT_INTRCTRL_MASTER_INT_EN 0x01 /* Master Interrupt Enable */ +#define IOAT_INTRCTRL_INT_STATUS 0x02 /* ATTNSTATUS -or- Channel Int */ +#define IOAT_INTRCTRL_INT 0x04 /* INT_STATUS -and- MASTER_INT_EN */ +#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL 0x08 /* Enable all MSI-X vectors */ + +#define IOAT_ATTNSTATUS_OFFSET 0x04 /* Each bit is a channel */ + +#define IOAT_VER_OFFSET 0x08 /* 8-bit */ +#define IOAT_VER_MAJOR_MASK 0xF0 +#define IOAT_VER_MINOR_MASK 0x0F +#define GET_IOAT_VER_MAJOR(x) (((x) & IOAT_VER_MAJOR_MASK) >> 4) +#define GET_IOAT_VER_MINOR(x) ((x) & IOAT_VER_MINOR_MASK) + +#define IOAT_PERPORTOFFSET_OFFSET 0x0A /* 16-bit */ + +#define IOAT_INTRDELAY_OFFSET 0x0C /* 16-bit */ +#define IOAT_INTRDELAY_INT_DELAY_MASK 0x3FFF /* Interrupt Delay Time */ +#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalescing Supported */ + +#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */ +#define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001 + +#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */ + +/* DMA Channel Registers */ +#define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */ +#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000 +#define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100 +#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020 +#define IOAT_CHANCTRL_ERR_INT_EN 0x0010 +#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008 +#define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004 +#define IOAT_CHANCTRL_INT_DISABLE 0x0001 + +#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */ +#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */ +#define IOAT_DMA_COMP_V2 0x0002 /* Compatibility with DMA version 2 */ + + +#define IOAT1_CHANSTS_OFFSET 0x04 /* 64-bit Channel Status Register */ +#define IOAT2_CHANSTS_OFFSET 0x08 /* 64-bit Channel Status Register */ +#define IOAT_CHANSTS_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET) +#define IOAT1_CHANSTS_OFFSET_LOW 0x04 +#define IOAT2_CHANSTS_OFFSET_LOW 0x08 +#define IOAT_CHANSTS_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW) +#define IOAT1_CHANSTS_OFFSET_HIGH 0x08 +#define IOAT2_CHANSTS_OFFSET_HIGH 0x0C +#define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH) +#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR ~0x3F +#define IOAT_CHANSTS_SOFT_ERR 0x0000000000000010 +#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x0000000000000008 +#define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x0000000000000007 +#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE 0x0 +#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE 0x1 +#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED 0x2 +#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED 0x3 + + + +#define IOAT_CHAN_DMACOUNT_OFFSET 0x06 /* 16-bit DMA Count register */ + +#define IOAT_DCACTRL_OFFSET 0x30 /* 32 bit Direct Cache Access Control Register */ +#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000 +#define IOAT_DCACTRL_TARGET_CPU_MASK 0xFFFF /* APIC ID */ + +/* CB DCA Memory Space Registers */ +#define IOAT_DCAOFFSET_OFFSET 0x14 +/* CB_BAR + IOAT_DCAOFFSET value */ +#define IOAT_DCA_VER_OFFSET 0x00 +#define IOAT_DCA_VER_MAJOR_MASK 0xF0 +#define IOAT_DCA_VER_MINOR_MASK 0x0F + +#define IOAT_DCA_COMP_OFFSET 0x02 +#define IOAT_DCA_COMP_V1 0x1 + +#define IOAT_FSB_CAPABILITY_OFFSET 0x04 +#define IOAT_FSB_CAPABILITY_PREFETCH 0x1 + +#define IOAT_PCI_CAPABILITY_OFFSET 0x06 +#define IOAT_PCI_CAPABILITY_MEMWR 0x1 + +#define IOAT_FSB_CAP_ENABLE_OFFSET 0x08 +#define IOAT_FSB_CAP_ENABLE_PREFETCH 0x1 + +#define IOAT_PCI_CAP_ENABLE_OFFSET 0x0A +#define IOAT_PCI_CAP_ENABLE_MEMWR 0x1 + +#define IOAT_APICID_TAG_MAP_OFFSET 0x0C +#define IOAT_APICID_TAG_MAP_TAG0 0x0000000F +#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0 +#define IOAT_APICID_TAG_MAP_TAG1 0x000000F0 +#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4 +#define IOAT_APICID_TAG_MAP_TAG2 0x00000F00 +#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8 +#define IOAT_APICID_TAG_MAP_TAG3 0x0000F000 +#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12 +#define IOAT_APICID_TAG_MAP_TAG4 0x000F0000 +#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16 +#define IOAT_APICID_TAG_CB2_VALID 0x8080808080 + +#define IOAT_DCA_GREQID_OFFSET 0x10 +#define IOAT_DCA_GREQID_SIZE 0x04 +#define IOAT_DCA_GREQID_MASK 0xFFFF +#define IOAT_DCA_GREQID_IGNOREFUN 0x10000000 +#define IOAT_DCA_GREQID_VALID 0x20000000 +#define IOAT_DCA_GREQID_LASTID 0x80000000 + +#define IOAT3_CSI_CAPABILITY_OFFSET 0x08 +#define IOAT3_CSI_CAPABILITY_PREFETCH 0x1 + +#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A +#define IOAT3_PCI_CAPABILITY_MEMWR 0x1 + +#define IOAT3_CSI_CONTROL_OFFSET 0x0C +#define IOAT3_CSI_CONTROL_PREFETCH 0x1 + +#define IOAT3_PCI_CONTROL_OFFSET 0x0E +#define IOAT3_PCI_CONTROL_MEMWR 0x1 + +#define IOAT3_APICID_TAG_MAP_OFFSET 0x10 +#define IOAT3_APICID_TAG_MAP_OFFSET_LOW 0x10 +#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14 + +#define IOAT3_DCA_GREQID_OFFSET 0x02 + +#define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */ +#define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */ +#define IOAT_CHAINADDR_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET) +#define IOAT1_CHAINADDR_OFFSET_LOW 0x0C +#define IOAT2_CHAINADDR_OFFSET_LOW 0x10 +#define IOAT_CHAINADDR_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW) +#define IOAT1_CHAINADDR_OFFSET_HIGH 0x10 +#define IOAT2_CHAINADDR_OFFSET_HIGH 0x14 +#define IOAT_CHAINADDR_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH) + +#define IOAT1_CHANCMD_OFFSET 0x14 /* 8-bit DMA Channel Command Register */ +#define IOAT2_CHANCMD_OFFSET 0x04 /* 8-bit DMA Channel Command Register */ +#define IOAT_CHANCMD_OFFSET(ver) ((ver) < IOAT_VER_2_0 \ + ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET) +#define IOAT_CHANCMD_RESET 0x20 +#define IOAT_CHANCMD_RESUME 0x10 +#define IOAT_CHANCMD_ABORT 0x08 +#define IOAT_CHANCMD_SUSPEND 0x04 +#define IOAT_CHANCMD_APPEND 0x02 +#define IOAT_CHANCMD_START 0x01 + +#define IOAT_CHANCMP_OFFSET 0x18 /* 64-bit Channel Completion Address Register */ +#define IOAT_CHANCMP_OFFSET_LOW 0x18 +#define IOAT_CHANCMP_OFFSET_HIGH 0x1C + +#define IOAT_CDAR_OFFSET 0x20 /* 64-bit Current Descriptor Address Register */ +#define IOAT_CDAR_OFFSET_LOW 0x20 +#define IOAT_CDAR_OFFSET_HIGH 0x24 + +#define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */ +#define IOAT_CHANERR_DMA_TRANSFER_SRC_ADDR_ERR 0x0001 +#define IOAT_CHANERR_DMA_TRANSFER_DEST_ADDR_ERR 0x0002 +#define IOAT_CHANERR_NEXT_DESCRIPTOR_ADDR_ERR 0x0004 +#define IOAT_CHANERR_NEXT_DESCRIPTOR_ALIGNMENT_ERR 0x0008 +#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010 +#define IOAT_CHANERR_CHANCMD_ERR 0x0020 +#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040 +#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080 +#define IOAT_CHANERR_READ_DATA_ERR 0x0100 +#define IOAT_CHANERR_WRITE_DATA_ERR 0x0200 +#define IOAT_CHANERR_DESCRIPTOR_CONTROL_ERR 0x0400 +#define IOAT_CHANERR_DESCRIPTOR_LENGTH_ERR 0x0800 +#define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000 +#define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000 +#define IOAT_CHANERR_SOFT_ERR 0x4000 +#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000 + +#define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */ + +#endif /* _IOAT_REGISTERS_H_ */ diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c new file mode 100644 index 0000000..6be3172 --- /dev/null +++ b/drivers/dma/iop-adma.c @@ -0,0 +1,1448 @@ +/* + * offload engine driver for the Intel Xscale series of i/o processors + * Copyright © 2006, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + */ + +/* + * This driver supports the asynchrounous DMA copy and RAID engines available + * on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x) + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/async_tx.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/spinlock.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/memory.h> +#include <linux/ioport.h> + +#include <mach/adma.h> + +#define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common) +#define to_iop_adma_device(dev) \ + container_of(dev, struct iop_adma_device, common) +#define tx_to_iop_adma_slot(tx) \ + container_of(tx, struct iop_adma_desc_slot, async_tx) + +/** + * iop_adma_free_slots - flags descriptor slots for reuse + * @slot: Slot to free + * Caller must hold &iop_chan->lock while calling this function + */ +static void iop_adma_free_slots(struct iop_adma_desc_slot *slot) +{ + int stride = slot->slots_per_op; + + while (stride--) { + slot->slots_per_op = 0; + slot = list_entry(slot->slot_node.next, + struct iop_adma_desc_slot, + slot_node); + } +} + +static dma_cookie_t +iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *iop_chan, dma_cookie_t cookie) +{ + BUG_ON(desc->async_tx.cookie < 0); + if (desc->async_tx.cookie > 0) { + cookie = desc->async_tx.cookie; + desc->async_tx.cookie = 0; + + /* call the callback (must not sleep or submit new + * operations to this channel) + */ + if (desc->async_tx.callback) + desc->async_tx.callback( + desc->async_tx.callback_param); + + /* unmap dma addresses + * (unmap_single vs unmap_page?) + */ + if (desc->group_head && desc->unmap_len) { + struct iop_adma_desc_slot *unmap = desc->group_head; + struct device *dev = + &iop_chan->device->pdev->dev; + u32 len = unmap->unmap_len; + enum dma_ctrl_flags flags = desc->async_tx.flags; + u32 src_cnt; + dma_addr_t addr; + dma_addr_t dest; + + src_cnt = unmap->unmap_src_cnt; + dest = iop_desc_get_dest_addr(unmap, iop_chan); + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + enum dma_data_direction dir; + + if (src_cnt > 1) /* is xor? */ + dir = DMA_BIDIRECTIONAL; + else + dir = DMA_FROM_DEVICE; + + dma_unmap_page(dev, dest, len, dir); + } + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + while (src_cnt--) { + addr = iop_desc_get_src_addr(unmap, + iop_chan, + src_cnt); + if (addr == dest) + continue; + dma_unmap_page(dev, addr, len, + DMA_TO_DEVICE); + } + } + desc->group_head = NULL; + } + } + + /* run dependent operations */ + async_tx_run_dependencies(&desc->async_tx); + + return cookie; +} + +static int +iop_adma_clean_slot(struct iop_adma_desc_slot *desc, + struct iop_adma_chan *iop_chan) +{ + /* the client is allowed to attach dependent operations + * until 'ack' is set + */ + if (!async_tx_test_ack(&desc->async_tx)) + return 0; + + /* leave the last descriptor in the chain + * so we can append to it + */ + if (desc->chain_node.next == &iop_chan->chain) + return 1; + + dev_dbg(iop_chan->device->common.dev, + "\tfree slot: %d slots_per_op: %d\n", + desc->idx, desc->slots_per_op); + + list_del(&desc->chain_node); + iop_adma_free_slots(desc); + + return 0; +} + +static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan) +{ + struct iop_adma_desc_slot *iter, *_iter, *grp_start = NULL; + dma_cookie_t cookie = 0; + u32 current_desc = iop_chan_get_current_descriptor(iop_chan); + int busy = iop_chan_is_busy(iop_chan); + int seen_current = 0, slot_cnt = 0, slots_per_op = 0; + + dev_dbg(iop_chan->device->common.dev, "%s\n", __func__); + /* free completed slots from the chain starting with + * the oldest descriptor + */ + list_for_each_entry_safe(iter, _iter, &iop_chan->chain, + chain_node) { + pr_debug("\tcookie: %d slot: %d busy: %d " + "this_desc: %#x next_desc: %#x ack: %d\n", + iter->async_tx.cookie, iter->idx, busy, + iter->async_tx.phys, iop_desc_get_next_desc(iter), + async_tx_test_ack(&iter->async_tx)); + prefetch(_iter); + prefetch(&_iter->async_tx); + + /* do not advance past the current descriptor loaded into the + * hardware channel, subsequent descriptors are either in + * process or have not been submitted + */ + if (seen_current) + break; + + /* stop the search if we reach the current descriptor and the + * channel is busy, or if it appears that the current descriptor + * needs to be re-read (i.e. has been appended to) + */ + if (iter->async_tx.phys == current_desc) { + BUG_ON(seen_current++); + if (busy || iop_desc_get_next_desc(iter)) + break; + } + + /* detect the start of a group transaction */ + if (!slot_cnt && !slots_per_op) { + slot_cnt = iter->slot_cnt; + slots_per_op = iter->slots_per_op; + if (slot_cnt <= slots_per_op) { + slot_cnt = 0; + slots_per_op = 0; + } + } + + if (slot_cnt) { + pr_debug("\tgroup++\n"); + if (!grp_start) + grp_start = iter; + slot_cnt -= slots_per_op; + } + + /* all the members of a group are complete */ + if (slots_per_op != 0 && slot_cnt == 0) { + struct iop_adma_desc_slot *grp_iter, *_grp_iter; + int end_of_chain = 0; + pr_debug("\tgroup end\n"); + + /* collect the total results */ + if (grp_start->xor_check_result) { + u32 zero_sum_result = 0; + slot_cnt = grp_start->slot_cnt; + grp_iter = grp_start; + + list_for_each_entry_from(grp_iter, + &iop_chan->chain, chain_node) { + zero_sum_result |= + iop_desc_get_zero_result(grp_iter); + pr_debug("\titer%d result: %d\n", + grp_iter->idx, zero_sum_result); + slot_cnt -= slots_per_op; + if (slot_cnt == 0) + break; + } + pr_debug("\tgrp_start->xor_check_result: %p\n", + grp_start->xor_check_result); + *grp_start->xor_check_result = zero_sum_result; + } + + /* clean up the group */ + slot_cnt = grp_start->slot_cnt; + grp_iter = grp_start; + list_for_each_entry_safe_from(grp_iter, _grp_iter, + &iop_chan->chain, chain_node) { + cookie = iop_adma_run_tx_complete_actions( + grp_iter, iop_chan, cookie); + + slot_cnt -= slots_per_op; + end_of_chain = iop_adma_clean_slot(grp_iter, + iop_chan); + + if (slot_cnt == 0 || end_of_chain) + break; + } + + /* the group should be complete at this point */ + BUG_ON(slot_cnt); + + slots_per_op = 0; + grp_start = NULL; + if (end_of_chain) + break; + else + continue; + } else if (slots_per_op) /* wait for group completion */ + continue; + + /* write back zero sum results (single descriptor case) */ + if (iter->xor_check_result && iter->async_tx.cookie) + *iter->xor_check_result = + iop_desc_get_zero_result(iter); + + cookie = iop_adma_run_tx_complete_actions( + iter, iop_chan, cookie); + + if (iop_adma_clean_slot(iter, iop_chan)) + break; + } + + BUG_ON(!seen_current); + + if (cookie > 0) { + iop_chan->completed_cookie = cookie; + pr_debug("\tcompleted cookie %d\n", cookie); + } +} + +static void +iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan) +{ + spin_lock_bh(&iop_chan->lock); + __iop_adma_slot_cleanup(iop_chan); + spin_unlock_bh(&iop_chan->lock); +} + +static void iop_adma_tasklet(unsigned long data) +{ + struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data; + + spin_lock(&iop_chan->lock); + __iop_adma_slot_cleanup(iop_chan); + spin_unlock(&iop_chan->lock); +} + +static struct iop_adma_desc_slot * +iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots, + int slots_per_op) +{ + struct iop_adma_desc_slot *iter, *_iter, *alloc_start = NULL; + LIST_HEAD(chain); + int slots_found, retry = 0; + + /* start search from the last allocated descrtiptor + * if a contiguous allocation can not be found start searching + * from the beginning of the list + */ +retry: + slots_found = 0; + if (retry == 0) + iter = iop_chan->last_used; + else + iter = list_entry(&iop_chan->all_slots, + struct iop_adma_desc_slot, + slot_node); + + list_for_each_entry_safe_continue( + iter, _iter, &iop_chan->all_slots, slot_node) { + prefetch(_iter); + prefetch(&_iter->async_tx); + if (iter->slots_per_op) { + /* give up after finding the first busy slot + * on the second pass through the list + */ + if (retry) + break; + + slots_found = 0; + continue; + } + + /* start the allocation if the slot is correctly aligned */ + if (!slots_found++) { + if (iop_desc_is_aligned(iter, slots_per_op)) + alloc_start = iter; + else { + slots_found = 0; + continue; + } + } + + if (slots_found == num_slots) { + struct iop_adma_desc_slot *alloc_tail = NULL; + struct iop_adma_desc_slot *last_used = NULL; + iter = alloc_start; + while (num_slots) { + int i; + dev_dbg(iop_chan->device->common.dev, + "allocated slot: %d " + "(desc %p phys: %#x) slots_per_op %d\n", + iter->idx, iter->hw_desc, + iter->async_tx.phys, slots_per_op); + + /* pre-ack all but the last descriptor */ + if (num_slots != slots_per_op) + async_tx_ack(&iter->async_tx); + + list_add_tail(&iter->chain_node, &chain); + alloc_tail = iter; + iter->async_tx.cookie = 0; + iter->slot_cnt = num_slots; + iter->xor_check_result = NULL; + for (i = 0; i < slots_per_op; i++) { + iter->slots_per_op = slots_per_op - i; + last_used = iter; + iter = list_entry(iter->slot_node.next, + struct iop_adma_desc_slot, + slot_node); + } + num_slots -= slots_per_op; + } + alloc_tail->group_head = alloc_start; + alloc_tail->async_tx.cookie = -EBUSY; + list_splice(&chain, &alloc_tail->async_tx.tx_list); + iop_chan->last_used = last_used; + iop_desc_clear_next_desc(alloc_start); + iop_desc_clear_next_desc(alloc_tail); + return alloc_tail; + } + } + if (!retry++) + goto retry; + + /* perform direct reclaim if the allocation fails */ + __iop_adma_slot_cleanup(iop_chan); + + return NULL; +} + +static dma_cookie_t +iop_desc_assign_cookie(struct iop_adma_chan *iop_chan, + struct iop_adma_desc_slot *desc) +{ + dma_cookie_t cookie = iop_chan->common.cookie; + cookie++; + if (cookie < 0) + cookie = 1; + iop_chan->common.cookie = desc->async_tx.cookie = cookie; + return cookie; +} + +static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan) +{ + dev_dbg(iop_chan->device->common.dev, "pending: %d\n", + iop_chan->pending); + + if (iop_chan->pending >= IOP_ADMA_THRESHOLD) { + iop_chan->pending = 0; + iop_chan_append(iop_chan); + } +} + +static dma_cookie_t +iop_adma_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx); + struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan); + struct iop_adma_desc_slot *grp_start, *old_chain_tail; + int slot_cnt; + int slots_per_op; + dma_cookie_t cookie; + dma_addr_t next_dma; + + grp_start = sw_desc->group_head; + slot_cnt = grp_start->slot_cnt; + slots_per_op = grp_start->slots_per_op; + + spin_lock_bh(&iop_chan->lock); + cookie = iop_desc_assign_cookie(iop_chan, sw_desc); + + old_chain_tail = list_entry(iop_chan->chain.prev, + struct iop_adma_desc_slot, chain_node); + list_splice_init(&sw_desc->async_tx.tx_list, + &old_chain_tail->chain_node); + + /* fix up the hardware chain */ + next_dma = grp_start->async_tx.phys; + iop_desc_set_next_desc(old_chain_tail, next_dma); + BUG_ON(iop_desc_get_next_desc(old_chain_tail) != next_dma); /* flush */ + + /* check for pre-chained descriptors */ + iop_paranoia(iop_desc_get_next_desc(sw_desc)); + + /* increment the pending count by the number of slots + * memcpy operations have a 1:1 (slot:operation) relation + * other operations are heavier and will pop the threshold + * more often. + */ + iop_chan->pending += slot_cnt; + iop_adma_check_threshold(iop_chan); + spin_unlock_bh(&iop_chan->lock); + + dev_dbg(iop_chan->device->common.dev, "%s cookie: %d slot: %d\n", + __func__, sw_desc->async_tx.cookie, sw_desc->idx); + + return cookie; +} + +static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan); +static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan); + +/** + * iop_adma_alloc_chan_resources - returns the number of allocated descriptors + * @chan - allocate descriptor resources for this channel + * @client - current client requesting the channel be ready for requests + * + * Note: We keep the slots for 1 operation on iop_chan->chain at all times. To + * avoid deadlock, via async_xor, num_descs_in_pool must at a minimum be + * greater than 2x the number slots needed to satisfy a device->max_xor + * request. + * */ +static int iop_adma_alloc_chan_resources(struct dma_chan *chan, + struct dma_client *client) +{ + char *hw_desc; + int idx; + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *slot = NULL; + int init = iop_chan->slots_allocated ? 0 : 1; + struct iop_adma_platform_data *plat_data = + iop_chan->device->pdev->dev.platform_data; + int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE; + + /* Allocate descriptor slots */ + do { + idx = iop_chan->slots_allocated; + if (idx == num_descs_in_pool) + break; + + slot = kzalloc(sizeof(*slot), GFP_KERNEL); + if (!slot) { + printk(KERN_INFO "IOP ADMA Channel only initialized" + " %d descriptor slots", idx); + break; + } + hw_desc = (char *) iop_chan->device->dma_desc_pool_virt; + slot->hw_desc = (void *) &hw_desc[idx * IOP_ADMA_SLOT_SIZE]; + + dma_async_tx_descriptor_init(&slot->async_tx, chan); + slot->async_tx.tx_submit = iop_adma_tx_submit; + INIT_LIST_HEAD(&slot->chain_node); + INIT_LIST_HEAD(&slot->slot_node); + INIT_LIST_HEAD(&slot->async_tx.tx_list); + hw_desc = (char *) iop_chan->device->dma_desc_pool; + slot->async_tx.phys = + (dma_addr_t) &hw_desc[idx * IOP_ADMA_SLOT_SIZE]; + slot->idx = idx; + + spin_lock_bh(&iop_chan->lock); + iop_chan->slots_allocated++; + list_add_tail(&slot->slot_node, &iop_chan->all_slots); + spin_unlock_bh(&iop_chan->lock); + } while (iop_chan->slots_allocated < num_descs_in_pool); + + if (idx && !iop_chan->last_used) + iop_chan->last_used = list_entry(iop_chan->all_slots.next, + struct iop_adma_desc_slot, + slot_node); + + dev_dbg(iop_chan->device->common.dev, + "allocated %d descriptor slots last_used: %p\n", + iop_chan->slots_allocated, iop_chan->last_used); + + /* initialize the channel and the chain with a null operation */ + if (init) { + if (dma_has_cap(DMA_MEMCPY, + iop_chan->device->common.cap_mask)) + iop_chan_start_null_memcpy(iop_chan); + else if (dma_has_cap(DMA_XOR, + iop_chan->device->common.cap_mask)) + iop_chan_start_null_xor(iop_chan); + else + BUG(); + } + + return (idx > 0) ? idx : -ENOMEM; +} + +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *grp_start; + int slot_cnt, slots_per_op; + + dev_dbg(iop_chan->device->common.dev, "%s\n", __func__); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_interrupt_slot_count(&slots_per_op, iop_chan); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + iop_desc_init_interrupt(grp_start, iop_chan); + grp_start->unmap_len = 0; + sw_desc->async_tx.flags = flags; + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest, + dma_addr_t dma_src, size_t len, unsigned long flags) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *grp_start; + int slot_cnt, slots_per_op; + + if (unlikely(!len)) + return NULL; + BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT)); + + dev_dbg(iop_chan->device->common.dev, "%s len: %u\n", + __func__, len); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_memcpy_slot_count(len, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + iop_desc_init_memcpy(grp_start, flags); + iop_desc_set_byte_count(grp_start, iop_chan, len); + iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest); + iop_desc_set_memcpy_src_addr(grp_start, dma_src); + sw_desc->unmap_src_cnt = 1; + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_memset(struct dma_chan *chan, dma_addr_t dma_dest, + int value, size_t len, unsigned long flags) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *grp_start; + int slot_cnt, slots_per_op; + + if (unlikely(!len)) + return NULL; + BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT)); + + dev_dbg(iop_chan->device->common.dev, "%s len: %u\n", + __func__, len); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_memset_slot_count(len, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + iop_desc_init_memset(grp_start, flags); + iop_desc_set_byte_count(grp_start, iop_chan, len); + iop_desc_set_block_fill_val(grp_start, value); + iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest); + sw_desc->unmap_src_cnt = 1; + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest, + dma_addr_t *dma_src, unsigned int src_cnt, size_t len, + unsigned long flags) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *grp_start; + int slot_cnt, slots_per_op; + + if (unlikely(!len)) + return NULL; + BUG_ON(unlikely(len > IOP_ADMA_XOR_MAX_BYTE_COUNT)); + + dev_dbg(iop_chan->device->common.dev, + "%s src_cnt: %d len: %u flags: %lx\n", + __func__, src_cnt, len, flags); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + iop_desc_init_xor(grp_start, src_cnt, flags); + iop_desc_set_byte_count(grp_start, iop_chan, len); + iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest); + sw_desc->unmap_src_cnt = src_cnt; + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + while (src_cnt--) + iop_desc_set_xor_src_addr(grp_start, src_cnt, + dma_src[src_cnt]); + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src, + unsigned int src_cnt, size_t len, u32 *result, + unsigned long flags) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *sw_desc, *grp_start; + int slot_cnt, slots_per_op; + + if (unlikely(!len)) + return NULL; + + dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n", + __func__, src_cnt, len); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_zero_sum_slot_count(len, src_cnt, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + iop_desc_init_zero_sum(grp_start, src_cnt, flags); + iop_desc_set_zero_sum_byte_count(grp_start, len); + grp_start->xor_check_result = result; + pr_debug("\t%s: grp_start->xor_check_result: %p\n", + __func__, grp_start->xor_check_result); + sw_desc->unmap_src_cnt = src_cnt; + sw_desc->unmap_len = len; + sw_desc->async_tx.flags = flags; + while (src_cnt--) + iop_desc_set_zero_sum_src_addr(grp_start, src_cnt, + dma_src[src_cnt]); + } + spin_unlock_bh(&iop_chan->lock); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static void iop_adma_free_chan_resources(struct dma_chan *chan) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + struct iop_adma_desc_slot *iter, *_iter; + int in_use_descs = 0; + + iop_adma_slot_cleanup(iop_chan); + + spin_lock_bh(&iop_chan->lock); + list_for_each_entry_safe(iter, _iter, &iop_chan->chain, + chain_node) { + in_use_descs++; + list_del(&iter->chain_node); + } + list_for_each_entry_safe_reverse( + iter, _iter, &iop_chan->all_slots, slot_node) { + list_del(&iter->slot_node); + kfree(iter); + iop_chan->slots_allocated--; + } + iop_chan->last_used = NULL; + + dev_dbg(iop_chan->device->common.dev, "%s slots_allocated %d\n", + __func__, iop_chan->slots_allocated); + spin_unlock_bh(&iop_chan->lock); + + /* one is ok since we left it on there on purpose */ + if (in_use_descs > 1) + printk(KERN_ERR "IOP: Freeing %d in use descriptors!\n", + in_use_descs - 1); +} + +/** + * iop_adma_is_complete - poll the status of an ADMA transaction + * @chan: ADMA channel handle + * @cookie: ADMA transaction identifier + */ +static enum dma_status iop_adma_is_complete(struct dma_chan *chan, + dma_cookie_t cookie, + dma_cookie_t *done, + dma_cookie_t *used) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + dma_cookie_t last_used; + dma_cookie_t last_complete; + enum dma_status ret; + + last_used = chan->cookie; + last_complete = iop_chan->completed_cookie; + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + ret = dma_async_is_complete(cookie, last_complete, last_used); + if (ret == DMA_SUCCESS) + return ret; + + iop_adma_slot_cleanup(iop_chan); + + last_used = chan->cookie; + last_complete = iop_chan->completed_cookie; + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + return dma_async_is_complete(cookie, last_complete, last_used); +} + +static irqreturn_t iop_adma_eot_handler(int irq, void *data) +{ + struct iop_adma_chan *chan = data; + + dev_dbg(chan->device->common.dev, "%s\n", __func__); + + tasklet_schedule(&chan->irq_tasklet); + + iop_adma_device_clear_eot_status(chan); + + return IRQ_HANDLED; +} + +static irqreturn_t iop_adma_eoc_handler(int irq, void *data) +{ + struct iop_adma_chan *chan = data; + + dev_dbg(chan->device->common.dev, "%s\n", __func__); + + tasklet_schedule(&chan->irq_tasklet); + + iop_adma_device_clear_eoc_status(chan); + + return IRQ_HANDLED; +} + +static irqreturn_t iop_adma_err_handler(int irq, void *data) +{ + struct iop_adma_chan *chan = data; + unsigned long status = iop_chan_get_status(chan); + + dev_printk(KERN_ERR, chan->device->common.dev, + "error ( %s%s%s%s%s%s%s)\n", + iop_is_err_int_parity(status, chan) ? "int_parity " : "", + iop_is_err_mcu_abort(status, chan) ? "mcu_abort " : "", + iop_is_err_int_tabort(status, chan) ? "int_tabort " : "", + iop_is_err_int_mabort(status, chan) ? "int_mabort " : "", + iop_is_err_pci_tabort(status, chan) ? "pci_tabort " : "", + iop_is_err_pci_mabort(status, chan) ? "pci_mabort " : "", + iop_is_err_split_tx(status, chan) ? "split_tx " : ""); + + iop_adma_device_clear_err_status(chan); + + BUG(); + + return IRQ_HANDLED; +} + +static void iop_adma_issue_pending(struct dma_chan *chan) +{ + struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan); + + if (iop_chan->pending) { + iop_chan->pending = 0; + iop_chan_append(iop_chan); + } +} + +/* + * Perform a transaction to verify the HW works. + */ +#define IOP_ADMA_TEST_SIZE 2000 + +static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device) +{ + int i; + void *src, *dest; + dma_addr_t src_dma, dest_dma; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + struct dma_async_tx_descriptor *tx; + int err = 0; + struct iop_adma_chan *iop_chan; + + dev_dbg(device->common.dev, "%s\n", __func__); + + src = kmalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL); + if (!src) + return -ENOMEM; + dest = kzalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL); + if (!dest) { + kfree(src); + return -ENOMEM; + } + + /* Fill in src buffer */ + for (i = 0; i < IOP_ADMA_TEST_SIZE; i++) + ((u8 *) src)[i] = (u8)i; + + /* Start copy, using first DMA channel */ + dma_chan = container_of(device->common.channels.next, + struct dma_chan, + device_node); + if (iop_adma_alloc_chan_resources(dma_chan, NULL) < 1) { + err = -ENODEV; + goto out; + } + + dest_dma = dma_map_single(dma_chan->device->dev, dest, + IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE); + src_dma = dma_map_single(dma_chan->device->dev, src, + IOP_ADMA_TEST_SIZE, DMA_TO_DEVICE); + tx = iop_adma_prep_dma_memcpy(dma_chan, dest_dma, src_dma, + IOP_ADMA_TEST_SIZE, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(1); + + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != + DMA_SUCCESS) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test copy timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + iop_chan = to_iop_adma_chan(dma_chan); + dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma, + IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE); + if (memcmp(src, dest, IOP_ADMA_TEST_SIZE)) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test copy failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + +free_resources: + iop_adma_free_chan_resources(dma_chan); +out: + kfree(src); + kfree(dest); + return err; +} + +#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */ +static int __devinit +iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device) +{ + int i, src_idx; + struct page *dest; + struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST]; + struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1]; + dma_addr_t dma_srcs[IOP_ADMA_NUM_SRC_TEST + 1]; + dma_addr_t dma_addr, dest_dma; + struct dma_async_tx_descriptor *tx; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + u8 cmp_byte = 0; + u32 cmp_word; + u32 zero_sum_result; + int err = 0; + struct iop_adma_chan *iop_chan; + + dev_dbg(device->common.dev, "%s\n", __func__); + + for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) { + xor_srcs[src_idx] = alloc_page(GFP_KERNEL); + if (!xor_srcs[src_idx]) + while (src_idx--) { + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + } + + dest = alloc_page(GFP_KERNEL); + if (!dest) + while (src_idx--) { + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + + /* Fill in src buffers */ + for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) { + u8 *ptr = page_address(xor_srcs[src_idx]); + for (i = 0; i < PAGE_SIZE; i++) + ptr[i] = (1 << src_idx); + } + + for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) + cmp_byte ^= (u8) (1 << src_idx); + + cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | + (cmp_byte << 8) | cmp_byte; + + memset(page_address(dest), 0, PAGE_SIZE); + + dma_chan = container_of(device->common.channels.next, + struct dma_chan, + device_node); + if (iop_adma_alloc_chan_resources(dma_chan, NULL) < 1) { + err = -ENODEV; + goto out; + } + + /* test xor */ + dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, + PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) + dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i], + 0, PAGE_SIZE, DMA_TO_DEVICE); + tx = iop_adma_prep_dma_xor(dma_chan, dest_dma, dma_srcs, + IOP_ADMA_NUM_SRC_TEST, PAGE_SIZE, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != + DMA_SUCCESS) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test xor timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + iop_chan = to_iop_adma_chan(dma_chan); + dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma, + PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { + u32 *ptr = page_address(dest); + if (ptr[i] != cmp_word) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test xor failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + } + dma_sync_single_for_device(&iop_chan->device->pdev->dev, dest_dma, + PAGE_SIZE, DMA_TO_DEVICE); + + /* skip zero sum if the capability is not present */ + if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask)) + goto free_resources; + + /* zero sum the sources with the destintation page */ + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++) + zero_sum_srcs[i] = xor_srcs[i]; + zero_sum_srcs[i] = dest; + + zero_sum_result = 1; + + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) + dma_srcs[i] = dma_map_page(dma_chan->device->dev, + zero_sum_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, + IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, + &zero_sum_result, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test zero sum timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + if (zero_sum_result != 0) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test zero sum failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + /* test memset */ + dma_addr = dma_map_page(dma_chan->device->dev, dest, 0, + PAGE_SIZE, DMA_FROM_DEVICE); + tx = iop_adma_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test memset timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) { + u32 *ptr = page_address(dest); + if (ptr[i]) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test memset failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + } + + /* test for non-zero parity sum */ + zero_sum_result = 0; + for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++) + dma_srcs[i] = dma_map_page(dma_chan->device->dev, + zero_sum_srcs[i], 0, PAGE_SIZE, + DMA_TO_DEVICE); + tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs, + IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE, + &zero_sum_result, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + + cookie = iop_adma_tx_submit(tx); + iop_adma_issue_pending(dma_chan); + msleep(8); + + if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test non-zero sum timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + if (zero_sum_result != 1) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test non-zero sum failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + +free_resources: + iop_adma_free_chan_resources(dma_chan); +out: + src_idx = IOP_ADMA_NUM_SRC_TEST; + while (src_idx--) + __free_page(xor_srcs[src_idx]); + __free_page(dest); + return err; +} + +static int __devexit iop_adma_remove(struct platform_device *dev) +{ + struct iop_adma_device *device = platform_get_drvdata(dev); + struct dma_chan *chan, *_chan; + struct iop_adma_chan *iop_chan; + int i; + struct iop_adma_platform_data *plat_data = dev->dev.platform_data; + + dma_async_device_unregister(&device->common); + + for (i = 0; i < 3; i++) { + unsigned int irq; + irq = platform_get_irq(dev, i); + free_irq(irq, device); + } + + dma_free_coherent(&dev->dev, plat_data->pool_size, + device->dma_desc_pool_virt, device->dma_desc_pool); + + do { + struct resource *res; + res = platform_get_resource(dev, IORESOURCE_MEM, 0); + release_mem_region(res->start, res->end - res->start); + } while (0); + + list_for_each_entry_safe(chan, _chan, &device->common.channels, + device_node) { + iop_chan = to_iop_adma_chan(chan); + list_del(&chan->device_node); + kfree(iop_chan); + } + kfree(device); + + return 0; +} + +static int __devinit iop_adma_probe(struct platform_device *pdev) +{ + struct resource *res; + int ret = 0, i; + struct iop_adma_device *adev; + struct iop_adma_chan *iop_chan; + struct dma_device *dma_dev; + struct iop_adma_platform_data *plat_data = pdev->dev.platform_data; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + if (!devm_request_mem_region(&pdev->dev, res->start, + res->end - res->start, pdev->name)) + return -EBUSY; + + adev = kzalloc(sizeof(*adev), GFP_KERNEL); + if (!adev) + return -ENOMEM; + dma_dev = &adev->common; + + /* allocate coherent memory for hardware descriptors + * note: writecombine gives slightly better performance, but + * requires that we explicitly flush the writes + */ + if ((adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev, + plat_data->pool_size, + &adev->dma_desc_pool, + GFP_KERNEL)) == NULL) { + ret = -ENOMEM; + goto err_free_adev; + } + + dev_dbg(&pdev->dev, "%s: allocted descriptor pool virt %p phys %p\n", + __func__, adev->dma_desc_pool_virt, + (void *) adev->dma_desc_pool); + + adev->id = plat_data->hw_id; + + /* discover transaction capabilites from the platform data */ + dma_dev->cap_mask = plat_data->cap_mask; + + adev->pdev = pdev; + platform_set_drvdata(pdev, adev); + + INIT_LIST_HEAD(&dma_dev->channels); + + /* set base routines */ + dma_dev->device_alloc_chan_resources = iop_adma_alloc_chan_resources; + dma_dev->device_free_chan_resources = iop_adma_free_chan_resources; + dma_dev->device_is_tx_complete = iop_adma_is_complete; + dma_dev->device_issue_pending = iop_adma_issue_pending; + dma_dev->dev = &pdev->dev; + + /* set prep routines based on capability */ + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) + dma_dev->device_prep_dma_memcpy = iop_adma_prep_dma_memcpy; + if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) + dma_dev->device_prep_dma_memset = iop_adma_prep_dma_memset; + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { + dma_dev->max_xor = iop_adma_get_max_xor(); + dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor; + } + if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask)) + dma_dev->device_prep_dma_zero_sum = + iop_adma_prep_dma_zero_sum; + if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask)) + dma_dev->device_prep_dma_interrupt = + iop_adma_prep_dma_interrupt; + + iop_chan = kzalloc(sizeof(*iop_chan), GFP_KERNEL); + if (!iop_chan) { + ret = -ENOMEM; + goto err_free_dma; + } + iop_chan->device = adev; + + iop_chan->mmr_base = devm_ioremap(&pdev->dev, res->start, + res->end - res->start); + if (!iop_chan->mmr_base) { + ret = -ENOMEM; + goto err_free_iop_chan; + } + tasklet_init(&iop_chan->irq_tasklet, iop_adma_tasklet, (unsigned long) + iop_chan); + + /* clear errors before enabling interrupts */ + iop_adma_device_clear_err_status(iop_chan); + + for (i = 0; i < 3; i++) { + irq_handler_t handler[] = { iop_adma_eot_handler, + iop_adma_eoc_handler, + iop_adma_err_handler }; + int irq = platform_get_irq(pdev, i); + if (irq < 0) { + ret = -ENXIO; + goto err_free_iop_chan; + } else { + ret = devm_request_irq(&pdev->dev, irq, + handler[i], 0, pdev->name, iop_chan); + if (ret) + goto err_free_iop_chan; + } + } + + spin_lock_init(&iop_chan->lock); + INIT_LIST_HEAD(&iop_chan->chain); + INIT_LIST_HEAD(&iop_chan->all_slots); + INIT_RCU_HEAD(&iop_chan->common.rcu); + iop_chan->common.device = dma_dev; + list_add_tail(&iop_chan->common.device_node, &dma_dev->channels); + + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { + ret = iop_adma_memcpy_self_test(adev); + dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret); + if (ret) + goto err_free_iop_chan; + } + + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) || + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) { + ret = iop_adma_xor_zero_sum_self_test(adev); + dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); + if (ret) + goto err_free_iop_chan; + } + + dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: " + "( %s%s%s%s%s%s%s%s%s%s)\n", + dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "", + dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "", + dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "", + dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", + dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "", + dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "", + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", + dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "", + dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", + dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); + + dma_async_device_register(dma_dev); + goto out; + + err_free_iop_chan: + kfree(iop_chan); + err_free_dma: + dma_free_coherent(&adev->pdev->dev, plat_data->pool_size, + adev->dma_desc_pool_virt, adev->dma_desc_pool); + err_free_adev: + kfree(adev); + out: + return ret; +} + +static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan) +{ + struct iop_adma_desc_slot *sw_desc, *grp_start; + dma_cookie_t cookie; + int slot_cnt, slots_per_op; + + dev_dbg(iop_chan->device->common.dev, "%s\n", __func__); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_memcpy_slot_count(0, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + + list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain); + async_tx_ack(&sw_desc->async_tx); + iop_desc_init_memcpy(grp_start, 0); + iop_desc_set_byte_count(grp_start, iop_chan, 0); + iop_desc_set_dest_addr(grp_start, iop_chan, 0); + iop_desc_set_memcpy_src_addr(grp_start, 0); + + cookie = iop_chan->common.cookie; + cookie++; + if (cookie <= 1) + cookie = 2; + + /* initialize the completed cookie to be less than + * the most recently used cookie + */ + iop_chan->completed_cookie = cookie - 1; + iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie; + + /* channel should not be busy */ + BUG_ON(iop_chan_is_busy(iop_chan)); + + /* clear any prior error-status bits */ + iop_adma_device_clear_err_status(iop_chan); + + /* disable operation */ + iop_chan_disable(iop_chan); + + /* set the descriptor address */ + iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys); + + /* 1/ don't add pre-chained descriptors + * 2/ dummy read to flush next_desc write + */ + BUG_ON(iop_desc_get_next_desc(sw_desc)); + + /* run the descriptor */ + iop_chan_enable(iop_chan); + } else + dev_printk(KERN_ERR, iop_chan->device->common.dev, + "failed to allocate null descriptor\n"); + spin_unlock_bh(&iop_chan->lock); +} + +static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan) +{ + struct iop_adma_desc_slot *sw_desc, *grp_start; + dma_cookie_t cookie; + int slot_cnt, slots_per_op; + + dev_dbg(iop_chan->device->common.dev, "%s\n", __func__); + + spin_lock_bh(&iop_chan->lock); + slot_cnt = iop_chan_xor_slot_count(0, 2, &slots_per_op); + sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op); + if (sw_desc) { + grp_start = sw_desc->group_head; + list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain); + async_tx_ack(&sw_desc->async_tx); + iop_desc_init_null_xor(grp_start, 2, 0); + iop_desc_set_byte_count(grp_start, iop_chan, 0); + iop_desc_set_dest_addr(grp_start, iop_chan, 0); + iop_desc_set_xor_src_addr(grp_start, 0, 0); + iop_desc_set_xor_src_addr(grp_start, 1, 0); + + cookie = iop_chan->common.cookie; + cookie++; + if (cookie <= 1) + cookie = 2; + + /* initialize the completed cookie to be less than + * the most recently used cookie + */ + iop_chan->completed_cookie = cookie - 1; + iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie; + + /* channel should not be busy */ + BUG_ON(iop_chan_is_busy(iop_chan)); + + /* clear any prior error-status bits */ + iop_adma_device_clear_err_status(iop_chan); + + /* disable operation */ + iop_chan_disable(iop_chan); + + /* set the descriptor address */ + iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys); + + /* 1/ don't add pre-chained descriptors + * 2/ dummy read to flush next_desc write + */ + BUG_ON(iop_desc_get_next_desc(sw_desc)); + + /* run the descriptor */ + iop_chan_enable(iop_chan); + } else + dev_printk(KERN_ERR, iop_chan->device->common.dev, + "failed to allocate null descriptor\n"); + spin_unlock_bh(&iop_chan->lock); +} + +MODULE_ALIAS("platform:iop-adma"); + +static struct platform_driver iop_adma_driver = { + .probe = iop_adma_probe, + .remove = iop_adma_remove, + .driver = { + .owner = THIS_MODULE, + .name = "iop-adma", + }, +}; + +static int __init iop_adma_init (void) +{ + return platform_driver_register(&iop_adma_driver); +} + +/* it's currently unsafe to unload this module */ +#if 0 +static void __exit iop_adma_exit (void) +{ + platform_driver_unregister(&iop_adma_driver); + return; +} +module_exit(iop_adma_exit); +#endif + +module_init(iop_adma_init); + +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("IOP ADMA Engine Driver"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c new file mode 100644 index 0000000..9f6fe46 --- /dev/null +++ b/drivers/dma/iovlock.c @@ -0,0 +1,269 @@ +/* + * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved. + * Portions based on net/core/datagram.c and copyrighted by their authors. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., 59 + * Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * The full GNU General Public License is included in this distribution in the + * file called COPYING. + */ + +/* + * This code allows the net stack to make use of a DMA engine for + * skb to iovec copies. + */ + +#include <linux/dmaengine.h> +#include <linux/pagemap.h> +#include <net/tcp.h> /* for memcpy_toiovec */ +#include <asm/io.h> +#include <asm/uaccess.h> + +static int num_pages_spanned(struct iovec *iov) +{ + return + ((PAGE_ALIGN((unsigned long)iov->iov_base + iov->iov_len) - + ((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT); +} + +/* + * Pin down all the iovec pages needed for len bytes. + * Return a struct dma_pinned_list to keep track of pages pinned down. + * + * We are allocating a single chunk of memory, and then carving it up into + * 3 sections, the latter 2 whose size depends on the number of iovecs and the + * total number of pages, respectively. + */ +struct dma_pinned_list *dma_pin_iovec_pages(struct iovec *iov, size_t len) +{ + struct dma_pinned_list *local_list; + struct page **pages; + int i; + int ret; + int nr_iovecs = 0; + int iovec_len_used = 0; + int iovec_pages_used = 0; + + /* don't pin down non-user-based iovecs */ + if (segment_eq(get_fs(), KERNEL_DS)) + return NULL; + + /* determine how many iovecs/pages there are, up front */ + do { + iovec_len_used += iov[nr_iovecs].iov_len; + iovec_pages_used += num_pages_spanned(&iov[nr_iovecs]); + nr_iovecs++; + } while (iovec_len_used < len); + + /* single kmalloc for pinned list, page_list[], and the page arrays */ + local_list = kmalloc(sizeof(*local_list) + + (nr_iovecs * sizeof (struct dma_page_list)) + + (iovec_pages_used * sizeof (struct page*)), GFP_KERNEL); + if (!local_list) + goto out; + + /* list of pages starts right after the page list array */ + pages = (struct page **) &local_list->page_list[nr_iovecs]; + + local_list->nr_iovecs = 0; + + for (i = 0; i < nr_iovecs; i++) { + struct dma_page_list *page_list = &local_list->page_list[i]; + + len -= iov[i].iov_len; + + if (!access_ok(VERIFY_WRITE, iov[i].iov_base, iov[i].iov_len)) + goto unpin; + + page_list->nr_pages = num_pages_spanned(&iov[i]); + page_list->base_address = iov[i].iov_base; + + page_list->pages = pages; + pages += page_list->nr_pages; + + /* pin pages down */ + down_read(¤t->mm->mmap_sem); + ret = get_user_pages( + current, + current->mm, + (unsigned long) iov[i].iov_base, + page_list->nr_pages, + 1, /* write */ + 0, /* force */ + page_list->pages, + NULL); + up_read(¤t->mm->mmap_sem); + + if (ret != page_list->nr_pages) + goto unpin; + + local_list->nr_iovecs = i + 1; + } + + return local_list; + +unpin: + dma_unpin_iovec_pages(local_list); +out: + return NULL; +} + +void dma_unpin_iovec_pages(struct dma_pinned_list *pinned_list) +{ + int i, j; + + if (!pinned_list) + return; + + for (i = 0; i < pinned_list->nr_iovecs; i++) { + struct dma_page_list *page_list = &pinned_list->page_list[i]; + for (j = 0; j < page_list->nr_pages; j++) { + set_page_dirty_lock(page_list->pages[j]); + page_cache_release(page_list->pages[j]); + } + } + + kfree(pinned_list); +} + + +/* + * We have already pinned down the pages we will be using in the iovecs. + * Each entry in iov array has corresponding entry in pinned_list->page_list. + * Using array indexing to keep iov[] and page_list[] in sync. + * Initial elements in iov array's iov->iov_len will be 0 if already copied into + * by another call. + * iov array length remaining guaranteed to be bigger than len. + */ +dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov, + struct dma_pinned_list *pinned_list, unsigned char *kdata, size_t len) +{ + int iov_byte_offset; + int copy; + dma_cookie_t dma_cookie = 0; + int iovec_idx; + int page_idx; + + if (!chan) + return memcpy_toiovec(iov, kdata, len); + + iovec_idx = 0; + while (iovec_idx < pinned_list->nr_iovecs) { + struct dma_page_list *page_list; + + /* skip already used-up iovecs */ + while (!iov[iovec_idx].iov_len) + iovec_idx++; + + page_list = &pinned_list->page_list[iovec_idx]; + + iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK); + page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK) + - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT; + + /* break up copies to not cross page boundary */ + while (iov[iovec_idx].iov_len) { + copy = min_t(int, PAGE_SIZE - iov_byte_offset, len); + copy = min_t(int, copy, iov[iovec_idx].iov_len); + + dma_cookie = dma_async_memcpy_buf_to_pg(chan, + page_list->pages[page_idx], + iov_byte_offset, + kdata, + copy); + + len -= copy; + iov[iovec_idx].iov_len -= copy; + iov[iovec_idx].iov_base += copy; + + if (!len) + return dma_cookie; + + kdata += copy; + iov_byte_offset = 0; + page_idx++; + } + iovec_idx++; + } + + /* really bad if we ever run out of iovecs */ + BUG(); + return -EFAULT; +} + +dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov, + struct dma_pinned_list *pinned_list, struct page *page, + unsigned int offset, size_t len) +{ + int iov_byte_offset; + int copy; + dma_cookie_t dma_cookie = 0; + int iovec_idx; + int page_idx; + int err; + + /* this needs as-yet-unimplemented buf-to-buff, so punt. */ + /* TODO: use dma for this */ + if (!chan || !pinned_list) { + u8 *vaddr = kmap(page); + err = memcpy_toiovec(iov, vaddr + offset, len); + kunmap(page); + return err; + } + + iovec_idx = 0; + while (iovec_idx < pinned_list->nr_iovecs) { + struct dma_page_list *page_list; + + /* skip already used-up iovecs */ + while (!iov[iovec_idx].iov_len) + iovec_idx++; + + page_list = &pinned_list->page_list[iovec_idx]; + + iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK); + page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK) + - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT; + + /* break up copies to not cross page boundary */ + while (iov[iovec_idx].iov_len) { + copy = min_t(int, PAGE_SIZE - iov_byte_offset, len); + copy = min_t(int, copy, iov[iovec_idx].iov_len); + + dma_cookie = dma_async_memcpy_pg_to_pg(chan, + page_list->pages[page_idx], + iov_byte_offset, + page, + offset, + copy); + + len -= copy; + iov[iovec_idx].iov_len -= copy; + iov[iovec_idx].iov_base += copy; + + if (!len) + return dma_cookie; + + offset += copy; + iov_byte_offset = 0; + page_idx++; + } + iovec_idx++; + } + + /* really bad if we ever run out of iovecs */ + BUG(); + return -EFAULT; +} diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c new file mode 100644 index 0000000..bcda174 --- /dev/null +++ b/drivers/dma/mv_xor.c @@ -0,0 +1,1384 @@ +/* + * offload engine driver for the Marvell XOR engine + * Copyright (C) 2007, 2008, Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program; if not, write to the Free Software Foundation, Inc., + * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/async_tx.h> +#include <linux/delay.h> +#include <linux/dma-mapping.h> +#include <linux/spinlock.h> +#include <linux/interrupt.h> +#include <linux/platform_device.h> +#include <linux/memory.h> +#include <plat/mv_xor.h> +#include "mv_xor.h" + +static void mv_xor_issue_pending(struct dma_chan *chan); + +#define to_mv_xor_chan(chan) \ + container_of(chan, struct mv_xor_chan, common) + +#define to_mv_xor_device(dev) \ + container_of(dev, struct mv_xor_device, common) + +#define to_mv_xor_slot(tx) \ + container_of(tx, struct mv_xor_desc_slot, async_tx) + +static void mv_desc_init(struct mv_xor_desc_slot *desc, unsigned long flags) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + + hw_desc->status = (1 << 31); + hw_desc->phy_next_desc = 0; + hw_desc->desc_command = (1 << 31); +} + +static u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + return hw_desc->phy_dest_addr; +} + +static u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc, + int src_idx) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + return hw_desc->phy_src_addr[src_idx]; +} + + +static void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc, + u32 byte_count) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + hw_desc->byte_count = byte_count; +} + +static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc, + u32 next_desc_addr) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + BUG_ON(hw_desc->phy_next_desc); + hw_desc->phy_next_desc = next_desc_addr; +} + +static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + hw_desc->phy_next_desc = 0; +} + +static void mv_desc_set_block_fill_val(struct mv_xor_desc_slot *desc, u32 val) +{ + desc->value = val; +} + +static void mv_desc_set_dest_addr(struct mv_xor_desc_slot *desc, + dma_addr_t addr) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + hw_desc->phy_dest_addr = addr; +} + +static int mv_chan_memset_slot_count(size_t len) +{ + return 1; +} + +#define mv_chan_memcpy_slot_count(c) mv_chan_memset_slot_count(c) + +static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc, + int index, dma_addr_t addr) +{ + struct mv_xor_desc *hw_desc = desc->hw_desc; + hw_desc->phy_src_addr[index] = addr; + if (desc->type == DMA_XOR) + hw_desc->desc_command |= (1 << index); +} + +static u32 mv_chan_get_current_desc(struct mv_xor_chan *chan) +{ + return __raw_readl(XOR_CURR_DESC(chan)); +} + +static void mv_chan_set_next_descriptor(struct mv_xor_chan *chan, + u32 next_desc_addr) +{ + __raw_writel(next_desc_addr, XOR_NEXT_DESC(chan)); +} + +static void mv_chan_set_dest_pointer(struct mv_xor_chan *chan, u32 desc_addr) +{ + __raw_writel(desc_addr, XOR_DEST_POINTER(chan)); +} + +static void mv_chan_set_block_size(struct mv_xor_chan *chan, u32 block_size) +{ + __raw_writel(block_size, XOR_BLOCK_SIZE(chan)); +} + +static void mv_chan_set_value(struct mv_xor_chan *chan, u32 value) +{ + __raw_writel(value, XOR_INIT_VALUE_LOW(chan)); + __raw_writel(value, XOR_INIT_VALUE_HIGH(chan)); +} + +static void mv_chan_unmask_interrupts(struct mv_xor_chan *chan) +{ + u32 val = __raw_readl(XOR_INTR_MASK(chan)); + val |= XOR_INTR_MASK_VALUE << (chan->idx * 16); + __raw_writel(val, XOR_INTR_MASK(chan)); +} + +static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan) +{ + u32 intr_cause = __raw_readl(XOR_INTR_CAUSE(chan)); + intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF; + return intr_cause; +} + +static int mv_is_err_intr(u32 intr_cause) +{ + if (intr_cause & ((1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9))) + return 1; + + return 0; +} + +static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan) +{ + u32 val = (1 << (1 + (chan->idx * 16))); + dev_dbg(chan->device->common.dev, "%s, val 0x%08x\n", __func__, val); + __raw_writel(val, XOR_INTR_CAUSE(chan)); +} + +static void mv_xor_device_clear_err_status(struct mv_xor_chan *chan) +{ + u32 val = 0xFFFF0000 >> (chan->idx * 16); + __raw_writel(val, XOR_INTR_CAUSE(chan)); +} + +static int mv_can_chain(struct mv_xor_desc_slot *desc) +{ + struct mv_xor_desc_slot *chain_old_tail = list_entry( + desc->chain_node.prev, struct mv_xor_desc_slot, chain_node); + + if (chain_old_tail->type != desc->type) + return 0; + if (desc->type == DMA_MEMSET) + return 0; + + return 1; +} + +static void mv_set_mode(struct mv_xor_chan *chan, + enum dma_transaction_type type) +{ + u32 op_mode; + u32 config = __raw_readl(XOR_CONFIG(chan)); + + switch (type) { + case DMA_XOR: + op_mode = XOR_OPERATION_MODE_XOR; + break; + case DMA_MEMCPY: + op_mode = XOR_OPERATION_MODE_MEMCPY; + break; + case DMA_MEMSET: + op_mode = XOR_OPERATION_MODE_MEMSET; + break; + default: + dev_printk(KERN_ERR, chan->device->common.dev, + "error: unsupported operation %d.\n", + type); + BUG(); + return; + } + + config &= ~0x7; + config |= op_mode; + __raw_writel(config, XOR_CONFIG(chan)); + chan->current_type = type; +} + +static void mv_chan_activate(struct mv_xor_chan *chan) +{ + u32 activation; + + dev_dbg(chan->device->common.dev, " activate chan.\n"); + activation = __raw_readl(XOR_ACTIVATION(chan)); + activation |= 0x1; + __raw_writel(activation, XOR_ACTIVATION(chan)); +} + +static char mv_chan_is_busy(struct mv_xor_chan *chan) +{ + u32 state = __raw_readl(XOR_ACTIVATION(chan)); + + state = (state >> 4) & 0x3; + + return (state == 1) ? 1 : 0; +} + +static int mv_chan_xor_slot_count(size_t len, int src_cnt) +{ + return 1; +} + +/** + * mv_xor_free_slots - flags descriptor slots for reuse + * @slot: Slot to free + * Caller must hold &mv_chan->lock while calling this function + */ +static void mv_xor_free_slots(struct mv_xor_chan *mv_chan, + struct mv_xor_desc_slot *slot) +{ + dev_dbg(mv_chan->device->common.dev, "%s %d slot %p\n", + __func__, __LINE__, slot); + + slot->slots_per_op = 0; + +} + +/* + * mv_xor_start_new_chain - program the engine to operate on new chain headed by + * sw_desc + * Caller must hold &mv_chan->lock while calling this function + */ +static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan, + struct mv_xor_desc_slot *sw_desc) +{ + dev_dbg(mv_chan->device->common.dev, "%s %d: sw_desc %p\n", + __func__, __LINE__, sw_desc); + if (sw_desc->type != mv_chan->current_type) + mv_set_mode(mv_chan, sw_desc->type); + + if (sw_desc->type == DMA_MEMSET) { + /* for memset requests we need to program the engine, no + * descriptors used. + */ + struct mv_xor_desc *hw_desc = sw_desc->hw_desc; + mv_chan_set_dest_pointer(mv_chan, hw_desc->phy_dest_addr); + mv_chan_set_block_size(mv_chan, sw_desc->unmap_len); + mv_chan_set_value(mv_chan, sw_desc->value); + } else { + /* set the hardware chain */ + mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys); + } + mv_chan->pending += sw_desc->slot_cnt; + mv_xor_issue_pending(&mv_chan->common); +} + +static dma_cookie_t +mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc, + struct mv_xor_chan *mv_chan, dma_cookie_t cookie) +{ + BUG_ON(desc->async_tx.cookie < 0); + + if (desc->async_tx.cookie > 0) { + cookie = desc->async_tx.cookie; + + /* call the callback (must not sleep or submit new + * operations to this channel) + */ + if (desc->async_tx.callback) + desc->async_tx.callback( + desc->async_tx.callback_param); + + /* unmap dma addresses + * (unmap_single vs unmap_page?) + */ + if (desc->group_head && desc->unmap_len) { + struct mv_xor_desc_slot *unmap = desc->group_head; + struct device *dev = + &mv_chan->device->pdev->dev; + u32 len = unmap->unmap_len; + enum dma_ctrl_flags flags = desc->async_tx.flags; + u32 src_cnt; + dma_addr_t addr; + dma_addr_t dest; + + src_cnt = unmap->unmap_src_cnt; + dest = mv_desc_get_dest_addr(unmap); + if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) { + enum dma_data_direction dir; + + if (src_cnt > 1) /* is xor ? */ + dir = DMA_BIDIRECTIONAL; + else + dir = DMA_FROM_DEVICE; + dma_unmap_page(dev, dest, len, dir); + } + + if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) { + while (src_cnt--) { + addr = mv_desc_get_src_addr(unmap, + src_cnt); + if (addr == dest) + continue; + dma_unmap_page(dev, addr, len, + DMA_TO_DEVICE); + } + } + desc->group_head = NULL; + } + } + + /* run dependent operations */ + async_tx_run_dependencies(&desc->async_tx); + + return cookie; +} + +static int +mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan) +{ + struct mv_xor_desc_slot *iter, *_iter; + + dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__); + list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots, + completed_node) { + + if (async_tx_test_ack(&iter->async_tx)) { + list_del(&iter->completed_node); + mv_xor_free_slots(mv_chan, iter); + } + } + return 0; +} + +static int +mv_xor_clean_slot(struct mv_xor_desc_slot *desc, + struct mv_xor_chan *mv_chan) +{ + dev_dbg(mv_chan->device->common.dev, "%s %d: desc %p flags %d\n", + __func__, __LINE__, desc, desc->async_tx.flags); + list_del(&desc->chain_node); + /* the client is allowed to attach dependent operations + * until 'ack' is set + */ + if (!async_tx_test_ack(&desc->async_tx)) { + /* move this slot to the completed_slots */ + list_add_tail(&desc->completed_node, &mv_chan->completed_slots); + return 0; + } + + mv_xor_free_slots(mv_chan, desc); + return 0; +} + +static void __mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) +{ + struct mv_xor_desc_slot *iter, *_iter; + dma_cookie_t cookie = 0; + int busy = mv_chan_is_busy(mv_chan); + u32 current_desc = mv_chan_get_current_desc(mv_chan); + int seen_current = 0; + + dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__); + dev_dbg(mv_chan->device->common.dev, "current_desc %x\n", current_desc); + mv_xor_clean_completed_slots(mv_chan); + + /* free completed slots from the chain starting with + * the oldest descriptor + */ + + list_for_each_entry_safe(iter, _iter, &mv_chan->chain, + chain_node) { + prefetch(_iter); + prefetch(&_iter->async_tx); + + /* do not advance past the current descriptor loaded into the + * hardware channel, subsequent descriptors are either in + * process or have not been submitted + */ + if (seen_current) + break; + + /* stop the search if we reach the current descriptor and the + * channel is busy + */ + if (iter->async_tx.phys == current_desc) { + seen_current = 1; + if (busy) + break; + } + + cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie); + + if (mv_xor_clean_slot(iter, mv_chan)) + break; + } + + if ((busy == 0) && !list_empty(&mv_chan->chain)) { + struct mv_xor_desc_slot *chain_head; + chain_head = list_entry(mv_chan->chain.next, + struct mv_xor_desc_slot, + chain_node); + + mv_xor_start_new_chain(mv_chan, chain_head); + } + + if (cookie > 0) + mv_chan->completed_cookie = cookie; +} + +static void +mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan) +{ + spin_lock_bh(&mv_chan->lock); + __mv_xor_slot_cleanup(mv_chan); + spin_unlock_bh(&mv_chan->lock); +} + +static void mv_xor_tasklet(unsigned long data) +{ + struct mv_xor_chan *chan = (struct mv_xor_chan *) data; + __mv_xor_slot_cleanup(chan); +} + +static struct mv_xor_desc_slot * +mv_xor_alloc_slots(struct mv_xor_chan *mv_chan, int num_slots, + int slots_per_op) +{ + struct mv_xor_desc_slot *iter, *_iter, *alloc_start = NULL; + LIST_HEAD(chain); + int slots_found, retry = 0; + + /* start search from the last allocated descrtiptor + * if a contiguous allocation can not be found start searching + * from the beginning of the list + */ +retry: + slots_found = 0; + if (retry == 0) + iter = mv_chan->last_used; + else + iter = list_entry(&mv_chan->all_slots, + struct mv_xor_desc_slot, + slot_node); + + list_for_each_entry_safe_continue( + iter, _iter, &mv_chan->all_slots, slot_node) { + prefetch(_iter); + prefetch(&_iter->async_tx); + if (iter->slots_per_op) { + /* give up after finding the first busy slot + * on the second pass through the list + */ + if (retry) + break; + + slots_found = 0; + continue; + } + + /* start the allocation if the slot is correctly aligned */ + if (!slots_found++) + alloc_start = iter; + + if (slots_found == num_slots) { + struct mv_xor_desc_slot *alloc_tail = NULL; + struct mv_xor_desc_slot *last_used = NULL; + iter = alloc_start; + while (num_slots) { + int i; + + /* pre-ack all but the last descriptor */ + async_tx_ack(&iter->async_tx); + + list_add_tail(&iter->chain_node, &chain); + alloc_tail = iter; + iter->async_tx.cookie = 0; + iter->slot_cnt = num_slots; + iter->xor_check_result = NULL; + for (i = 0; i < slots_per_op; i++) { + iter->slots_per_op = slots_per_op - i; + last_used = iter; + iter = list_entry(iter->slot_node.next, + struct mv_xor_desc_slot, + slot_node); + } + num_slots -= slots_per_op; + } + alloc_tail->group_head = alloc_start; + alloc_tail->async_tx.cookie = -EBUSY; + list_splice(&chain, &alloc_tail->async_tx.tx_list); + mv_chan->last_used = last_used; + mv_desc_clear_next_desc(alloc_start); + mv_desc_clear_next_desc(alloc_tail); + return alloc_tail; + } + } + if (!retry++) + goto retry; + + /* try to free some slots if the allocation fails */ + tasklet_schedule(&mv_chan->irq_tasklet); + + return NULL; +} + +static dma_cookie_t +mv_desc_assign_cookie(struct mv_xor_chan *mv_chan, + struct mv_xor_desc_slot *desc) +{ + dma_cookie_t cookie = mv_chan->common.cookie; + + if (++cookie < 0) + cookie = 1; + mv_chan->common.cookie = desc->async_tx.cookie = cookie; + return cookie; +} + +/************************ DMA engine API functions ****************************/ +static dma_cookie_t +mv_xor_tx_submit(struct dma_async_tx_descriptor *tx) +{ + struct mv_xor_desc_slot *sw_desc = to_mv_xor_slot(tx); + struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan); + struct mv_xor_desc_slot *grp_start, *old_chain_tail; + dma_cookie_t cookie; + int new_hw_chain = 1; + + dev_dbg(mv_chan->device->common.dev, + "%s sw_desc %p: async_tx %p\n", + __func__, sw_desc, &sw_desc->async_tx); + + grp_start = sw_desc->group_head; + + spin_lock_bh(&mv_chan->lock); + cookie = mv_desc_assign_cookie(mv_chan, sw_desc); + + if (list_empty(&mv_chan->chain)) + list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain); + else { + new_hw_chain = 0; + + old_chain_tail = list_entry(mv_chan->chain.prev, + struct mv_xor_desc_slot, + chain_node); + list_splice_init(&grp_start->async_tx.tx_list, + &old_chain_tail->chain_node); + + if (!mv_can_chain(grp_start)) + goto submit_done; + + dev_dbg(mv_chan->device->common.dev, "Append to last desc %x\n", + old_chain_tail->async_tx.phys); + + /* fix up the hardware chain */ + mv_desc_set_next_desc(old_chain_tail, grp_start->async_tx.phys); + + /* if the channel is not busy */ + if (!mv_chan_is_busy(mv_chan)) { + u32 current_desc = mv_chan_get_current_desc(mv_chan); + /* + * and the curren desc is the end of the chain before + * the append, then we need to start the channel + */ + if (current_desc == old_chain_tail->async_tx.phys) + new_hw_chain = 1; + } + } + + if (new_hw_chain) + mv_xor_start_new_chain(mv_chan, grp_start); + +submit_done: + spin_unlock_bh(&mv_chan->lock); + + return cookie; +} + +/* returns the number of allocated descriptors */ +static int mv_xor_alloc_chan_resources(struct dma_chan *chan, + struct dma_client *client) +{ + char *hw_desc; + int idx; + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + struct mv_xor_desc_slot *slot = NULL; + struct mv_xor_platform_data *plat_data = + mv_chan->device->pdev->dev.platform_data; + int num_descs_in_pool = plat_data->pool_size/MV_XOR_SLOT_SIZE; + + /* Allocate descriptor slots */ + idx = mv_chan->slots_allocated; + while (idx < num_descs_in_pool) { + slot = kzalloc(sizeof(*slot), GFP_KERNEL); + if (!slot) { + printk(KERN_INFO "MV XOR Channel only initialized" + " %d descriptor slots", idx); + break; + } + hw_desc = (char *) mv_chan->device->dma_desc_pool_virt; + slot->hw_desc = (void *) &hw_desc[idx * MV_XOR_SLOT_SIZE]; + + dma_async_tx_descriptor_init(&slot->async_tx, chan); + slot->async_tx.tx_submit = mv_xor_tx_submit; + INIT_LIST_HEAD(&slot->chain_node); + INIT_LIST_HEAD(&slot->slot_node); + INIT_LIST_HEAD(&slot->async_tx.tx_list); + hw_desc = (char *) mv_chan->device->dma_desc_pool; + slot->async_tx.phys = + (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE]; + slot->idx = idx++; + + spin_lock_bh(&mv_chan->lock); + mv_chan->slots_allocated = idx; + list_add_tail(&slot->slot_node, &mv_chan->all_slots); + spin_unlock_bh(&mv_chan->lock); + } + + if (mv_chan->slots_allocated && !mv_chan->last_used) + mv_chan->last_used = list_entry(mv_chan->all_slots.next, + struct mv_xor_desc_slot, + slot_node); + + dev_dbg(mv_chan->device->common.dev, + "allocated %d descriptor slots last_used: %p\n", + mv_chan->slots_allocated, mv_chan->last_used); + + return mv_chan->slots_allocated ? : -ENOMEM; +} + +static struct dma_async_tx_descriptor * +mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src, + size_t len, unsigned long flags) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + struct mv_xor_desc_slot *sw_desc, *grp_start; + int slot_cnt; + + dev_dbg(mv_chan->device->common.dev, + "%s dest: %x src %x len: %u flags: %ld\n", + __func__, dest, src, len, flags); + if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) + return NULL; + + BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT)); + + spin_lock_bh(&mv_chan->lock); + slot_cnt = mv_chan_memcpy_slot_count(len); + sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1); + if (sw_desc) { + sw_desc->type = DMA_MEMCPY; + sw_desc->async_tx.flags = flags; + grp_start = sw_desc->group_head; + mv_desc_init(grp_start, flags); + mv_desc_set_byte_count(grp_start, len); + mv_desc_set_dest_addr(sw_desc->group_head, dest); + mv_desc_set_src_addr(grp_start, 0, src); + sw_desc->unmap_src_cnt = 1; + sw_desc->unmap_len = len; + } + spin_unlock_bh(&mv_chan->lock); + + dev_dbg(mv_chan->device->common.dev, + "%s sw_desc %p async_tx %p\n", + __func__, sw_desc, sw_desc ? &sw_desc->async_tx : 0); + + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +mv_xor_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value, + size_t len, unsigned long flags) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + struct mv_xor_desc_slot *sw_desc, *grp_start; + int slot_cnt; + + dev_dbg(mv_chan->device->common.dev, + "%s dest: %x len: %u flags: %ld\n", + __func__, dest, len, flags); + if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) + return NULL; + + BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT)); + + spin_lock_bh(&mv_chan->lock); + slot_cnt = mv_chan_memset_slot_count(len); + sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1); + if (sw_desc) { + sw_desc->type = DMA_MEMSET; + sw_desc->async_tx.flags = flags; + grp_start = sw_desc->group_head; + mv_desc_init(grp_start, flags); + mv_desc_set_byte_count(grp_start, len); + mv_desc_set_dest_addr(sw_desc->group_head, dest); + mv_desc_set_block_fill_val(grp_start, value); + sw_desc->unmap_src_cnt = 1; + sw_desc->unmap_len = len; + } + spin_unlock_bh(&mv_chan->lock); + dev_dbg(mv_chan->device->common.dev, + "%s sw_desc %p async_tx %p \n", + __func__, sw_desc, &sw_desc->async_tx); + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static struct dma_async_tx_descriptor * +mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src, + unsigned int src_cnt, size_t len, unsigned long flags) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + struct mv_xor_desc_slot *sw_desc, *grp_start; + int slot_cnt; + + if (unlikely(len < MV_XOR_MIN_BYTE_COUNT)) + return NULL; + + BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT)); + + dev_dbg(mv_chan->device->common.dev, + "%s src_cnt: %d len: dest %x %u flags: %ld\n", + __func__, src_cnt, len, dest, flags); + + spin_lock_bh(&mv_chan->lock); + slot_cnt = mv_chan_xor_slot_count(len, src_cnt); + sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1); + if (sw_desc) { + sw_desc->type = DMA_XOR; + sw_desc->async_tx.flags = flags; + grp_start = sw_desc->group_head; + mv_desc_init(grp_start, flags); + /* the byte count field is the same as in memcpy desc*/ + mv_desc_set_byte_count(grp_start, len); + mv_desc_set_dest_addr(sw_desc->group_head, dest); + sw_desc->unmap_src_cnt = src_cnt; + sw_desc->unmap_len = len; + while (src_cnt--) + mv_desc_set_src_addr(grp_start, src_cnt, src[src_cnt]); + } + spin_unlock_bh(&mv_chan->lock); + dev_dbg(mv_chan->device->common.dev, + "%s sw_desc %p async_tx %p \n", + __func__, sw_desc, &sw_desc->async_tx); + return sw_desc ? &sw_desc->async_tx : NULL; +} + +static void mv_xor_free_chan_resources(struct dma_chan *chan) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + struct mv_xor_desc_slot *iter, *_iter; + int in_use_descs = 0; + + mv_xor_slot_cleanup(mv_chan); + + spin_lock_bh(&mv_chan->lock); + list_for_each_entry_safe(iter, _iter, &mv_chan->chain, + chain_node) { + in_use_descs++; + list_del(&iter->chain_node); + } + list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots, + completed_node) { + in_use_descs++; + list_del(&iter->completed_node); + } + list_for_each_entry_safe_reverse( + iter, _iter, &mv_chan->all_slots, slot_node) { + list_del(&iter->slot_node); + kfree(iter); + mv_chan->slots_allocated--; + } + mv_chan->last_used = NULL; + + dev_dbg(mv_chan->device->common.dev, "%s slots_allocated %d\n", + __func__, mv_chan->slots_allocated); + spin_unlock_bh(&mv_chan->lock); + + if (in_use_descs) + dev_err(mv_chan->device->common.dev, + "freeing %d in use descriptors!\n", in_use_descs); +} + +/** + * mv_xor_is_complete - poll the status of an XOR transaction + * @chan: XOR channel handle + * @cookie: XOR transaction identifier + */ +static enum dma_status mv_xor_is_complete(struct dma_chan *chan, + dma_cookie_t cookie, + dma_cookie_t *done, + dma_cookie_t *used) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + dma_cookie_t last_used; + dma_cookie_t last_complete; + enum dma_status ret; + + last_used = chan->cookie; + last_complete = mv_chan->completed_cookie; + mv_chan->is_complete_cookie = cookie; + if (done) + *done = last_complete; + if (used) + *used = last_used; + + ret = dma_async_is_complete(cookie, last_complete, last_used); + if (ret == DMA_SUCCESS) { + mv_xor_clean_completed_slots(mv_chan); + return ret; + } + mv_xor_slot_cleanup(mv_chan); + + last_used = chan->cookie; + last_complete = mv_chan->completed_cookie; + + if (done) + *done = last_complete; + if (used) + *used = last_used; + + return dma_async_is_complete(cookie, last_complete, last_used); +} + +static void mv_dump_xor_regs(struct mv_xor_chan *chan) +{ + u32 val; + + val = __raw_readl(XOR_CONFIG(chan)); + dev_printk(KERN_ERR, chan->device->common.dev, + "config 0x%08x.\n", val); + + val = __raw_readl(XOR_ACTIVATION(chan)); + dev_printk(KERN_ERR, chan->device->common.dev, + "activation 0x%08x.\n", val); + + val = __raw_readl(XOR_INTR_CAUSE(chan)); + dev_printk(KERN_ERR, chan->device->common.dev, + "intr cause 0x%08x.\n", val); + + val = __raw_readl(XOR_INTR_MASK(chan)); + dev_printk(KERN_ERR, chan->device->common.dev, + "intr mask 0x%08x.\n", val); + + val = __raw_readl(XOR_ERROR_CAUSE(chan)); + dev_printk(KERN_ERR, chan->device->common.dev, + "error cause 0x%08x.\n", val); + + val = __raw_readl(XOR_ERROR_ADDR(chan)); + dev_printk(KERN_ERR, chan->device->common.dev, + "error addr 0x%08x.\n", val); +} + +static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan, + u32 intr_cause) +{ + if (intr_cause & (1 << 4)) { + dev_dbg(chan->device->common.dev, + "ignore this error\n"); + return; + } + + dev_printk(KERN_ERR, chan->device->common.dev, + "error on chan %d. intr cause 0x%08x.\n", + chan->idx, intr_cause); + + mv_dump_xor_regs(chan); + BUG(); +} + +static irqreturn_t mv_xor_interrupt_handler(int irq, void *data) +{ + struct mv_xor_chan *chan = data; + u32 intr_cause = mv_chan_get_intr_cause(chan); + + dev_dbg(chan->device->common.dev, "intr cause %x\n", intr_cause); + + if (mv_is_err_intr(intr_cause)) + mv_xor_err_interrupt_handler(chan, intr_cause); + + tasklet_schedule(&chan->irq_tasklet); + + mv_xor_device_clear_eoc_cause(chan); + + return IRQ_HANDLED; +} + +static void mv_xor_issue_pending(struct dma_chan *chan) +{ + struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan); + + if (mv_chan->pending >= MV_XOR_THRESHOLD) { + mv_chan->pending = 0; + mv_chan_activate(mv_chan); + } +} + +/* + * Perform a transaction to verify the HW works. + */ +#define MV_XOR_TEST_SIZE 2000 + +static int __devinit mv_xor_memcpy_self_test(struct mv_xor_device *device) +{ + int i; + void *src, *dest; + dma_addr_t src_dma, dest_dma; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + struct dma_async_tx_descriptor *tx; + int err = 0; + struct mv_xor_chan *mv_chan; + + src = kmalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL); + if (!src) + return -ENOMEM; + + dest = kzalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL); + if (!dest) { + kfree(src); + return -ENOMEM; + } + + /* Fill in src buffer */ + for (i = 0; i < MV_XOR_TEST_SIZE; i++) + ((u8 *) src)[i] = (u8)i; + + /* Start copy, using first DMA channel */ + dma_chan = container_of(device->common.channels.next, + struct dma_chan, + device_node); + if (mv_xor_alloc_chan_resources(dma_chan, NULL) < 1) { + err = -ENODEV; + goto out; + } + + dest_dma = dma_map_single(dma_chan->device->dev, dest, + MV_XOR_TEST_SIZE, DMA_FROM_DEVICE); + + src_dma = dma_map_single(dma_chan->device->dev, src, + MV_XOR_TEST_SIZE, DMA_TO_DEVICE); + + tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma, + MV_XOR_TEST_SIZE, 0); + cookie = mv_xor_tx_submit(tx); + mv_xor_issue_pending(dma_chan); + async_tx_ack(tx); + msleep(1); + + if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) != + DMA_SUCCESS) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test copy timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + mv_chan = to_mv_xor_chan(dma_chan); + dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma, + MV_XOR_TEST_SIZE, DMA_FROM_DEVICE); + if (memcmp(src, dest, MV_XOR_TEST_SIZE)) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test copy failed compare, disabling\n"); + err = -ENODEV; + goto free_resources; + } + +free_resources: + mv_xor_free_chan_resources(dma_chan); +out: + kfree(src); + kfree(dest); + return err; +} + +#define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */ +static int __devinit +mv_xor_xor_self_test(struct mv_xor_device *device) +{ + int i, src_idx; + struct page *dest; + struct page *xor_srcs[MV_XOR_NUM_SRC_TEST]; + dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST]; + dma_addr_t dest_dma; + struct dma_async_tx_descriptor *tx; + struct dma_chan *dma_chan; + dma_cookie_t cookie; + u8 cmp_byte = 0; + u32 cmp_word; + int err = 0; + struct mv_xor_chan *mv_chan; + + for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) { + xor_srcs[src_idx] = alloc_page(GFP_KERNEL); + if (!xor_srcs[src_idx]) + while (src_idx--) { + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + } + + dest = alloc_page(GFP_KERNEL); + if (!dest) + while (src_idx--) { + __free_page(xor_srcs[src_idx]); + return -ENOMEM; + } + + /* Fill in src buffers */ + for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) { + u8 *ptr = page_address(xor_srcs[src_idx]); + for (i = 0; i < PAGE_SIZE; i++) + ptr[i] = (1 << src_idx); + } + + for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) + cmp_byte ^= (u8) (1 << src_idx); + + cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | + (cmp_byte << 8) | cmp_byte; + + memset(page_address(dest), 0, PAGE_SIZE); + + dma_chan = container_of(device->common.channels.next, + struct dma_chan, + device_node); + if (mv_xor_alloc_chan_resources(dma_chan, NULL) < 1) { + err = -ENODEV; + goto out; + } + + /* test xor */ + dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE, + DMA_FROM_DEVICE); + + for (i = 0; i < MV_XOR_NUM_SRC_TEST; i++) + dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i], + 0, PAGE_SIZE, DMA_TO_DEVICE); + + tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs, + MV_XOR_NUM_SRC_TEST, PAGE_SIZE, 0); + + cookie = mv_xor_tx_submit(tx); + mv_xor_issue_pending(dma_chan); + async_tx_ack(tx); + msleep(8); + + if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) != + DMA_SUCCESS) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test xor timed out, disabling\n"); + err = -ENODEV; + goto free_resources; + } + + mv_chan = to_mv_xor_chan(dma_chan); + dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma, + PAGE_SIZE, DMA_FROM_DEVICE); + for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) { + u32 *ptr = page_address(dest); + if (ptr[i] != cmp_word) { + dev_printk(KERN_ERR, dma_chan->device->dev, + "Self-test xor failed compare, disabling." + " index %d, data %x, expected %x\n", i, + ptr[i], cmp_word); + err = -ENODEV; + goto free_resources; + } + } + +free_resources: + mv_xor_free_chan_resources(dma_chan); +out: + src_idx = MV_XOR_NUM_SRC_TEST; + while (src_idx--) + __free_page(xor_srcs[src_idx]); + __free_page(dest); + return err; +} + +static int __devexit mv_xor_remove(struct platform_device *dev) +{ + struct mv_xor_device *device = platform_get_drvdata(dev); + struct dma_chan *chan, *_chan; + struct mv_xor_chan *mv_chan; + struct mv_xor_platform_data *plat_data = dev->dev.platform_data; + + dma_async_device_unregister(&device->common); + + dma_free_coherent(&dev->dev, plat_data->pool_size, + device->dma_desc_pool_virt, device->dma_desc_pool); + + list_for_each_entry_safe(chan, _chan, &device->common.channels, + device_node) { + mv_chan = to_mv_xor_chan(chan); + list_del(&chan->device_node); + } + + return 0; +} + +static int __devinit mv_xor_probe(struct platform_device *pdev) +{ + int ret = 0; + int irq; + struct mv_xor_device *adev; + struct mv_xor_chan *mv_chan; + struct dma_device *dma_dev; + struct mv_xor_platform_data *plat_data = pdev->dev.platform_data; + + + adev = devm_kzalloc(&pdev->dev, sizeof(*adev), GFP_KERNEL); + if (!adev) + return -ENOMEM; + + dma_dev = &adev->common; + + /* allocate coherent memory for hardware descriptors + * note: writecombine gives slightly better performance, but + * requires that we explicitly flush the writes + */ + adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev, + plat_data->pool_size, + &adev->dma_desc_pool, + GFP_KERNEL); + if (!adev->dma_desc_pool_virt) + return -ENOMEM; + + adev->id = plat_data->hw_id; + + /* discover transaction capabilites from the platform data */ + dma_dev->cap_mask = plat_data->cap_mask; + adev->pdev = pdev; + platform_set_drvdata(pdev, adev); + + adev->shared = platform_get_drvdata(plat_data->shared); + + INIT_LIST_HEAD(&dma_dev->channels); + + /* set base routines */ + dma_dev->device_alloc_chan_resources = mv_xor_alloc_chan_resources; + dma_dev->device_free_chan_resources = mv_xor_free_chan_resources; + dma_dev->device_is_tx_complete = mv_xor_is_complete; + dma_dev->device_issue_pending = mv_xor_issue_pending; + dma_dev->dev = &pdev->dev; + + /* set prep routines based on capability */ + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) + dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy; + if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) + dma_dev->device_prep_dma_memset = mv_xor_prep_dma_memset; + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { + dma_dev->max_xor = 8; ; + dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor; + } + + mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL); + if (!mv_chan) { + ret = -ENOMEM; + goto err_free_dma; + } + mv_chan->device = adev; + mv_chan->idx = plat_data->hw_id; + mv_chan->mmr_base = adev->shared->xor_base; + + if (!mv_chan->mmr_base) { + ret = -ENOMEM; + goto err_free_dma; + } + tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long) + mv_chan); + + /* clear errors before enabling interrupts */ + mv_xor_device_clear_err_status(mv_chan); + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + ret = irq; + goto err_free_dma; + } + ret = devm_request_irq(&pdev->dev, irq, + mv_xor_interrupt_handler, + 0, dev_name(&pdev->dev), mv_chan); + if (ret) + goto err_free_dma; + + mv_chan_unmask_interrupts(mv_chan); + + mv_set_mode(mv_chan, DMA_MEMCPY); + + spin_lock_init(&mv_chan->lock); + INIT_LIST_HEAD(&mv_chan->chain); + INIT_LIST_HEAD(&mv_chan->completed_slots); + INIT_LIST_HEAD(&mv_chan->all_slots); + INIT_RCU_HEAD(&mv_chan->common.rcu); + mv_chan->common.device = dma_dev; + + list_add_tail(&mv_chan->common.device_node, &dma_dev->channels); + + if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) { + ret = mv_xor_memcpy_self_test(adev); + dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret); + if (ret) + goto err_free_dma; + } + + if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) { + ret = mv_xor_xor_self_test(adev); + dev_dbg(&pdev->dev, "xor self test returned %d\n", ret); + if (ret) + goto err_free_dma; + } + + dev_printk(KERN_INFO, &pdev->dev, "Marvell XOR: " + "( %s%s%s%s)\n", + dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "", + dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "", + dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "", + dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : ""); + + dma_async_device_register(dma_dev); + goto out; + + err_free_dma: + dma_free_coherent(&adev->pdev->dev, plat_data->pool_size, + adev->dma_desc_pool_virt, adev->dma_desc_pool); + out: + return ret; +} + +static void +mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp, + struct mbus_dram_target_info *dram) +{ + void __iomem *base = msp->xor_base; + u32 win_enable = 0; + int i; + + for (i = 0; i < 8; i++) { + writel(0, base + WINDOW_BASE(i)); + writel(0, base + WINDOW_SIZE(i)); + if (i < 4) + writel(0, base + WINDOW_REMAP_HIGH(i)); + } + + for (i = 0; i < dram->num_cs; i++) { + struct mbus_dram_window *cs = dram->cs + i; + + writel((cs->base & 0xffff0000) | + (cs->mbus_attr << 8) | + dram->mbus_dram_target_id, base + WINDOW_BASE(i)); + writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i)); + + win_enable |= (1 << i); + win_enable |= 3 << (16 + (2 * i)); + } + + writel(win_enable, base + WINDOW_BAR_ENABLE(0)); + writel(win_enable, base + WINDOW_BAR_ENABLE(1)); +} + +static struct platform_driver mv_xor_driver = { + .probe = mv_xor_probe, + .remove = mv_xor_remove, + .driver = { + .owner = THIS_MODULE, + .name = MV_XOR_NAME, + }, +}; + +static int mv_xor_shared_probe(struct platform_device *pdev) +{ + struct mv_xor_platform_shared_data *msd = pdev->dev.platform_data; + struct mv_xor_shared_private *msp; + struct resource *res; + + dev_printk(KERN_NOTICE, &pdev->dev, "Marvell shared XOR driver\n"); + + msp = devm_kzalloc(&pdev->dev, sizeof(*msp), GFP_KERNEL); + if (!msp) + return -ENOMEM; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) + return -ENODEV; + + msp->xor_base = devm_ioremap(&pdev->dev, res->start, + res->end - res->start + 1); + if (!msp->xor_base) + return -EBUSY; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 1); + if (!res) + return -ENODEV; + + msp->xor_high_base = devm_ioremap(&pdev->dev, res->start, + res->end - res->start + 1); + if (!msp->xor_high_base) + return -EBUSY; + + platform_set_drvdata(pdev, msp); + + /* + * (Re-)program MBUS remapping windows if we are asked to. + */ + if (msd != NULL && msd->dram != NULL) + mv_xor_conf_mbus_windows(msp, msd->dram); + + return 0; +} + +static int mv_xor_shared_remove(struct platform_device *pdev) +{ + return 0; +} + +static struct platform_driver mv_xor_shared_driver = { + .probe = mv_xor_shared_probe, + .remove = mv_xor_shared_remove, + .driver = { + .owner = THIS_MODULE, + .name = MV_XOR_SHARED_NAME, + }, +}; + + +static int __init mv_xor_init(void) +{ + int rc; + + rc = platform_driver_register(&mv_xor_shared_driver); + if (!rc) { + rc = platform_driver_register(&mv_xor_driver); + if (rc) + platform_driver_unregister(&mv_xor_shared_driver); + } + return rc; +} +module_init(mv_xor_init); + +/* it's currently unsafe to unload this module */ +#if 0 +static void __exit mv_xor_exit(void) +{ + platform_driver_unregister(&mv_xor_driver); + platform_driver_unregister(&mv_xor_shared_driver); + return; +} + +module_exit(mv_xor_exit); +#endif + +MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>"); +MODULE_DESCRIPTION("DMA engine driver for Marvell's XOR engine"); +MODULE_LICENSE("GPL"); diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h new file mode 100644 index 0000000..06cafe1 --- /dev/null +++ b/drivers/dma/mv_xor.h @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2007, 2008, Marvell International Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + */ + +#ifndef MV_XOR_H +#define MV_XOR_H + +#include <linux/types.h> +#include <linux/io.h> +#include <linux/dmaengine.h> +#include <linux/interrupt.h> + +#define USE_TIMER +#define MV_XOR_SLOT_SIZE 64 +#define MV_XOR_THRESHOLD 1 + +#define XOR_OPERATION_MODE_XOR 0 +#define XOR_OPERATION_MODE_MEMCPY 2 +#define XOR_OPERATION_MODE_MEMSET 4 + +#define XOR_CURR_DESC(chan) (chan->mmr_base + 0x210 + (chan->idx * 4)) +#define XOR_NEXT_DESC(chan) (chan->mmr_base + 0x200 + (chan->idx * 4)) +#define XOR_BYTE_COUNT(chan) (chan->mmr_base + 0x220 + (chan->idx * 4)) +#define XOR_DEST_POINTER(chan) (chan->mmr_base + 0x2B0 + (chan->idx * 4)) +#define XOR_BLOCK_SIZE(chan) (chan->mmr_base + 0x2C0 + (chan->idx * 4)) +#define XOR_INIT_VALUE_LOW(chan) (chan->mmr_base + 0x2E0) +#define XOR_INIT_VALUE_HIGH(chan) (chan->mmr_base + 0x2E4) + +#define XOR_CONFIG(chan) (chan->mmr_base + 0x10 + (chan->idx * 4)) +#define XOR_ACTIVATION(chan) (chan->mmr_base + 0x20 + (chan->idx * 4)) +#define XOR_INTR_CAUSE(chan) (chan->mmr_base + 0x30) +#define XOR_INTR_MASK(chan) (chan->mmr_base + 0x40) +#define XOR_ERROR_CAUSE(chan) (chan->mmr_base + 0x50) +#define XOR_ERROR_ADDR(chan) (chan->mmr_base + 0x60) +#define XOR_INTR_MASK_VALUE 0x3F5 + +#define WINDOW_BASE(w) (0x250 + ((w) << 2)) +#define WINDOW_SIZE(w) (0x270 + ((w) << 2)) +#define WINDOW_REMAP_HIGH(w) (0x290 + ((w) << 2)) +#define WINDOW_BAR_ENABLE(chan) (0x240 + ((chan) << 2)) + +struct mv_xor_shared_private { + void __iomem *xor_base; + void __iomem *xor_high_base; +}; + + +/** + * struct mv_xor_device - internal representation of a XOR device + * @pdev: Platform device + * @id: HW XOR Device selector + * @dma_desc_pool: base of DMA descriptor region (DMA address) + * @dma_desc_pool_virt: base of DMA descriptor region (CPU address) + * @common: embedded struct dma_device + */ +struct mv_xor_device { + struct platform_device *pdev; + int id; + dma_addr_t dma_desc_pool; + void *dma_desc_pool_virt; + struct dma_device common; + struct mv_xor_shared_private *shared; +}; + +/** + * struct mv_xor_chan - internal representation of a XOR channel + * @pending: allows batching of hardware operations + * @completed_cookie: identifier for the most recently completed operation + * @lock: serializes enqueue/dequeue operations to the descriptors pool + * @mmr_base: memory mapped register base + * @idx: the index of the xor channel + * @chain: device chain view of the descriptors + * @completed_slots: slots completed by HW but still need to be acked + * @device: parent device + * @common: common dmaengine channel object members + * @last_used: place holder for allocation to continue from where it left off + * @all_slots: complete domain of slots usable by the channel + * @slots_allocated: records the actual size of the descriptor slot pool + * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs + */ +struct mv_xor_chan { + int pending; + dma_cookie_t completed_cookie; + spinlock_t lock; /* protects the descriptor slot pool */ + void __iomem *mmr_base; + unsigned int idx; + enum dma_transaction_type current_type; + struct list_head chain; + struct list_head completed_slots; + struct mv_xor_device *device; + struct dma_chan common; + struct mv_xor_desc_slot *last_used; + struct list_head all_slots; + int slots_allocated; + struct tasklet_struct irq_tasklet; +#ifdef USE_TIMER + unsigned long cleanup_time; + u32 current_on_last_cleanup; + dma_cookie_t is_complete_cookie; +#endif +}; + +/** + * struct mv_xor_desc_slot - software descriptor + * @slot_node: node on the mv_xor_chan.all_slots list + * @chain_node: node on the mv_xor_chan.chain list + * @completed_node: node on the mv_xor_chan.completed_slots list + * @hw_desc: virtual address of the hardware descriptor chain + * @phys: hardware address of the hardware descriptor chain + * @group_head: first operation in a transaction + * @slot_cnt: total slots used in an transaction (group of operations) + * @slots_per_op: number of slots per operation + * @idx: pool index + * @unmap_src_cnt: number of xor sources + * @unmap_len: transaction bytecount + * @async_tx: support for the async_tx api + * @group_list: list of slots that make up a multi-descriptor transaction + * for example transfer lengths larger than the supported hw max + * @xor_check_result: result of zero sum + * @crc32_result: result crc calculation + */ +struct mv_xor_desc_slot { + struct list_head slot_node; + struct list_head chain_node; + struct list_head completed_node; + enum dma_transaction_type type; + void *hw_desc; + struct mv_xor_desc_slot *group_head; + u16 slot_cnt; + u16 slots_per_op; + u16 idx; + u16 unmap_src_cnt; + u32 value; + size_t unmap_len; + struct dma_async_tx_descriptor async_tx; + union { + u32 *xor_check_result; + u32 *crc32_result; + }; +#ifdef USE_TIMER + unsigned long arrival_time; + struct timer_list timeout; +#endif +}; + +/* This structure describes XOR descriptor size 64bytes */ +struct mv_xor_desc { + u32 status; /* descriptor execution status */ + u32 crc32_result; /* result of CRC-32 calculation */ + u32 desc_command; /* type of operation to be carried out */ + u32 phy_next_desc; /* next descriptor address pointer */ + u32 byte_count; /* size of src/dst blocks in bytes */ + u32 phy_dest_addr; /* destination block address */ + u32 phy_src_addr[8]; /* source block addresses */ + u32 reserved0; + u32 reserved1; +}; + +#define to_mv_sw_desc(addr_hw_desc) \ + container_of(addr_hw_desc, struct mv_xor_desc_slot, hw_desc) + +#define mv_hw_desc_slot_idx(hw_desc, idx) \ + ((void *)(((unsigned long)hw_desc) + ((idx) << 5))) + +#define MV_XOR_MIN_BYTE_COUNT (128) +#define XOR_MAX_BYTE_COUNT ((16 * 1024 * 1024) - 1) +#define MV_XOR_MAX_BYTE_COUNT XOR_MAX_BYTE_COUNT + + +#endif |