summaryrefslogtreecommitdiffstats
path: root/drivers/dma
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/dma')
-rw-r--r--drivers/dma/Kconfig91
-rw-r--r--drivers/dma/Makefile9
-rw-r--r--drivers/dma/dmaengine.c635
-rw-r--r--drivers/dma/dmatest.c449
-rw-r--r--drivers/dma/dw_dmac.c1122
-rw-r--r--drivers/dma/dw_dmac_regs.h225
-rw-r--r--drivers/dma/fsldma.c1036
-rw-r--r--drivers/dma/fsldma.h198
-rw-r--r--drivers/dma/ioat.c226
-rw-r--r--drivers/dma/ioat_dca.c657
-rw-r--r--drivers/dma/ioat_dma.c1723
-rw-r--r--drivers/dma/ioatdma.h163
-rw-r--r--drivers/dma/ioatdma_hw.h70
-rw-r--r--drivers/dma/ioatdma_registers.h226
-rw-r--r--drivers/dma/iop-adma.c1448
-rw-r--r--drivers/dma/iovlock.c269
-rw-r--r--drivers/dma/mv_xor.c1384
-rw-r--r--drivers/dma/mv_xor.h183
18 files changed, 10114 insertions, 0 deletions
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
new file mode 100644
index 0000000..904e575
--- /dev/null
+++ b/drivers/dma/Kconfig
@@ -0,0 +1,91 @@
+#
+# DMA engine configuration
+#
+
+menuconfig DMADEVICES
+ bool "DMA Engine support"
+ depends on !HIGHMEM64G && HAS_DMA
+ help
+ DMA engines can do asynchronous data transfers without
+ involving the host CPU. Currently, this framework can be
+ used to offload memory copies in the network stack and
+ RAID operations in the MD driver. This menu only presents
+ DMA Device drivers supported by the configured arch, it may
+ be empty in some cases.
+
+if DMADEVICES
+
+comment "DMA Devices"
+
+config INTEL_IOATDMA
+ tristate "Intel I/OAT DMA support"
+ depends on PCI && X86
+ select DMA_ENGINE
+ select DCA
+ help
+ Enable support for the Intel(R) I/OAT DMA engine present
+ in recent Intel Xeon chipsets.
+
+ Say Y here if you have such a chipset.
+
+ If unsure, say N.
+
+config INTEL_IOP_ADMA
+ tristate "Intel IOP ADMA support"
+ depends on ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX
+ select ASYNC_CORE
+ select DMA_ENGINE
+ help
+ Enable support for the Intel(R) IOP Series RAID engines.
+
+config DW_DMAC
+ tristate "Synopsys DesignWare AHB DMA support"
+ depends on AVR32
+ select DMA_ENGINE
+ default y if CPU_AT32AP7000
+ help
+ Support the Synopsys DesignWare AHB DMA controller. This
+ can be integrated in chips such as the Atmel AT32ap7000.
+
+config FSL_DMA
+ tristate "Freescale Elo and Elo Plus DMA support"
+ depends on FSL_SOC
+ select DMA_ENGINE
+ ---help---
+ Enable support for the Freescale Elo and Elo Plus DMA controllers.
+ The Elo is the DMA controller on some 82xx and 83xx parts, and the
+ Elo Plus is the DMA controller on 85xx and 86xx parts.
+
+config MV_XOR
+ bool "Marvell XOR engine support"
+ depends on PLAT_ORION
+ select ASYNC_CORE
+ select DMA_ENGINE
+ ---help---
+ Enable support for the Marvell XOR engine.
+
+config DMA_ENGINE
+ bool
+
+comment "DMA Clients"
+ depends on DMA_ENGINE
+
+config NET_DMA
+ bool "Network: TCP receive copy offload"
+ depends on DMA_ENGINE && NET
+ default (INTEL_IOATDMA || FSL_DMA)
+ help
+ This enables the use of DMA engines in the network stack to
+ offload receive copy-to-user operations, freeing CPU cycles.
+
+ Say Y here if you enabled INTEL_IOATDMA or FSL_DMA, otherwise
+ say N.
+
+config DMATEST
+ tristate "DMA Test client"
+ depends on DMA_ENGINE
+ help
+ Simple DMA test client. Say N unless you're debugging a
+ DMA Device driver.
+
+endif
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
new file mode 100644
index 0000000..14f5952
--- /dev/null
+++ b/drivers/dma/Makefile
@@ -0,0 +1,9 @@
+obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
+obj-$(CONFIG_NET_DMA) += iovlock.o
+obj-$(CONFIG_DMATEST) += dmatest.o
+obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
+ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o
+obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
+obj-$(CONFIG_FSL_DMA) += fsldma.o
+obj-$(CONFIG_MV_XOR) += mv_xor.o
+obj-$(CONFIG_DW_DMAC) += dw_dmac.o
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
new file mode 100644
index 0000000..6579965
--- /dev/null
+++ b/drivers/dma/dmaengine.c
@@ -0,0 +1,635 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This code implements the DMA subsystem. It provides a HW-neutral interface
+ * for other kernel code to use asynchronous memory copy capabilities,
+ * if present, and allows different HW DMA drivers to register as providing
+ * this capability.
+ *
+ * Due to the fact we are accelerating what is already a relatively fast
+ * operation, the code goes to great lengths to avoid additional overhead,
+ * such as locking.
+ *
+ * LOCKING:
+ *
+ * The subsystem keeps two global lists, dma_device_list and dma_client_list.
+ * Both of these are protected by a mutex, dma_list_mutex.
+ *
+ * Each device has a channels list, which runs unlocked but is never modified
+ * once the device is registered, it's just setup by the driver.
+ *
+ * Each client is responsible for keeping track of the channels it uses. See
+ * the definition of dma_event_callback in dmaengine.h.
+ *
+ * Each device has a kref, which is initialized to 1 when the device is
+ * registered. A kref_get is done for each device registered. When the
+ * device is released, the corresponding kref_put is done in the release
+ * method. Every time one of the device's channels is allocated to a client,
+ * a kref_get occurs. When the channel is freed, the corresponding kref_put
+ * happens. The device's release function does a completion, so
+ * unregister_device does a remove event, device_unregister, a kref_put
+ * for the first reference, then waits on the completion for all other
+ * references to finish.
+ *
+ * Each channel has an open-coded implementation of Rusty Russell's "bigref,"
+ * with a kref and a per_cpu local_t. A dma_chan_get is called when a client
+ * signals that it wants to use a channel, and dma_chan_put is called when
+ * a channel is removed or a client using it is unregistered. A client can
+ * take extra references per outstanding transaction, as is the case with
+ * the NET DMA client. The release function does a kref_put on the device.
+ * -ChrisL, DanW
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/hardirq.h>
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/mutex.h>
+#include <linux/jiffies.h>
+
+static DEFINE_MUTEX(dma_list_mutex);
+static LIST_HEAD(dma_device_list);
+static LIST_HEAD(dma_client_list);
+
+/* --- sysfs implementation --- */
+
+static ssize_t show_memcpy_count(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct dma_chan *chan = to_dma_chan(dev);
+ unsigned long count = 0;
+ int i;
+
+ for_each_possible_cpu(i)
+ count += per_cpu_ptr(chan->local, i)->memcpy_count;
+
+ return sprintf(buf, "%lu\n", count);
+}
+
+static ssize_t show_bytes_transferred(struct device *dev, struct device_attribute *attr,
+ char *buf)
+{
+ struct dma_chan *chan = to_dma_chan(dev);
+ unsigned long count = 0;
+ int i;
+
+ for_each_possible_cpu(i)
+ count += per_cpu_ptr(chan->local, i)->bytes_transferred;
+
+ return sprintf(buf, "%lu\n", count);
+}
+
+static ssize_t show_in_use(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct dma_chan *chan = to_dma_chan(dev);
+ int in_use = 0;
+
+ if (unlikely(chan->slow_ref) &&
+ atomic_read(&chan->refcount.refcount) > 1)
+ in_use = 1;
+ else {
+ if (local_read(&(per_cpu_ptr(chan->local,
+ get_cpu())->refcount)) > 0)
+ in_use = 1;
+ put_cpu();
+ }
+
+ return sprintf(buf, "%d\n", in_use);
+}
+
+static struct device_attribute dma_attrs[] = {
+ __ATTR(memcpy_count, S_IRUGO, show_memcpy_count, NULL),
+ __ATTR(bytes_transferred, S_IRUGO, show_bytes_transferred, NULL),
+ __ATTR(in_use, S_IRUGO, show_in_use, NULL),
+ __ATTR_NULL
+};
+
+static void dma_async_device_cleanup(struct kref *kref);
+
+static void dma_dev_release(struct device *dev)
+{
+ struct dma_chan *chan = to_dma_chan(dev);
+ kref_put(&chan->device->refcount, dma_async_device_cleanup);
+}
+
+static struct class dma_devclass = {
+ .name = "dma",
+ .dev_attrs = dma_attrs,
+ .dev_release = dma_dev_release,
+};
+
+/* --- client and device registration --- */
+
+#define dma_chan_satisfies_mask(chan, mask) \
+ __dma_chan_satisfies_mask((chan), &(mask))
+static int
+__dma_chan_satisfies_mask(struct dma_chan *chan, dma_cap_mask_t *want)
+{
+ dma_cap_mask_t has;
+
+ bitmap_and(has.bits, want->bits, chan->device->cap_mask.bits,
+ DMA_TX_TYPE_END);
+ return bitmap_equal(want->bits, has.bits, DMA_TX_TYPE_END);
+}
+
+/**
+ * dma_client_chan_alloc - try to allocate channels to a client
+ * @client: &dma_client
+ *
+ * Called with dma_list_mutex held.
+ */
+static void dma_client_chan_alloc(struct dma_client *client)
+{
+ struct dma_device *device;
+ struct dma_chan *chan;
+ int desc; /* allocated descriptor count */
+ enum dma_state_client ack;
+
+ /* Find a channel */
+ list_for_each_entry(device, &dma_device_list, global_node) {
+ /* Does the client require a specific DMA controller? */
+ if (client->slave && client->slave->dma_dev
+ && client->slave->dma_dev != device->dev)
+ continue;
+
+ list_for_each_entry(chan, &device->channels, device_node) {
+ if (!dma_chan_satisfies_mask(chan, client->cap_mask))
+ continue;
+
+ desc = chan->device->device_alloc_chan_resources(
+ chan, client);
+ if (desc >= 0) {
+ ack = client->event_callback(client,
+ chan,
+ DMA_RESOURCE_AVAILABLE);
+
+ /* we are done once this client rejects
+ * an available resource
+ */
+ if (ack == DMA_ACK) {
+ dma_chan_get(chan);
+ chan->client_count++;
+ } else if (ack == DMA_NAK)
+ return;
+ }
+ }
+ }
+}
+
+enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
+{
+ enum dma_status status;
+ unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
+
+ dma_async_issue_pending(chan);
+ do {
+ status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
+ if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
+ printk(KERN_ERR "dma_sync_wait_timeout!\n");
+ return DMA_ERROR;
+ }
+ } while (status == DMA_IN_PROGRESS);
+
+ return status;
+}
+EXPORT_SYMBOL(dma_sync_wait);
+
+/**
+ * dma_chan_cleanup - release a DMA channel's resources
+ * @kref: kernel reference structure that contains the DMA channel device
+ */
+void dma_chan_cleanup(struct kref *kref)
+{
+ struct dma_chan *chan = container_of(kref, struct dma_chan, refcount);
+ chan->device->device_free_chan_resources(chan);
+ kref_put(&chan->device->refcount, dma_async_device_cleanup);
+}
+EXPORT_SYMBOL(dma_chan_cleanup);
+
+static void dma_chan_free_rcu(struct rcu_head *rcu)
+{
+ struct dma_chan *chan = container_of(rcu, struct dma_chan, rcu);
+ int bias = 0x7FFFFFFF;
+ int i;
+ for_each_possible_cpu(i)
+ bias -= local_read(&per_cpu_ptr(chan->local, i)->refcount);
+ atomic_sub(bias, &chan->refcount.refcount);
+ kref_put(&chan->refcount, dma_chan_cleanup);
+}
+
+static void dma_chan_release(struct dma_chan *chan)
+{
+ atomic_add(0x7FFFFFFF, &chan->refcount.refcount);
+ chan->slow_ref = 1;
+ call_rcu(&chan->rcu, dma_chan_free_rcu);
+}
+
+/**
+ * dma_chans_notify_available - broadcast available channels to the clients
+ */
+static void dma_clients_notify_available(void)
+{
+ struct dma_client *client;
+
+ mutex_lock(&dma_list_mutex);
+
+ list_for_each_entry(client, &dma_client_list, global_node)
+ dma_client_chan_alloc(client);
+
+ mutex_unlock(&dma_list_mutex);
+}
+
+/**
+ * dma_chans_notify_available - tell the clients that a channel is going away
+ * @chan: channel on its way out
+ */
+static void dma_clients_notify_removed(struct dma_chan *chan)
+{
+ struct dma_client *client;
+ enum dma_state_client ack;
+
+ mutex_lock(&dma_list_mutex);
+
+ list_for_each_entry(client, &dma_client_list, global_node) {
+ ack = client->event_callback(client, chan,
+ DMA_RESOURCE_REMOVED);
+
+ /* client was holding resources for this channel so
+ * free it
+ */
+ if (ack == DMA_ACK) {
+ dma_chan_put(chan);
+ chan->client_count--;
+ }
+ }
+
+ mutex_unlock(&dma_list_mutex);
+}
+
+/**
+ * dma_async_client_register - register a &dma_client
+ * @client: ptr to a client structure with valid 'event_callback' and 'cap_mask'
+ */
+void dma_async_client_register(struct dma_client *client)
+{
+ /* validate client data */
+ BUG_ON(dma_has_cap(DMA_SLAVE, client->cap_mask) &&
+ !client->slave);
+
+ mutex_lock(&dma_list_mutex);
+ list_add_tail(&client->global_node, &dma_client_list);
+ mutex_unlock(&dma_list_mutex);
+}
+EXPORT_SYMBOL(dma_async_client_register);
+
+/**
+ * dma_async_client_unregister - unregister a client and free the &dma_client
+ * @client: &dma_client to free
+ *
+ * Force frees any allocated DMA channels, frees the &dma_client memory
+ */
+void dma_async_client_unregister(struct dma_client *client)
+{
+ struct dma_device *device;
+ struct dma_chan *chan;
+ enum dma_state_client ack;
+
+ if (!client)
+ return;
+
+ mutex_lock(&dma_list_mutex);
+ /* free all channels the client is holding */
+ list_for_each_entry(device, &dma_device_list, global_node)
+ list_for_each_entry(chan, &device->channels, device_node) {
+ ack = client->event_callback(client, chan,
+ DMA_RESOURCE_REMOVED);
+
+ if (ack == DMA_ACK) {
+ dma_chan_put(chan);
+ chan->client_count--;
+ }
+ }
+
+ list_del(&client->global_node);
+ mutex_unlock(&dma_list_mutex);
+}
+EXPORT_SYMBOL(dma_async_client_unregister);
+
+/**
+ * dma_async_client_chan_request - send all available channels to the
+ * client that satisfy the capability mask
+ * @client - requester
+ */
+void dma_async_client_chan_request(struct dma_client *client)
+{
+ mutex_lock(&dma_list_mutex);
+ dma_client_chan_alloc(client);
+ mutex_unlock(&dma_list_mutex);
+}
+EXPORT_SYMBOL(dma_async_client_chan_request);
+
+/**
+ * dma_async_device_register - registers DMA devices found
+ * @device: &dma_device
+ */
+int dma_async_device_register(struct dma_device *device)
+{
+ static int id;
+ int chancnt = 0, rc;
+ struct dma_chan* chan;
+
+ if (!device)
+ return -ENODEV;
+
+ /* validate device routines */
+ BUG_ON(dma_has_cap(DMA_MEMCPY, device->cap_mask) &&
+ !device->device_prep_dma_memcpy);
+ BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
+ !device->device_prep_dma_xor);
+ BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) &&
+ !device->device_prep_dma_zero_sum);
+ BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
+ !device->device_prep_dma_memset);
+ BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
+ !device->device_prep_dma_interrupt);
+ BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
+ !device->device_prep_slave_sg);
+ BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
+ !device->device_terminate_all);
+
+ BUG_ON(!device->device_alloc_chan_resources);
+ BUG_ON(!device->device_free_chan_resources);
+ BUG_ON(!device->device_is_tx_complete);
+ BUG_ON(!device->device_issue_pending);
+ BUG_ON(!device->dev);
+
+ init_completion(&device->done);
+ kref_init(&device->refcount);
+
+ mutex_lock(&dma_list_mutex);
+ device->dev_id = id++;
+ mutex_unlock(&dma_list_mutex);
+
+ /* represent channels in sysfs. Probably want devs too */
+ list_for_each_entry(chan, &device->channels, device_node) {
+ chan->local = alloc_percpu(typeof(*chan->local));
+ if (chan->local == NULL)
+ continue;
+
+ chan->chan_id = chancnt++;
+ chan->dev.class = &dma_devclass;
+ chan->dev.parent = device->dev;
+ dev_set_name(&chan->dev, "dma%dchan%d",
+ device->dev_id, chan->chan_id);
+
+ rc = device_register(&chan->dev);
+ if (rc) {
+ chancnt--;
+ free_percpu(chan->local);
+ chan->local = NULL;
+ goto err_out;
+ }
+
+ /* One for the channel, one of the class device */
+ kref_get(&device->refcount);
+ kref_get(&device->refcount);
+ kref_init(&chan->refcount);
+ chan->client_count = 0;
+ chan->slow_ref = 0;
+ INIT_RCU_HEAD(&chan->rcu);
+ }
+
+ mutex_lock(&dma_list_mutex);
+ list_add_tail(&device->global_node, &dma_device_list);
+ mutex_unlock(&dma_list_mutex);
+
+ dma_clients_notify_available();
+
+ return 0;
+
+err_out:
+ list_for_each_entry(chan, &device->channels, device_node) {
+ if (chan->local == NULL)
+ continue;
+ kref_put(&device->refcount, dma_async_device_cleanup);
+ device_unregister(&chan->dev);
+ chancnt--;
+ free_percpu(chan->local);
+ }
+ return rc;
+}
+EXPORT_SYMBOL(dma_async_device_register);
+
+/**
+ * dma_async_device_cleanup - function called when all references are released
+ * @kref: kernel reference object
+ */
+static void dma_async_device_cleanup(struct kref *kref)
+{
+ struct dma_device *device;
+
+ device = container_of(kref, struct dma_device, refcount);
+ complete(&device->done);
+}
+
+/**
+ * dma_async_device_unregister - unregisters DMA devices
+ * @device: &dma_device
+ */
+void dma_async_device_unregister(struct dma_device *device)
+{
+ struct dma_chan *chan;
+
+ mutex_lock(&dma_list_mutex);
+ list_del(&device->global_node);
+ mutex_unlock(&dma_list_mutex);
+
+ list_for_each_entry(chan, &device->channels, device_node) {
+ dma_clients_notify_removed(chan);
+ device_unregister(&chan->dev);
+ dma_chan_release(chan);
+ }
+
+ kref_put(&device->refcount, dma_async_device_cleanup);
+ wait_for_completion(&device->done);
+}
+EXPORT_SYMBOL(dma_async_device_unregister);
+
+/**
+ * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
+ * @chan: DMA channel to offload copy to
+ * @dest: destination address (virtual)
+ * @src: source address (virtual)
+ * @len: length
+ *
+ * Both @dest and @src must be mappable to a bus address according to the
+ * DMA mapping API rules for streaming mappings.
+ * Both @dest and @src must stay memory resident (kernel memory or locked
+ * user space pages).
+ */
+dma_cookie_t
+dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
+ void *src, size_t len)
+{
+ struct dma_device *dev = chan->device;
+ struct dma_async_tx_descriptor *tx;
+ dma_addr_t dma_dest, dma_src;
+ dma_cookie_t cookie;
+ int cpu;
+
+ dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
+ dma_dest = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE);
+ tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len,
+ DMA_CTRL_ACK);
+
+ if (!tx) {
+ dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
+ dma_unmap_single(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
+ return -ENOMEM;
+ }
+
+ tx->callback = NULL;
+ cookie = tx->tx_submit(tx);
+
+ cpu = get_cpu();
+ per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
+ per_cpu_ptr(chan->local, cpu)->memcpy_count++;
+ put_cpu();
+
+ return cookie;
+}
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
+
+/**
+ * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
+ * @chan: DMA channel to offload copy to
+ * @page: destination page
+ * @offset: offset in page to copy to
+ * @kdata: source address (virtual)
+ * @len: length
+ *
+ * Both @page/@offset and @kdata must be mappable to a bus address according
+ * to the DMA mapping API rules for streaming mappings.
+ * Both @page/@offset and @kdata must stay memory resident (kernel memory or
+ * locked user space pages)
+ */
+dma_cookie_t
+dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
+ unsigned int offset, void *kdata, size_t len)
+{
+ struct dma_device *dev = chan->device;
+ struct dma_async_tx_descriptor *tx;
+ dma_addr_t dma_dest, dma_src;
+ dma_cookie_t cookie;
+ int cpu;
+
+ dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
+ dma_dest = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE);
+ tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len,
+ DMA_CTRL_ACK);
+
+ if (!tx) {
+ dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
+ dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
+ return -ENOMEM;
+ }
+
+ tx->callback = NULL;
+ cookie = tx->tx_submit(tx);
+
+ cpu = get_cpu();
+ per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
+ per_cpu_ptr(chan->local, cpu)->memcpy_count++;
+ put_cpu();
+
+ return cookie;
+}
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
+
+/**
+ * dma_async_memcpy_pg_to_pg - offloaded copy from page to page
+ * @chan: DMA channel to offload copy to
+ * @dest_pg: destination page
+ * @dest_off: offset in page to copy to
+ * @src_pg: source page
+ * @src_off: offset in page to copy from
+ * @len: length
+ *
+ * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus
+ * address according to the DMA mapping API rules for streaming mappings.
+ * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident
+ * (kernel memory or locked user space pages).
+ */
+dma_cookie_t
+dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
+ unsigned int dest_off, struct page *src_pg, unsigned int src_off,
+ size_t len)
+{
+ struct dma_device *dev = chan->device;
+ struct dma_async_tx_descriptor *tx;
+ dma_addr_t dma_dest, dma_src;
+ dma_cookie_t cookie;
+ int cpu;
+
+ dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
+ dma_dest = dma_map_page(dev->dev, dest_pg, dest_off, len,
+ DMA_FROM_DEVICE);
+ tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len,
+ DMA_CTRL_ACK);
+
+ if (!tx) {
+ dma_unmap_page(dev->dev, dma_src, len, DMA_TO_DEVICE);
+ dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
+ return -ENOMEM;
+ }
+
+ tx->callback = NULL;
+ cookie = tx->tx_submit(tx);
+
+ cpu = get_cpu();
+ per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
+ per_cpu_ptr(chan->local, cpu)->memcpy_count++;
+ put_cpu();
+
+ return cookie;
+}
+EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg);
+
+void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
+ struct dma_chan *chan)
+{
+ tx->chan = chan;
+ spin_lock_init(&tx->lock);
+}
+EXPORT_SYMBOL(dma_async_tx_descriptor_init);
+
+static int __init dma_bus_init(void)
+{
+ mutex_init(&dma_list_mutex);
+ return class_register(&dma_devclass);
+}
+subsys_initcall(dma_bus_init);
+
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
new file mode 100644
index 0000000..ed9636b
--- /dev/null
+++ b/drivers/dma/dmatest.c
@@ -0,0 +1,449 @@
+/*
+ * DMA Engine test module
+ *
+ * Copyright (C) 2007 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/random.h>
+#include <linux/wait.h>
+
+static unsigned int test_buf_size = 16384;
+module_param(test_buf_size, uint, S_IRUGO);
+MODULE_PARM_DESC(test_buf_size, "Size of the memcpy test buffer");
+
+static char test_channel[20];
+module_param_string(channel, test_channel, sizeof(test_channel), S_IRUGO);
+MODULE_PARM_DESC(channel, "Bus ID of the channel to test (default: any)");
+
+static char test_device[20];
+module_param_string(device, test_device, sizeof(test_device), S_IRUGO);
+MODULE_PARM_DESC(device, "Bus ID of the DMA Engine to test (default: any)");
+
+static unsigned int threads_per_chan = 1;
+module_param(threads_per_chan, uint, S_IRUGO);
+MODULE_PARM_DESC(threads_per_chan,
+ "Number of threads to start per channel (default: 1)");
+
+static unsigned int max_channels;
+module_param(max_channels, uint, S_IRUGO);
+MODULE_PARM_DESC(nr_channels,
+ "Maximum number of channels to use (default: all)");
+
+/*
+ * Initialization patterns. All bytes in the source buffer has bit 7
+ * set, all bytes in the destination buffer has bit 7 cleared.
+ *
+ * Bit 6 is set for all bytes which are to be copied by the DMA
+ * engine. Bit 5 is set for all bytes which are to be overwritten by
+ * the DMA engine.
+ *
+ * The remaining bits are the inverse of a counter which increments by
+ * one for each byte address.
+ */
+#define PATTERN_SRC 0x80
+#define PATTERN_DST 0x00
+#define PATTERN_COPY 0x40
+#define PATTERN_OVERWRITE 0x20
+#define PATTERN_COUNT_MASK 0x1f
+
+struct dmatest_thread {
+ struct list_head node;
+ struct task_struct *task;
+ struct dma_chan *chan;
+ u8 *srcbuf;
+ u8 *dstbuf;
+};
+
+struct dmatest_chan {
+ struct list_head node;
+ struct dma_chan *chan;
+ struct list_head threads;
+};
+
+/*
+ * These are protected by dma_list_mutex since they're only used by
+ * the DMA client event callback
+ */
+static LIST_HEAD(dmatest_channels);
+static unsigned int nr_channels;
+
+static bool dmatest_match_channel(struct dma_chan *chan)
+{
+ if (test_channel[0] == '\0')
+ return true;
+ return strcmp(dev_name(&chan->dev), test_channel) == 0;
+}
+
+static bool dmatest_match_device(struct dma_device *device)
+{
+ if (test_device[0] == '\0')
+ return true;
+ return strcmp(dev_name(device->dev), test_device) == 0;
+}
+
+static unsigned long dmatest_random(void)
+{
+ unsigned long buf;
+
+ get_random_bytes(&buf, sizeof(buf));
+ return buf;
+}
+
+static void dmatest_init_srcbuf(u8 *buf, unsigned int start, unsigned int len)
+{
+ unsigned int i;
+
+ for (i = 0; i < start; i++)
+ buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
+ for ( ; i < start + len; i++)
+ buf[i] = PATTERN_SRC | PATTERN_COPY
+ | (~i & PATTERN_COUNT_MASK);;
+ for ( ; i < test_buf_size; i++)
+ buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
+}
+
+static void dmatest_init_dstbuf(u8 *buf, unsigned int start, unsigned int len)
+{
+ unsigned int i;
+
+ for (i = 0; i < start; i++)
+ buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
+ for ( ; i < start + len; i++)
+ buf[i] = PATTERN_DST | PATTERN_OVERWRITE
+ | (~i & PATTERN_COUNT_MASK);
+ for ( ; i < test_buf_size; i++)
+ buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
+}
+
+static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index,
+ unsigned int counter, bool is_srcbuf)
+{
+ u8 diff = actual ^ pattern;
+ u8 expected = pattern | (~counter & PATTERN_COUNT_MASK);
+ const char *thread_name = current->comm;
+
+ if (is_srcbuf)
+ pr_warning("%s: srcbuf[0x%x] overwritten!"
+ " Expected %02x, got %02x\n",
+ thread_name, index, expected, actual);
+ else if ((pattern & PATTERN_COPY)
+ && (diff & (PATTERN_COPY | PATTERN_OVERWRITE)))
+ pr_warning("%s: dstbuf[0x%x] not copied!"
+ " Expected %02x, got %02x\n",
+ thread_name, index, expected, actual);
+ else if (diff & PATTERN_SRC)
+ pr_warning("%s: dstbuf[0x%x] was copied!"
+ " Expected %02x, got %02x\n",
+ thread_name, index, expected, actual);
+ else
+ pr_warning("%s: dstbuf[0x%x] mismatch!"
+ " Expected %02x, got %02x\n",
+ thread_name, index, expected, actual);
+}
+
+static unsigned int dmatest_verify(u8 *buf, unsigned int start,
+ unsigned int end, unsigned int counter, u8 pattern,
+ bool is_srcbuf)
+{
+ unsigned int i;
+ unsigned int error_count = 0;
+ u8 actual;
+
+ for (i = start; i < end; i++) {
+ actual = buf[i];
+ if (actual != (pattern | (~counter & PATTERN_COUNT_MASK))) {
+ if (error_count < 32)
+ dmatest_mismatch(actual, pattern, i, counter,
+ is_srcbuf);
+ error_count++;
+ }
+ counter++;
+ }
+
+ if (error_count > 32)
+ pr_warning("%s: %u errors suppressed\n",
+ current->comm, error_count - 32);
+
+ return error_count;
+}
+
+/*
+ * This function repeatedly tests DMA transfers of various lengths and
+ * offsets until it is told to exit by kthread_stop(). There may be
+ * multiple threads running this function in parallel for a single
+ * channel, and there may be multiple channels being tested in
+ * parallel.
+ *
+ * Before each test, the source and destination buffer is initialized
+ * with a known pattern. This pattern is different depending on
+ * whether it's in an area which is supposed to be copied or
+ * overwritten, and different in the source and destination buffers.
+ * So if the DMA engine doesn't copy exactly what we tell it to copy,
+ * we'll notice.
+ */
+static int dmatest_func(void *data)
+{
+ struct dmatest_thread *thread = data;
+ struct dma_chan *chan;
+ const char *thread_name;
+ unsigned int src_off, dst_off, len;
+ unsigned int error_count;
+ unsigned int failed_tests = 0;
+ unsigned int total_tests = 0;
+ dma_cookie_t cookie;
+ enum dma_status status;
+ int ret;
+
+ thread_name = current->comm;
+
+ ret = -ENOMEM;
+ thread->srcbuf = kmalloc(test_buf_size, GFP_KERNEL);
+ if (!thread->srcbuf)
+ goto err_srcbuf;
+ thread->dstbuf = kmalloc(test_buf_size, GFP_KERNEL);
+ if (!thread->dstbuf)
+ goto err_dstbuf;
+
+ smp_rmb();
+ chan = thread->chan;
+ dma_chan_get(chan);
+
+ while (!kthread_should_stop()) {
+ total_tests++;
+
+ len = dmatest_random() % test_buf_size + 1;
+ src_off = dmatest_random() % (test_buf_size - len + 1);
+ dst_off = dmatest_random() % (test_buf_size - len + 1);
+
+ dmatest_init_srcbuf(thread->srcbuf, src_off, len);
+ dmatest_init_dstbuf(thread->dstbuf, dst_off, len);
+
+ cookie = dma_async_memcpy_buf_to_buf(chan,
+ thread->dstbuf + dst_off,
+ thread->srcbuf + src_off,
+ len);
+ if (dma_submit_error(cookie)) {
+ pr_warning("%s: #%u: submit error %d with src_off=0x%x "
+ "dst_off=0x%x len=0x%x\n",
+ thread_name, total_tests - 1, cookie,
+ src_off, dst_off, len);
+ msleep(100);
+ failed_tests++;
+ continue;
+ }
+ dma_async_memcpy_issue_pending(chan);
+
+ do {
+ msleep(1);
+ status = dma_async_memcpy_complete(
+ chan, cookie, NULL, NULL);
+ } while (status == DMA_IN_PROGRESS);
+
+ if (status == DMA_ERROR) {
+ pr_warning("%s: #%u: error during copy\n",
+ thread_name, total_tests - 1);
+ failed_tests++;
+ continue;
+ }
+
+ error_count = 0;
+
+ pr_debug("%s: verifying source buffer...\n", thread_name);
+ error_count += dmatest_verify(thread->srcbuf, 0, src_off,
+ 0, PATTERN_SRC, true);
+ error_count += dmatest_verify(thread->srcbuf, src_off,
+ src_off + len, src_off,
+ PATTERN_SRC | PATTERN_COPY, true);
+ error_count += dmatest_verify(thread->srcbuf, src_off + len,
+ test_buf_size, src_off + len,
+ PATTERN_SRC, true);
+
+ pr_debug("%s: verifying dest buffer...\n",
+ thread->task->comm);
+ error_count += dmatest_verify(thread->dstbuf, 0, dst_off,
+ 0, PATTERN_DST, false);
+ error_count += dmatest_verify(thread->dstbuf, dst_off,
+ dst_off + len, src_off,
+ PATTERN_SRC | PATTERN_COPY, false);
+ error_count += dmatest_verify(thread->dstbuf, dst_off + len,
+ test_buf_size, dst_off + len,
+ PATTERN_DST, false);
+
+ if (error_count) {
+ pr_warning("%s: #%u: %u errors with "
+ "src_off=0x%x dst_off=0x%x len=0x%x\n",
+ thread_name, total_tests - 1, error_count,
+ src_off, dst_off, len);
+ failed_tests++;
+ } else {
+ pr_debug("%s: #%u: No errors with "
+ "src_off=0x%x dst_off=0x%x len=0x%x\n",
+ thread_name, total_tests - 1,
+ src_off, dst_off, len);
+ }
+ }
+
+ ret = 0;
+ dma_chan_put(chan);
+ kfree(thread->dstbuf);
+err_dstbuf:
+ kfree(thread->srcbuf);
+err_srcbuf:
+ pr_notice("%s: terminating after %u tests, %u failures (status %d)\n",
+ thread_name, total_tests, failed_tests, ret);
+ return ret;
+}
+
+static void dmatest_cleanup_channel(struct dmatest_chan *dtc)
+{
+ struct dmatest_thread *thread;
+ struct dmatest_thread *_thread;
+ int ret;
+
+ list_for_each_entry_safe(thread, _thread, &dtc->threads, node) {
+ ret = kthread_stop(thread->task);
+ pr_debug("dmatest: thread %s exited with status %d\n",
+ thread->task->comm, ret);
+ list_del(&thread->node);
+ kfree(thread);
+ }
+ kfree(dtc);
+}
+
+static enum dma_state_client dmatest_add_channel(struct dma_chan *chan)
+{
+ struct dmatest_chan *dtc;
+ struct dmatest_thread *thread;
+ unsigned int i;
+
+ /* Have we already been told about this channel? */
+ list_for_each_entry(dtc, &dmatest_channels, node)
+ if (dtc->chan == chan)
+ return DMA_DUP;
+
+ dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
+ if (!dtc) {
+ pr_warning("dmatest: No memory for %s\n", dev_name(&chan->dev));
+ return DMA_NAK;
+ }
+
+ dtc->chan = chan;
+ INIT_LIST_HEAD(&dtc->threads);
+
+ for (i = 0; i < threads_per_chan; i++) {
+ thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL);
+ if (!thread) {
+ pr_warning("dmatest: No memory for %s-test%u\n",
+ dev_name(&chan->dev), i);
+ break;
+ }
+ thread->chan = dtc->chan;
+ smp_wmb();
+ thread->task = kthread_run(dmatest_func, thread, "%s-test%u",
+ dev_name(&chan->dev), i);
+ if (IS_ERR(thread->task)) {
+ pr_warning("dmatest: Failed to run thread %s-test%u\n",
+ dev_name(&chan->dev), i);
+ kfree(thread);
+ break;
+ }
+
+ /* srcbuf and dstbuf are allocated by the thread itself */
+
+ list_add_tail(&thread->node, &dtc->threads);
+ }
+
+ pr_info("dmatest: Started %u threads using %s\n", i, dev_name(&chan->dev));
+
+ list_add_tail(&dtc->node, &dmatest_channels);
+ nr_channels++;
+
+ return DMA_ACK;
+}
+
+static enum dma_state_client dmatest_remove_channel(struct dma_chan *chan)
+{
+ struct dmatest_chan *dtc, *_dtc;
+
+ list_for_each_entry_safe(dtc, _dtc, &dmatest_channels, node) {
+ if (dtc->chan == chan) {
+ list_del(&dtc->node);
+ dmatest_cleanup_channel(dtc);
+ pr_debug("dmatest: lost channel %s\n",
+ dev_name(&chan->dev));
+ return DMA_ACK;
+ }
+ }
+
+ return DMA_DUP;
+}
+
+/*
+ * Start testing threads as new channels are assigned to us, and kill
+ * them when the channels go away.
+ *
+ * When we unregister the client, all channels are removed so this
+ * will also take care of cleaning things up when the module is
+ * unloaded.
+ */
+static enum dma_state_client
+dmatest_event(struct dma_client *client, struct dma_chan *chan,
+ enum dma_state state)
+{
+ enum dma_state_client ack = DMA_NAK;
+
+ switch (state) {
+ case DMA_RESOURCE_AVAILABLE:
+ if (!dmatest_match_channel(chan)
+ || !dmatest_match_device(chan->device))
+ ack = DMA_DUP;
+ else if (max_channels && nr_channels >= max_channels)
+ ack = DMA_NAK;
+ else
+ ack = dmatest_add_channel(chan);
+ break;
+
+ case DMA_RESOURCE_REMOVED:
+ ack = dmatest_remove_channel(chan);
+ break;
+
+ default:
+ pr_info("dmatest: Unhandled event %u (%s)\n",
+ state, dev_name(&chan->dev));
+ break;
+ }
+
+ return ack;
+}
+
+static struct dma_client dmatest_client = {
+ .event_callback = dmatest_event,
+};
+
+static int __init dmatest_init(void)
+{
+ dma_cap_set(DMA_MEMCPY, dmatest_client.cap_mask);
+ dma_async_client_register(&dmatest_client);
+ dma_async_client_chan_request(&dmatest_client);
+
+ return 0;
+}
+module_init(dmatest_init);
+
+static void __exit dmatest_exit(void)
+{
+ dma_async_client_unregister(&dmatest_client);
+}
+module_exit(dmatest_exit);
+
+MODULE_AUTHOR("Haavard Skinnemoen <hskinnemoen@atmel.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
new file mode 100644
index 0000000..0778d99
--- /dev/null
+++ b/drivers/dma/dw_dmac.c
@@ -0,0 +1,1122 @@
+/*
+ * Driver for the Synopsys DesignWare DMA Controller (aka DMACA on
+ * AVR32 systems.)
+ *
+ * Copyright (C) 2007-2008 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "dw_dmac_regs.h"
+
+/*
+ * This supports the Synopsys "DesignWare AHB Central DMA Controller",
+ * (DW_ahb_dmac) which is used with various AMBA 2.0 systems (not all
+ * of which use ARM any more). See the "Databook" from Synopsys for
+ * information beyond what licensees probably provide.
+ *
+ * The driver has currently been tested only with the Atmel AT32AP7000,
+ * which does not support descriptor writeback.
+ */
+
+/* NOTE: DMS+SMS is system-specific. We should get this information
+ * from the platform code somehow.
+ */
+#define DWC_DEFAULT_CTLLO (DWC_CTLL_DST_MSIZE(0) \
+ | DWC_CTLL_SRC_MSIZE(0) \
+ | DWC_CTLL_DMS(0) \
+ | DWC_CTLL_SMS(1) \
+ | DWC_CTLL_LLP_D_EN \
+ | DWC_CTLL_LLP_S_EN)
+
+/*
+ * This is configuration-dependent and usually a funny size like 4095.
+ * Let's round it down to the nearest power of two.
+ *
+ * Note that this is a transfer count, i.e. if we transfer 32-bit
+ * words, we can do 8192 bytes per descriptor.
+ *
+ * This parameter is also system-specific.
+ */
+#define DWC_MAX_COUNT 2048U
+
+/*
+ * Number of descriptors to allocate for each channel. This should be
+ * made configurable somehow; preferably, the clients (at least the
+ * ones using slave transfers) should be able to give us a hint.
+ */
+#define NR_DESCS_PER_CHANNEL 64
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Because we're not relying on writeback from the controller (it may not
+ * even be configured into the core!) we don't need to use dma_pool. These
+ * descriptors -- and associated data -- are cacheable. We do need to make
+ * sure their dcache entries are written back before handing them off to
+ * the controller, though.
+ */
+
+static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc)
+{
+ return list_entry(dwc->active_list.next, struct dw_desc, desc_node);
+}
+
+static struct dw_desc *dwc_first_queued(struct dw_dma_chan *dwc)
+{
+ return list_entry(dwc->queue.next, struct dw_desc, desc_node);
+}
+
+static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
+{
+ struct dw_desc *desc, *_desc;
+ struct dw_desc *ret = NULL;
+ unsigned int i = 0;
+
+ spin_lock_bh(&dwc->lock);
+ list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) {
+ if (async_tx_test_ack(&desc->txd)) {
+ list_del(&desc->desc_node);
+ ret = desc;
+ break;
+ }
+ dev_dbg(&dwc->chan.dev, "desc %p not ACKed\n", desc);
+ i++;
+ }
+ spin_unlock_bh(&dwc->lock);
+
+ dev_vdbg(&dwc->chan.dev, "scanned %u descriptors on freelist\n", i);
+
+ return ret;
+}
+
+static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+ struct dw_desc *child;
+
+ list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+ dma_sync_single_for_cpu(dwc->chan.dev.parent,
+ child->txd.phys, sizeof(child->lli),
+ DMA_TO_DEVICE);
+ dma_sync_single_for_cpu(dwc->chan.dev.parent,
+ desc->txd.phys, sizeof(desc->lli),
+ DMA_TO_DEVICE);
+}
+
+/*
+ * Move a descriptor, including any children, to the free list.
+ * `desc' must not be on any lists.
+ */
+static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+ if (desc) {
+ struct dw_desc *child;
+
+ dwc_sync_desc_for_cpu(dwc, desc);
+
+ spin_lock_bh(&dwc->lock);
+ list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+ dev_vdbg(&dwc->chan.dev,
+ "moving child desc %p to freelist\n",
+ child);
+ list_splice_init(&desc->txd.tx_list, &dwc->free_list);
+ dev_vdbg(&dwc->chan.dev, "moving desc %p to freelist\n", desc);
+ list_add(&desc->desc_node, &dwc->free_list);
+ spin_unlock_bh(&dwc->lock);
+ }
+}
+
+/* Called with dwc->lock held and bh disabled */
+static dma_cookie_t
+dwc_assign_cookie(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+ dma_cookie_t cookie = dwc->chan.cookie;
+
+ if (++cookie < 0)
+ cookie = 1;
+
+ dwc->chan.cookie = cookie;
+ desc->txd.cookie = cookie;
+
+ return cookie;
+}
+
+/*----------------------------------------------------------------------*/
+
+/* Called with dwc->lock held and bh disabled */
+static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
+{
+ struct dw_dma *dw = to_dw_dma(dwc->chan.device);
+
+ /* ASSERT: channel is idle */
+ if (dma_readl(dw, CH_EN) & dwc->mask) {
+ dev_err(&dwc->chan.dev,
+ "BUG: Attempted to start non-idle channel\n");
+ dev_err(&dwc->chan.dev,
+ " SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
+ channel_readl(dwc, SAR),
+ channel_readl(dwc, DAR),
+ channel_readl(dwc, LLP),
+ channel_readl(dwc, CTL_HI),
+ channel_readl(dwc, CTL_LO));
+
+ /* The tasklet will hopefully advance the queue... */
+ return;
+ }
+
+ channel_writel(dwc, LLP, first->txd.phys);
+ channel_writel(dwc, CTL_LO,
+ DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+ channel_writel(dwc, CTL_HI, 0);
+ channel_set_bit(dw, CH_EN, dwc->mask);
+}
+
+/*----------------------------------------------------------------------*/
+
+static void
+dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+ dma_async_tx_callback callback;
+ void *param;
+ struct dma_async_tx_descriptor *txd = &desc->txd;
+
+ dev_vdbg(&dwc->chan.dev, "descriptor %u complete\n", txd->cookie);
+
+ dwc->completed = txd->cookie;
+ callback = txd->callback;
+ param = txd->callback_param;
+
+ dwc_sync_desc_for_cpu(dwc, desc);
+ list_splice_init(&txd->tx_list, &dwc->free_list);
+ list_move(&desc->desc_node, &dwc->free_list);
+
+ /*
+ * We use dma_unmap_page() regardless of how the buffers were
+ * mapped before they were submitted...
+ */
+ if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP))
+ dma_unmap_page(dwc->chan.dev.parent, desc->lli.dar, desc->len,
+ DMA_FROM_DEVICE);
+ if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP))
+ dma_unmap_page(dwc->chan.dev.parent, desc->lli.sar, desc->len,
+ DMA_TO_DEVICE);
+
+ /*
+ * The API requires that no submissions are done from a
+ * callback, so we don't need to drop the lock here
+ */
+ if (callback)
+ callback(param);
+}
+
+static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+ struct dw_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ if (dma_readl(dw, CH_EN) & dwc->mask) {
+ dev_err(&dwc->chan.dev,
+ "BUG: XFER bit set, but channel not idle!\n");
+
+ /* Try to continue after resetting the channel... */
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+ while (dma_readl(dw, CH_EN) & dwc->mask)
+ cpu_relax();
+ }
+
+ /*
+ * Submit queued descriptors ASAP, i.e. before we go through
+ * the completed ones.
+ */
+ if (!list_empty(&dwc->queue))
+ dwc_dostart(dwc, dwc_first_queued(dwc));
+ list_splice_init(&dwc->active_list, &list);
+ list_splice_init(&dwc->queue, &dwc->active_list);
+
+ list_for_each_entry_safe(desc, _desc, &list, desc_node)
+ dwc_descriptor_complete(dwc, desc);
+}
+
+static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+ dma_addr_t llp;
+ struct dw_desc *desc, *_desc;
+ struct dw_desc *child;
+ u32 status_xfer;
+
+ /*
+ * Clear block interrupt flag before scanning so that we don't
+ * miss any, and read LLP before RAW_XFER to ensure it is
+ * valid if we decide to scan the list.
+ */
+ dma_writel(dw, CLEAR.BLOCK, dwc->mask);
+ llp = channel_readl(dwc, LLP);
+ status_xfer = dma_readl(dw, RAW.XFER);
+
+ if (status_xfer & dwc->mask) {
+ /* Everything we've submitted is done */
+ dma_writel(dw, CLEAR.XFER, dwc->mask);
+ dwc_complete_all(dw, dwc);
+ return;
+ }
+
+ dev_vdbg(&dwc->chan.dev, "scan_descriptors: llp=0x%x\n", llp);
+
+ list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) {
+ if (desc->lli.llp == llp)
+ /* This one is currently in progress */
+ return;
+
+ list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+ if (child->lli.llp == llp)
+ /* Currently in progress */
+ return;
+
+ /*
+ * No descriptors so far seem to be in progress, i.e.
+ * this one must be done.
+ */
+ dwc_descriptor_complete(dwc, desc);
+ }
+
+ dev_err(&dwc->chan.dev,
+ "BUG: All descriptors done, but channel not idle!\n");
+
+ /* Try to continue after resetting the channel... */
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+ while (dma_readl(dw, CH_EN) & dwc->mask)
+ cpu_relax();
+
+ if (!list_empty(&dwc->queue)) {
+ dwc_dostart(dwc, dwc_first_queued(dwc));
+ list_splice_init(&dwc->queue, &dwc->active_list);
+ }
+}
+
+static void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli)
+{
+ dev_printk(KERN_CRIT, &dwc->chan.dev,
+ " desc: s0x%x d0x%x l0x%x c0x%x:%x\n",
+ lli->sar, lli->dar, lli->llp,
+ lli->ctlhi, lli->ctllo);
+}
+
+static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+ struct dw_desc *bad_desc;
+ struct dw_desc *child;
+
+ dwc_scan_descriptors(dw, dwc);
+
+ /*
+ * The descriptor currently at the head of the active list is
+ * borked. Since we don't have any way to report errors, we'll
+ * just have to scream loudly and try to carry on.
+ */
+ bad_desc = dwc_first_active(dwc);
+ list_del_init(&bad_desc->desc_node);
+ list_splice_init(&dwc->queue, dwc->active_list.prev);
+
+ /* Clear the error flag and try to restart the controller */
+ dma_writel(dw, CLEAR.ERROR, dwc->mask);
+ if (!list_empty(&dwc->active_list))
+ dwc_dostart(dwc, dwc_first_active(dwc));
+
+ /*
+ * KERN_CRITICAL may seem harsh, but since this only happens
+ * when someone submits a bad physical address in a
+ * descriptor, we should consider ourselves lucky that the
+ * controller flagged an error instead of scribbling over
+ * random memory locations.
+ */
+ dev_printk(KERN_CRIT, &dwc->chan.dev,
+ "Bad descriptor submitted for DMA!\n");
+ dev_printk(KERN_CRIT, &dwc->chan.dev,
+ " cookie: %d\n", bad_desc->txd.cookie);
+ dwc_dump_lli(dwc, &bad_desc->lli);
+ list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
+ dwc_dump_lli(dwc, &child->lli);
+
+ /* Pretend the descriptor completed successfully */
+ dwc_descriptor_complete(dwc, bad_desc);
+}
+
+static void dw_dma_tasklet(unsigned long data)
+{
+ struct dw_dma *dw = (struct dw_dma *)data;
+ struct dw_dma_chan *dwc;
+ u32 status_block;
+ u32 status_xfer;
+ u32 status_err;
+ int i;
+
+ status_block = dma_readl(dw, RAW.BLOCK);
+ status_xfer = dma_readl(dw, RAW.XFER);
+ status_err = dma_readl(dw, RAW.ERROR);
+
+ dev_vdbg(dw->dma.dev, "tasklet: status_block=%x status_err=%x\n",
+ status_block, status_err);
+
+ for (i = 0; i < dw->dma.chancnt; i++) {
+ dwc = &dw->chan[i];
+ spin_lock(&dwc->lock);
+ if (status_err & (1 << i))
+ dwc_handle_error(dw, dwc);
+ else if ((status_block | status_xfer) & (1 << i))
+ dwc_scan_descriptors(dw, dwc);
+ spin_unlock(&dwc->lock);
+ }
+
+ /*
+ * Re-enable interrupts. Block Complete interrupts are only
+ * enabled if the INT_EN bit in the descriptor is set. This
+ * will trigger a scan before the whole list is done.
+ */
+ channel_set_bit(dw, MASK.XFER, dw->all_chan_mask);
+ channel_set_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+ channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask);
+}
+
+static irqreturn_t dw_dma_interrupt(int irq, void *dev_id)
+{
+ struct dw_dma *dw = dev_id;
+ u32 status;
+
+ dev_vdbg(dw->dma.dev, "interrupt: status=0x%x\n",
+ dma_readl(dw, STATUS_INT));
+
+ /*
+ * Just disable the interrupts. We'll turn them back on in the
+ * softirq handler.
+ */
+ channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
+
+ status = dma_readl(dw, STATUS_INT);
+ if (status) {
+ dev_err(dw->dma.dev,
+ "BUG: Unexpected interrupts pending: 0x%x\n",
+ status);
+
+ /* Try to recover */
+ channel_clear_bit(dw, MASK.XFER, (1 << 8) - 1);
+ channel_clear_bit(dw, MASK.BLOCK, (1 << 8) - 1);
+ channel_clear_bit(dw, MASK.SRC_TRAN, (1 << 8) - 1);
+ channel_clear_bit(dw, MASK.DST_TRAN, (1 << 8) - 1);
+ channel_clear_bit(dw, MASK.ERROR, (1 << 8) - 1);
+ }
+
+ tasklet_schedule(&dw->tasklet);
+
+ return IRQ_HANDLED;
+}
+
+/*----------------------------------------------------------------------*/
+
+static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct dw_desc *desc = txd_to_dw_desc(tx);
+ struct dw_dma_chan *dwc = to_dw_dma_chan(tx->chan);
+ dma_cookie_t cookie;
+
+ spin_lock_bh(&dwc->lock);
+ cookie = dwc_assign_cookie(dwc, desc);
+
+ /*
+ * REVISIT: We should attempt to chain as many descriptors as
+ * possible, perhaps even appending to those already submitted
+ * for DMA. But this is hard to do in a race-free manner.
+ */
+ if (list_empty(&dwc->active_list)) {
+ dev_vdbg(&tx->chan->dev, "tx_submit: started %u\n",
+ desc->txd.cookie);
+ dwc_dostart(dwc, desc);
+ list_add_tail(&desc->desc_node, &dwc->active_list);
+ } else {
+ dev_vdbg(&tx->chan->dev, "tx_submit: queued %u\n",
+ desc->txd.cookie);
+
+ list_add_tail(&desc->desc_node, &dwc->queue);
+ }
+
+ spin_unlock_bh(&dwc->lock);
+
+ return cookie;
+}
+
+static struct dma_async_tx_descriptor *
+dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_desc *desc;
+ struct dw_desc *first;
+ struct dw_desc *prev;
+ size_t xfer_count;
+ size_t offset;
+ unsigned int src_width;
+ unsigned int dst_width;
+ u32 ctllo;
+
+ dev_vdbg(&chan->dev, "prep_dma_memcpy d0x%x s0x%x l0x%zx f0x%lx\n",
+ dest, src, len, flags);
+
+ if (unlikely(!len)) {
+ dev_dbg(&chan->dev, "prep_dma_memcpy: length is zero!\n");
+ return NULL;
+ }
+
+ /*
+ * We can be a lot more clever here, but this should take care
+ * of the most common optimization.
+ */
+ if (!((src | dest | len) & 3))
+ src_width = dst_width = 2;
+ else if (!((src | dest | len) & 1))
+ src_width = dst_width = 1;
+ else
+ src_width = dst_width = 0;
+
+ ctllo = DWC_DEFAULT_CTLLO
+ | DWC_CTLL_DST_WIDTH(dst_width)
+ | DWC_CTLL_SRC_WIDTH(src_width)
+ | DWC_CTLL_DST_INC
+ | DWC_CTLL_SRC_INC
+ | DWC_CTLL_FC_M2M;
+ prev = first = NULL;
+
+ for (offset = 0; offset < len; offset += xfer_count << src_width) {
+ xfer_count = min_t(size_t, (len - offset) >> src_width,
+ DWC_MAX_COUNT);
+
+ desc = dwc_desc_get(dwc);
+ if (!desc)
+ goto err_desc_get;
+
+ desc->lli.sar = src + offset;
+ desc->lli.dar = dest + offset;
+ desc->lli.ctllo = ctllo;
+ desc->lli.ctlhi = xfer_count;
+
+ if (!first) {
+ first = desc;
+ } else {
+ prev->lli.llp = desc->txd.phys;
+ dma_sync_single_for_device(chan->dev.parent,
+ prev->txd.phys, sizeof(prev->lli),
+ DMA_TO_DEVICE);
+ list_add_tail(&desc->desc_node,
+ &first->txd.tx_list);
+ }
+ prev = desc;
+ }
+
+
+ if (flags & DMA_PREP_INTERRUPT)
+ /* Trigger interrupt after last block */
+ prev->lli.ctllo |= DWC_CTLL_INT_EN;
+
+ prev->lli.llp = 0;
+ dma_sync_single_for_device(chan->dev.parent,
+ prev->txd.phys, sizeof(prev->lli),
+ DMA_TO_DEVICE);
+
+ first->txd.flags = flags;
+ first->len = len;
+
+ return &first->txd;
+
+err_desc_get:
+ dwc_desc_put(dwc, first);
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+ unsigned int sg_len, enum dma_data_direction direction,
+ unsigned long flags)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma_slave *dws = dwc->dws;
+ struct dw_desc *prev;
+ struct dw_desc *first;
+ u32 ctllo;
+ dma_addr_t reg;
+ unsigned int reg_width;
+ unsigned int mem_width;
+ unsigned int i;
+ struct scatterlist *sg;
+ size_t total_len = 0;
+
+ dev_vdbg(&chan->dev, "prep_dma_slave\n");
+
+ if (unlikely(!dws || !sg_len))
+ return NULL;
+
+ reg_width = dws->slave.reg_width;
+ prev = first = NULL;
+
+ sg_len = dma_map_sg(chan->dev.parent, sgl, sg_len, direction);
+
+ switch (direction) {
+ case DMA_TO_DEVICE:
+ ctllo = (DWC_DEFAULT_CTLLO
+ | DWC_CTLL_DST_WIDTH(reg_width)
+ | DWC_CTLL_DST_FIX
+ | DWC_CTLL_SRC_INC
+ | DWC_CTLL_FC_M2P);
+ reg = dws->slave.tx_reg;
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct dw_desc *desc;
+ u32 len;
+ u32 mem;
+
+ desc = dwc_desc_get(dwc);
+ if (!desc) {
+ dev_err(&chan->dev,
+ "not enough descriptors available\n");
+ goto err_desc_get;
+ }
+
+ mem = sg_phys(sg);
+ len = sg_dma_len(sg);
+ mem_width = 2;
+ if (unlikely(mem & 3 || len & 3))
+ mem_width = 0;
+
+ desc->lli.sar = mem;
+ desc->lli.dar = reg;
+ desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width);
+ desc->lli.ctlhi = len >> mem_width;
+
+ if (!first) {
+ first = desc;
+ } else {
+ prev->lli.llp = desc->txd.phys;
+ dma_sync_single_for_device(chan->dev.parent,
+ prev->txd.phys,
+ sizeof(prev->lli),
+ DMA_TO_DEVICE);
+ list_add_tail(&desc->desc_node,
+ &first->txd.tx_list);
+ }
+ prev = desc;
+ total_len += len;
+ }
+ break;
+ case DMA_FROM_DEVICE:
+ ctllo = (DWC_DEFAULT_CTLLO
+ | DWC_CTLL_SRC_WIDTH(reg_width)
+ | DWC_CTLL_DST_INC
+ | DWC_CTLL_SRC_FIX
+ | DWC_CTLL_FC_P2M);
+
+ reg = dws->slave.rx_reg;
+ for_each_sg(sgl, sg, sg_len, i) {
+ struct dw_desc *desc;
+ u32 len;
+ u32 mem;
+
+ desc = dwc_desc_get(dwc);
+ if (!desc) {
+ dev_err(&chan->dev,
+ "not enough descriptors available\n");
+ goto err_desc_get;
+ }
+
+ mem = sg_phys(sg);
+ len = sg_dma_len(sg);
+ mem_width = 2;
+ if (unlikely(mem & 3 || len & 3))
+ mem_width = 0;
+
+ desc->lli.sar = reg;
+ desc->lli.dar = mem;
+ desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width);
+ desc->lli.ctlhi = len >> reg_width;
+
+ if (!first) {
+ first = desc;
+ } else {
+ prev->lli.llp = desc->txd.phys;
+ dma_sync_single_for_device(chan->dev.parent,
+ prev->txd.phys,
+ sizeof(prev->lli),
+ DMA_TO_DEVICE);
+ list_add_tail(&desc->desc_node,
+ &first->txd.tx_list);
+ }
+ prev = desc;
+ total_len += len;
+ }
+ break;
+ default:
+ return NULL;
+ }
+
+ if (flags & DMA_PREP_INTERRUPT)
+ /* Trigger interrupt after last block */
+ prev->lli.ctllo |= DWC_CTLL_INT_EN;
+
+ prev->lli.llp = 0;
+ dma_sync_single_for_device(chan->dev.parent,
+ prev->txd.phys, sizeof(prev->lli),
+ DMA_TO_DEVICE);
+
+ first->len = total_len;
+
+ return &first->txd;
+
+err_desc_get:
+ dwc_desc_put(dwc, first);
+ return NULL;
+}
+
+static void dwc_terminate_all(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(chan->device);
+ struct dw_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ /*
+ * This is only called when something went wrong elsewhere, so
+ * we don't really care about the data. Just disable the
+ * channel. We still have to poll the channel enable bit due
+ * to AHB/HSB limitations.
+ */
+ spin_lock_bh(&dwc->lock);
+
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+
+ while (dma_readl(dw, CH_EN) & dwc->mask)
+ cpu_relax();
+
+ /* active_list entries will end up before queued entries */
+ list_splice_init(&dwc->queue, &list);
+ list_splice_init(&dwc->active_list, &list);
+
+ spin_unlock_bh(&dwc->lock);
+
+ /* Flush all pending and queued descriptors */
+ list_for_each_entry_safe(desc, _desc, &list, desc_node)
+ dwc_descriptor_complete(dwc, desc);
+}
+
+static enum dma_status
+dwc_is_tx_complete(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ dma_cookie_t *done, dma_cookie_t *used)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+ int ret;
+
+ last_complete = dwc->completed;
+ last_used = chan->cookie;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+ if (ret != DMA_SUCCESS) {
+ dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
+
+ last_complete = dwc->completed;
+ last_used = chan->cookie;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+ }
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ return ret;
+}
+
+static void dwc_issue_pending(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+
+ spin_lock_bh(&dwc->lock);
+ if (!list_empty(&dwc->queue))
+ dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
+ spin_unlock_bh(&dwc->lock);
+}
+
+static int dwc_alloc_chan_resources(struct dma_chan *chan,
+ struct dma_client *client)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(chan->device);
+ struct dw_desc *desc;
+ struct dma_slave *slave;
+ struct dw_dma_slave *dws;
+ int i;
+ u32 cfghi;
+ u32 cfglo;
+
+ dev_vdbg(&chan->dev, "alloc_chan_resources\n");
+
+ /* Channels doing slave DMA can only handle one client. */
+ if (dwc->dws || client->slave) {
+ if (chan->client_count)
+ return -EBUSY;
+ }
+
+ /* ASSERT: channel is idle */
+ if (dma_readl(dw, CH_EN) & dwc->mask) {
+ dev_dbg(&chan->dev, "DMA channel not idle?\n");
+ return -EIO;
+ }
+
+ dwc->completed = chan->cookie = 1;
+
+ cfghi = DWC_CFGH_FIFO_MODE;
+ cfglo = 0;
+
+ slave = client->slave;
+ if (slave) {
+ /*
+ * We need controller-specific data to set up slave
+ * transfers.
+ */
+ BUG_ON(!slave->dma_dev || slave->dma_dev != dw->dma.dev);
+
+ dws = container_of(slave, struct dw_dma_slave, slave);
+
+ dwc->dws = dws;
+ cfghi = dws->cfg_hi;
+ cfglo = dws->cfg_lo;
+ } else {
+ dwc->dws = NULL;
+ }
+
+ channel_writel(dwc, CFG_LO, cfglo);
+ channel_writel(dwc, CFG_HI, cfghi);
+
+ /*
+ * NOTE: some controllers may have additional features that we
+ * need to initialize here, like "scatter-gather" (which
+ * doesn't mean what you think it means), and status writeback.
+ */
+
+ spin_lock_bh(&dwc->lock);
+ i = dwc->descs_allocated;
+ while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) {
+ spin_unlock_bh(&dwc->lock);
+
+ desc = kzalloc(sizeof(struct dw_desc), GFP_KERNEL);
+ if (!desc) {
+ dev_info(&chan->dev,
+ "only allocated %d descriptors\n", i);
+ spin_lock_bh(&dwc->lock);
+ break;
+ }
+
+ dma_async_tx_descriptor_init(&desc->txd, chan);
+ desc->txd.tx_submit = dwc_tx_submit;
+ desc->txd.flags = DMA_CTRL_ACK;
+ INIT_LIST_HEAD(&desc->txd.tx_list);
+ desc->txd.phys = dma_map_single(chan->dev.parent, &desc->lli,
+ sizeof(desc->lli), DMA_TO_DEVICE);
+ dwc_desc_put(dwc, desc);
+
+ spin_lock_bh(&dwc->lock);
+ i = ++dwc->descs_allocated;
+ }
+
+ /* Enable interrupts */
+ channel_set_bit(dw, MASK.XFER, dwc->mask);
+ channel_set_bit(dw, MASK.BLOCK, dwc->mask);
+ channel_set_bit(dw, MASK.ERROR, dwc->mask);
+
+ spin_unlock_bh(&dwc->lock);
+
+ dev_dbg(&chan->dev,
+ "alloc_chan_resources allocated %d descriptors\n", i);
+
+ return i;
+}
+
+static void dwc_free_chan_resources(struct dma_chan *chan)
+{
+ struct dw_dma_chan *dwc = to_dw_dma_chan(chan);
+ struct dw_dma *dw = to_dw_dma(chan->device);
+ struct dw_desc *desc, *_desc;
+ LIST_HEAD(list);
+
+ dev_dbg(&chan->dev, "free_chan_resources (descs allocated=%u)\n",
+ dwc->descs_allocated);
+
+ /* ASSERT: channel is idle */
+ BUG_ON(!list_empty(&dwc->active_list));
+ BUG_ON(!list_empty(&dwc->queue));
+ BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask);
+
+ spin_lock_bh(&dwc->lock);
+ list_splice_init(&dwc->free_list, &list);
+ dwc->descs_allocated = 0;
+ dwc->dws = NULL;
+
+ /* Disable interrupts */
+ channel_clear_bit(dw, MASK.XFER, dwc->mask);
+ channel_clear_bit(dw, MASK.BLOCK, dwc->mask);
+ channel_clear_bit(dw, MASK.ERROR, dwc->mask);
+
+ spin_unlock_bh(&dwc->lock);
+
+ list_for_each_entry_safe(desc, _desc, &list, desc_node) {
+ dev_vdbg(&chan->dev, " freeing descriptor %p\n", desc);
+ dma_unmap_single(chan->dev.parent, desc->txd.phys,
+ sizeof(desc->lli), DMA_TO_DEVICE);
+ kfree(desc);
+ }
+
+ dev_vdbg(&chan->dev, "free_chan_resources done\n");
+}
+
+/*----------------------------------------------------------------------*/
+
+static void dw_dma_off(struct dw_dma *dw)
+{
+ dma_writel(dw, CFG, 0);
+
+ channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
+
+ while (dma_readl(dw, CFG) & DW_CFG_DMA_EN)
+ cpu_relax();
+}
+
+static int __init dw_probe(struct platform_device *pdev)
+{
+ struct dw_dma_platform_data *pdata;
+ struct resource *io;
+ struct dw_dma *dw;
+ size_t size;
+ int irq;
+ int err;
+ int i;
+
+ pdata = pdev->dev.platform_data;
+ if (!pdata || pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS)
+ return -EINVAL;
+
+ io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!io)
+ return -EINVAL;
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0)
+ return irq;
+
+ size = sizeof(struct dw_dma);
+ size += pdata->nr_channels * sizeof(struct dw_dma_chan);
+ dw = kzalloc(size, GFP_KERNEL);
+ if (!dw)
+ return -ENOMEM;
+
+ if (!request_mem_region(io->start, DW_REGLEN, pdev->dev.driver->name)) {
+ err = -EBUSY;
+ goto err_kfree;
+ }
+
+ memset(dw, 0, sizeof *dw);
+
+ dw->regs = ioremap(io->start, DW_REGLEN);
+ if (!dw->regs) {
+ err = -ENOMEM;
+ goto err_release_r;
+ }
+
+ dw->clk = clk_get(&pdev->dev, "hclk");
+ if (IS_ERR(dw->clk)) {
+ err = PTR_ERR(dw->clk);
+ goto err_clk;
+ }
+ clk_enable(dw->clk);
+
+ /* force dma off, just in case */
+ dw_dma_off(dw);
+
+ err = request_irq(irq, dw_dma_interrupt, 0, "dw_dmac", dw);
+ if (err)
+ goto err_irq;
+
+ platform_set_drvdata(pdev, dw);
+
+ tasklet_init(&dw->tasklet, dw_dma_tasklet, (unsigned long)dw);
+
+ dw->all_chan_mask = (1 << pdata->nr_channels) - 1;
+
+ INIT_LIST_HEAD(&dw->dma.channels);
+ for (i = 0; i < pdata->nr_channels; i++, dw->dma.chancnt++) {
+ struct dw_dma_chan *dwc = &dw->chan[i];
+
+ dwc->chan.device = &dw->dma;
+ dwc->chan.cookie = dwc->completed = 1;
+ dwc->chan.chan_id = i;
+ list_add_tail(&dwc->chan.device_node, &dw->dma.channels);
+
+ dwc->ch_regs = &__dw_regs(dw)->CHAN[i];
+ spin_lock_init(&dwc->lock);
+ dwc->mask = 1 << i;
+
+ INIT_LIST_HEAD(&dwc->active_list);
+ INIT_LIST_HEAD(&dwc->queue);
+ INIT_LIST_HEAD(&dwc->free_list);
+
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+ }
+
+ /* Clear/disable all interrupts on all channels. */
+ dma_writel(dw, CLEAR.XFER, dw->all_chan_mask);
+ dma_writel(dw, CLEAR.BLOCK, dw->all_chan_mask);
+ dma_writel(dw, CLEAR.SRC_TRAN, dw->all_chan_mask);
+ dma_writel(dw, CLEAR.DST_TRAN, dw->all_chan_mask);
+ dma_writel(dw, CLEAR.ERROR, dw->all_chan_mask);
+
+ channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask);
+ channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
+
+ dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
+ dma_cap_set(DMA_SLAVE, dw->dma.cap_mask);
+ dw->dma.dev = &pdev->dev;
+ dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources;
+ dw->dma.device_free_chan_resources = dwc_free_chan_resources;
+
+ dw->dma.device_prep_dma_memcpy = dwc_prep_dma_memcpy;
+
+ dw->dma.device_prep_slave_sg = dwc_prep_slave_sg;
+ dw->dma.device_terminate_all = dwc_terminate_all;
+
+ dw->dma.device_is_tx_complete = dwc_is_tx_complete;
+ dw->dma.device_issue_pending = dwc_issue_pending;
+
+ dma_writel(dw, CFG, DW_CFG_DMA_EN);
+
+ printk(KERN_INFO "%s: DesignWare DMA Controller, %d channels\n",
+ pdev->dev.bus_id, dw->dma.chancnt);
+
+ dma_async_device_register(&dw->dma);
+
+ return 0;
+
+err_irq:
+ clk_disable(dw->clk);
+ clk_put(dw->clk);
+err_clk:
+ iounmap(dw->regs);
+ dw->regs = NULL;
+err_release_r:
+ release_resource(io);
+err_kfree:
+ kfree(dw);
+ return err;
+}
+
+static int __exit dw_remove(struct platform_device *pdev)
+{
+ struct dw_dma *dw = platform_get_drvdata(pdev);
+ struct dw_dma_chan *dwc, *_dwc;
+ struct resource *io;
+
+ dw_dma_off(dw);
+ dma_async_device_unregister(&dw->dma);
+
+ free_irq(platform_get_irq(pdev, 0), dw);
+ tasklet_kill(&dw->tasklet);
+
+ list_for_each_entry_safe(dwc, _dwc, &dw->dma.channels,
+ chan.device_node) {
+ list_del(&dwc->chan.device_node);
+ channel_clear_bit(dw, CH_EN, dwc->mask);
+ }
+
+ clk_disable(dw->clk);
+ clk_put(dw->clk);
+
+ iounmap(dw->regs);
+ dw->regs = NULL;
+
+ io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ release_mem_region(io->start, DW_REGLEN);
+
+ kfree(dw);
+
+ return 0;
+}
+
+static void dw_shutdown(struct platform_device *pdev)
+{
+ struct dw_dma *dw = platform_get_drvdata(pdev);
+
+ dw_dma_off(platform_get_drvdata(pdev));
+ clk_disable(dw->clk);
+}
+
+static int dw_suspend_late(struct platform_device *pdev, pm_message_t mesg)
+{
+ struct dw_dma *dw = platform_get_drvdata(pdev);
+
+ dw_dma_off(platform_get_drvdata(pdev));
+ clk_disable(dw->clk);
+ return 0;
+}
+
+static int dw_resume_early(struct platform_device *pdev)
+{
+ struct dw_dma *dw = platform_get_drvdata(pdev);
+
+ clk_enable(dw->clk);
+ dma_writel(dw, CFG, DW_CFG_DMA_EN);
+ return 0;
+
+}
+
+static struct platform_driver dw_driver = {
+ .remove = __exit_p(dw_remove),
+ .shutdown = dw_shutdown,
+ .suspend_late = dw_suspend_late,
+ .resume_early = dw_resume_early,
+ .driver = {
+ .name = "dw_dmac",
+ },
+};
+
+static int __init dw_init(void)
+{
+ return platform_driver_probe(&dw_driver, dw_probe);
+}
+module_init(dw_init);
+
+static void __exit dw_exit(void)
+{
+ platform_driver_unregister(&dw_driver);
+}
+module_exit(dw_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller driver");
+MODULE_AUTHOR("Haavard Skinnemoen <haavard.skinnemoen@atmel.com>");
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
new file mode 100644
index 0000000..00fdd18
--- /dev/null
+++ b/drivers/dma/dw_dmac_regs.h
@@ -0,0 +1,225 @@
+/*
+ * Driver for the Synopsys DesignWare AHB DMA Controller
+ *
+ * Copyright (C) 2005-2007 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/dw_dmac.h>
+
+#define DW_DMA_MAX_NR_CHANNELS 8
+
+/*
+ * Redefine this macro to handle differences between 32- and 64-bit
+ * addressing, big vs. little endian, etc.
+ */
+#define DW_REG(name) u32 name; u32 __pad_##name
+
+/* Hardware register definitions. */
+struct dw_dma_chan_regs {
+ DW_REG(SAR); /* Source Address Register */
+ DW_REG(DAR); /* Destination Address Register */
+ DW_REG(LLP); /* Linked List Pointer */
+ u32 CTL_LO; /* Control Register Low */
+ u32 CTL_HI; /* Control Register High */
+ DW_REG(SSTAT);
+ DW_REG(DSTAT);
+ DW_REG(SSTATAR);
+ DW_REG(DSTATAR);
+ u32 CFG_LO; /* Configuration Register Low */
+ u32 CFG_HI; /* Configuration Register High */
+ DW_REG(SGR);
+ DW_REG(DSR);
+};
+
+struct dw_dma_irq_regs {
+ DW_REG(XFER);
+ DW_REG(BLOCK);
+ DW_REG(SRC_TRAN);
+ DW_REG(DST_TRAN);
+ DW_REG(ERROR);
+};
+
+struct dw_dma_regs {
+ /* per-channel registers */
+ struct dw_dma_chan_regs CHAN[DW_DMA_MAX_NR_CHANNELS];
+
+ /* irq handling */
+ struct dw_dma_irq_regs RAW; /* r */
+ struct dw_dma_irq_regs STATUS; /* r (raw & mask) */
+ struct dw_dma_irq_regs MASK; /* rw (set = irq enabled) */
+ struct dw_dma_irq_regs CLEAR; /* w (ack, affects "raw") */
+
+ DW_REG(STATUS_INT); /* r */
+
+ /* software handshaking */
+ DW_REG(REQ_SRC);
+ DW_REG(REQ_DST);
+ DW_REG(SGL_REQ_SRC);
+ DW_REG(SGL_REQ_DST);
+ DW_REG(LAST_SRC);
+ DW_REG(LAST_DST);
+
+ /* miscellaneous */
+ DW_REG(CFG);
+ DW_REG(CH_EN);
+ DW_REG(ID);
+ DW_REG(TEST);
+
+ /* optional encoded params, 0x3c8..0x3 */
+};
+
+/* Bitfields in CTL_LO */
+#define DWC_CTLL_INT_EN (1 << 0) /* irqs enabled? */
+#define DWC_CTLL_DST_WIDTH(n) ((n)<<1) /* bytes per element */
+#define DWC_CTLL_SRC_WIDTH(n) ((n)<<4)
+#define DWC_CTLL_DST_INC (0<<7) /* DAR update/not */
+#define DWC_CTLL_DST_DEC (1<<7)
+#define DWC_CTLL_DST_FIX (2<<7)
+#define DWC_CTLL_SRC_INC (0<<7) /* SAR update/not */
+#define DWC_CTLL_SRC_DEC (1<<9)
+#define DWC_CTLL_SRC_FIX (2<<9)
+#define DWC_CTLL_DST_MSIZE(n) ((n)<<11) /* burst, #elements */
+#define DWC_CTLL_SRC_MSIZE(n) ((n)<<14)
+#define DWC_CTLL_S_GATH_EN (1 << 17) /* src gather, !FIX */
+#define DWC_CTLL_D_SCAT_EN (1 << 18) /* dst scatter, !FIX */
+#define DWC_CTLL_FC_M2M (0 << 20) /* mem-to-mem */
+#define DWC_CTLL_FC_M2P (1 << 20) /* mem-to-periph */
+#define DWC_CTLL_FC_P2M (2 << 20) /* periph-to-mem */
+#define DWC_CTLL_FC_P2P (3 << 20) /* periph-to-periph */
+/* plus 4 transfer types for peripheral-as-flow-controller */
+#define DWC_CTLL_DMS(n) ((n)<<23) /* dst master select */
+#define DWC_CTLL_SMS(n) ((n)<<25) /* src master select */
+#define DWC_CTLL_LLP_D_EN (1 << 27) /* dest block chain */
+#define DWC_CTLL_LLP_S_EN (1 << 28) /* src block chain */
+
+/* Bitfields in CTL_HI */
+#define DWC_CTLH_DONE 0x00001000
+#define DWC_CTLH_BLOCK_TS_MASK 0x00000fff
+
+/* Bitfields in CFG_LO. Platform-configurable bits are in <linux/dw_dmac.h> */
+#define DWC_CFGL_CH_SUSP (1 << 8) /* pause xfer */
+#define DWC_CFGL_FIFO_EMPTY (1 << 9) /* pause xfer */
+#define DWC_CFGL_HS_DST (1 << 10) /* handshake w/dst */
+#define DWC_CFGL_HS_SRC (1 << 11) /* handshake w/src */
+#define DWC_CFGL_MAX_BURST(x) ((x) << 20)
+#define DWC_CFGL_RELOAD_SAR (1 << 30)
+#define DWC_CFGL_RELOAD_DAR (1 << 31)
+
+/* Bitfields in CFG_HI. Platform-configurable bits are in <linux/dw_dmac.h> */
+#define DWC_CFGH_DS_UPD_EN (1 << 5)
+#define DWC_CFGH_SS_UPD_EN (1 << 6)
+
+/* Bitfields in SGR */
+#define DWC_SGR_SGI(x) ((x) << 0)
+#define DWC_SGR_SGC(x) ((x) << 20)
+
+/* Bitfields in DSR */
+#define DWC_DSR_DSI(x) ((x) << 0)
+#define DWC_DSR_DSC(x) ((x) << 20)
+
+/* Bitfields in CFG */
+#define DW_CFG_DMA_EN (1 << 0)
+
+#define DW_REGLEN 0x400
+
+struct dw_dma_chan {
+ struct dma_chan chan;
+ void __iomem *ch_regs;
+ u8 mask;
+
+ spinlock_t lock;
+
+ /* these other elements are all protected by lock */
+ dma_cookie_t completed;
+ struct list_head active_list;
+ struct list_head queue;
+ struct list_head free_list;
+
+ struct dw_dma_slave *dws;
+
+ unsigned int descs_allocated;
+};
+
+static inline struct dw_dma_chan_regs __iomem *
+__dwc_regs(struct dw_dma_chan *dwc)
+{
+ return dwc->ch_regs;
+}
+
+#define channel_readl(dwc, name) \
+ __raw_readl(&(__dwc_regs(dwc)->name))
+#define channel_writel(dwc, name, val) \
+ __raw_writel((val), &(__dwc_regs(dwc)->name))
+
+static inline struct dw_dma_chan *to_dw_dma_chan(struct dma_chan *chan)
+{
+ return container_of(chan, struct dw_dma_chan, chan);
+}
+
+
+struct dw_dma {
+ struct dma_device dma;
+ void __iomem *regs;
+ struct tasklet_struct tasklet;
+ struct clk *clk;
+
+ u8 all_chan_mask;
+
+ struct dw_dma_chan chan[0];
+};
+
+static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw)
+{
+ return dw->regs;
+}
+
+#define dma_readl(dw, name) \
+ __raw_readl(&(__dw_regs(dw)->name))
+#define dma_writel(dw, name, val) \
+ __raw_writel((val), &(__dw_regs(dw)->name))
+
+#define channel_set_bit(dw, reg, mask) \
+ dma_writel(dw, reg, ((mask) << 8) | (mask))
+#define channel_clear_bit(dw, reg, mask) \
+ dma_writel(dw, reg, ((mask) << 8) | 0)
+
+static inline struct dw_dma *to_dw_dma(struct dma_device *ddev)
+{
+ return container_of(ddev, struct dw_dma, dma);
+}
+
+/* LLI == Linked List Item; a.k.a. DMA block descriptor */
+struct dw_lli {
+ /* values that are not changed by hardware */
+ dma_addr_t sar;
+ dma_addr_t dar;
+ dma_addr_t llp; /* chain to next lli */
+ u32 ctllo;
+ /* values that may get written back: */
+ u32 ctlhi;
+ /* sstat and dstat can snapshot peripheral register state.
+ * silicon config may discard either or both...
+ */
+ u32 sstat;
+ u32 dstat;
+};
+
+struct dw_desc {
+ /* FIRST values the hardware uses */
+ struct dw_lli lli;
+
+ /* THEN values for driver housekeeping */
+ struct list_head desc_node;
+ struct dma_async_tx_descriptor txd;
+ size_t len;
+};
+
+static inline struct dw_desc *
+txd_to_dw_desc(struct dma_async_tx_descriptor *txd)
+{
+ return container_of(txd, struct dw_desc, txd);
+}
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
new file mode 100644
index 0000000..0b95dcc
--- /dev/null
+++ b/drivers/dma/fsldma.c
@@ -0,0 +1,1036 @@
+/*
+ * Freescale MPC85xx, MPC83xx DMA Engine support
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author:
+ * Zhang Wei <wei.zhang@freescale.com>, Jul 2007
+ * Ebony Zhu <ebony.zhu@freescale.com>, May 2007
+ *
+ * Description:
+ * DMA engine driver for Freescale MPC8540 DMA controller, which is
+ * also fit for MPC8560, MPC8555, MPC8548, MPC8641, and etc.
+ * The support for MPC8349 DMA contorller is also added.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/of_platform.h>
+
+#include "fsldma.h"
+
+static void dma_init(struct fsl_dma_chan *fsl_chan)
+{
+ /* Reset the channel */
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, 0, 32);
+
+ switch (fsl_chan->feature & FSL_DMA_IP_MASK) {
+ case FSL_DMA_IP_85XX:
+ /* Set the channel to below modes:
+ * EIE - Error interrupt enable
+ * EOSIE - End of segments interrupt enable (basic mode)
+ * EOLNIE - End of links interrupt enable
+ */
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, FSL_DMA_MR_EIE
+ | FSL_DMA_MR_EOLNIE | FSL_DMA_MR_EOSIE, 32);
+ break;
+ case FSL_DMA_IP_83XX:
+ /* Set the channel to below modes:
+ * EOTIE - End-of-transfer interrupt enable
+ */
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, FSL_DMA_MR_EOTIE,
+ 32);
+ break;
+ }
+
+}
+
+static void set_sr(struct fsl_dma_chan *fsl_chan, u32 val)
+{
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->sr, val, 32);
+}
+
+static u32 get_sr(struct fsl_dma_chan *fsl_chan)
+{
+ return DMA_IN(fsl_chan, &fsl_chan->reg_base->sr, 32);
+}
+
+static void set_desc_cnt(struct fsl_dma_chan *fsl_chan,
+ struct fsl_dma_ld_hw *hw, u32 count)
+{
+ hw->count = CPU_TO_DMA(fsl_chan, count, 32);
+}
+
+static void set_desc_src(struct fsl_dma_chan *fsl_chan,
+ struct fsl_dma_ld_hw *hw, dma_addr_t src)
+{
+ u64 snoop_bits;
+
+ snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
+ ? ((u64)FSL_DMA_SATR_SREADTYPE_SNOOP_READ << 32) : 0;
+ hw->src_addr = CPU_TO_DMA(fsl_chan, snoop_bits | src, 64);
+}
+
+static void set_desc_dest(struct fsl_dma_chan *fsl_chan,
+ struct fsl_dma_ld_hw *hw, dma_addr_t dest)
+{
+ u64 snoop_bits;
+
+ snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
+ ? ((u64)FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE << 32) : 0;
+ hw->dst_addr = CPU_TO_DMA(fsl_chan, snoop_bits | dest, 64);
+}
+
+static void set_desc_next(struct fsl_dma_chan *fsl_chan,
+ struct fsl_dma_ld_hw *hw, dma_addr_t next)
+{
+ u64 snoop_bits;
+
+ snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX)
+ ? FSL_DMA_SNEN : 0;
+ hw->next_ln_addr = CPU_TO_DMA(fsl_chan, snoop_bits | next, 64);
+}
+
+static void set_cdar(struct fsl_dma_chan *fsl_chan, dma_addr_t addr)
+{
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->cdar, addr | FSL_DMA_SNEN, 64);
+}
+
+static dma_addr_t get_cdar(struct fsl_dma_chan *fsl_chan)
+{
+ return DMA_IN(fsl_chan, &fsl_chan->reg_base->cdar, 64) & ~FSL_DMA_SNEN;
+}
+
+static void set_ndar(struct fsl_dma_chan *fsl_chan, dma_addr_t addr)
+{
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->ndar, addr, 64);
+}
+
+static dma_addr_t get_ndar(struct fsl_dma_chan *fsl_chan)
+{
+ return DMA_IN(fsl_chan, &fsl_chan->reg_base->ndar, 64);
+}
+
+static u32 get_bcr(struct fsl_dma_chan *fsl_chan)
+{
+ return DMA_IN(fsl_chan, &fsl_chan->reg_base->bcr, 32);
+}
+
+static int dma_is_idle(struct fsl_dma_chan *fsl_chan)
+{
+ u32 sr = get_sr(fsl_chan);
+ return (!(sr & FSL_DMA_SR_CB)) || (sr & FSL_DMA_SR_CH);
+}
+
+static void dma_start(struct fsl_dma_chan *fsl_chan)
+{
+ u32 mr_set = 0;;
+
+ if (fsl_chan->feature & FSL_DMA_CHAN_PAUSE_EXT) {
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->bcr, 0, 32);
+ mr_set |= FSL_DMA_MR_EMP_EN;
+ } else
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
+ & ~FSL_DMA_MR_EMP_EN, 32);
+
+ if (fsl_chan->feature & FSL_DMA_CHAN_START_EXT)
+ mr_set |= FSL_DMA_MR_EMS_EN;
+ else
+ mr_set |= FSL_DMA_MR_CS;
+
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
+ | mr_set, 32);
+}
+
+static void dma_halt(struct fsl_dma_chan *fsl_chan)
+{
+ int i = 0;
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) | FSL_DMA_MR_CA,
+ 32);
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) & ~(FSL_DMA_MR_CS
+ | FSL_DMA_MR_EMS_EN | FSL_DMA_MR_CA), 32);
+
+ while (!dma_is_idle(fsl_chan) && (i++ < 100))
+ udelay(10);
+ if (i >= 100 && !dma_is_idle(fsl_chan))
+ dev_err(fsl_chan->dev, "DMA halt timeout!\n");
+}
+
+static void set_ld_eol(struct fsl_dma_chan *fsl_chan,
+ struct fsl_desc_sw *desc)
+{
+ desc->hw.next_ln_addr = CPU_TO_DMA(fsl_chan,
+ DMA_TO_CPU(fsl_chan, desc->hw.next_ln_addr, 64) | FSL_DMA_EOL,
+ 64);
+}
+
+static void append_ld_queue(struct fsl_dma_chan *fsl_chan,
+ struct fsl_desc_sw *new_desc)
+{
+ struct fsl_desc_sw *queue_tail = to_fsl_desc(fsl_chan->ld_queue.prev);
+
+ if (list_empty(&fsl_chan->ld_queue))
+ return;
+
+ /* Link to the new descriptor physical address and
+ * Enable End-of-segment interrupt for
+ * the last link descriptor.
+ * (the previous node's next link descriptor)
+ *
+ * For FSL_DMA_IP_83xx, the snoop enable bit need be set.
+ */
+ queue_tail->hw.next_ln_addr = CPU_TO_DMA(fsl_chan,
+ new_desc->async_tx.phys | FSL_DMA_EOSIE |
+ (((fsl_chan->feature & FSL_DMA_IP_MASK)
+ == FSL_DMA_IP_83XX) ? FSL_DMA_SNEN : 0), 64);
+}
+
+/**
+ * fsl_chan_set_src_loop_size - Set source address hold transfer size
+ * @fsl_chan : Freescale DMA channel
+ * @size : Address loop size, 0 for disable loop
+ *
+ * The set source address hold transfer size. The source
+ * address hold or loop transfer size is when the DMA transfer
+ * data from source address (SA), if the loop size is 4, the DMA will
+ * read data from SA, SA + 1, SA + 2, SA + 3, then loop back to SA,
+ * SA + 1 ... and so on.
+ */
+static void fsl_chan_set_src_loop_size(struct fsl_dma_chan *fsl_chan, int size)
+{
+ switch (size) {
+ case 0:
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) &
+ (~FSL_DMA_MR_SAHE), 32);
+ break;
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) |
+ FSL_DMA_MR_SAHE | (__ilog2(size) << 14),
+ 32);
+ break;
+ }
+}
+
+/**
+ * fsl_chan_set_dest_loop_size - Set destination address hold transfer size
+ * @fsl_chan : Freescale DMA channel
+ * @size : Address loop size, 0 for disable loop
+ *
+ * The set destination address hold transfer size. The destination
+ * address hold or loop transfer size is when the DMA transfer
+ * data to destination address (TA), if the loop size is 4, the DMA will
+ * write data to TA, TA + 1, TA + 2, TA + 3, then loop back to TA,
+ * TA + 1 ... and so on.
+ */
+static void fsl_chan_set_dest_loop_size(struct fsl_dma_chan *fsl_chan, int size)
+{
+ switch (size) {
+ case 0:
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) &
+ (~FSL_DMA_MR_DAHE), 32);
+ break;
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) |
+ FSL_DMA_MR_DAHE | (__ilog2(size) << 16),
+ 32);
+ break;
+ }
+}
+
+/**
+ * fsl_chan_toggle_ext_pause - Toggle channel external pause status
+ * @fsl_chan : Freescale DMA channel
+ * @size : Pause control size, 0 for disable external pause control.
+ * The maximum is 1024.
+ *
+ * The Freescale DMA channel can be controlled by the external
+ * signal DREQ#. The pause control size is how many bytes are allowed
+ * to transfer before pausing the channel, after which a new assertion
+ * of DREQ# resumes channel operation.
+ */
+static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int size)
+{
+ if (size > 1024)
+ return;
+
+ if (size) {
+ DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+ DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
+ | ((__ilog2(size) << 24) & 0x0f000000),
+ 32);
+ fsl_chan->feature |= FSL_DMA_CHAN_PAUSE_EXT;
+ } else
+ fsl_chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT;
+}
+
+/**
+ * fsl_chan_toggle_ext_start - Toggle channel external start status
+ * @fsl_chan : Freescale DMA channel
+ * @enable : 0 is disabled, 1 is enabled.
+ *
+ * If enable the external start, the channel can be started by an
+ * external DMA start pin. So the dma_start() does not start the
+ * transfer immediately. The DMA channel will wait for the
+ * control pin asserted.
+ */
+static void fsl_chan_toggle_ext_start(struct fsl_dma_chan *fsl_chan, int enable)
+{
+ if (enable)
+ fsl_chan->feature |= FSL_DMA_CHAN_START_EXT;
+ else
+ fsl_chan->feature &= ~FSL_DMA_CHAN_START_EXT;
+}
+
+static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
+ struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan);
+ unsigned long flags;
+ dma_cookie_t cookie;
+
+ /* cookie increment and adding to ld_queue must be atomic */
+ spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+
+ cookie = fsl_chan->common.cookie;
+ cookie++;
+ if (cookie < 0)
+ cookie = 1;
+ desc->async_tx.cookie = cookie;
+ fsl_chan->common.cookie = desc->async_tx.cookie;
+
+ append_ld_queue(fsl_chan, desc);
+ list_splice_init(&desc->async_tx.tx_list, fsl_chan->ld_queue.prev);
+
+ spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+
+ return cookie;
+}
+
+/**
+ * fsl_dma_alloc_descriptor - Allocate descriptor from channel's DMA pool.
+ * @fsl_chan : Freescale DMA channel
+ *
+ * Return - The descriptor allocated. NULL for failed.
+ */
+static struct fsl_desc_sw *fsl_dma_alloc_descriptor(
+ struct fsl_dma_chan *fsl_chan)
+{
+ dma_addr_t pdesc;
+ struct fsl_desc_sw *desc_sw;
+
+ desc_sw = dma_pool_alloc(fsl_chan->desc_pool, GFP_ATOMIC, &pdesc);
+ if (desc_sw) {
+ memset(desc_sw, 0, sizeof(struct fsl_desc_sw));
+ dma_async_tx_descriptor_init(&desc_sw->async_tx,
+ &fsl_chan->common);
+ desc_sw->async_tx.tx_submit = fsl_dma_tx_submit;
+ INIT_LIST_HEAD(&desc_sw->async_tx.tx_list);
+ desc_sw->async_tx.phys = pdesc;
+ }
+
+ return desc_sw;
+}
+
+
+/**
+ * fsl_dma_alloc_chan_resources - Allocate resources for DMA channel.
+ * @fsl_chan : Freescale DMA channel
+ *
+ * This function will create a dma pool for descriptor allocation.
+ *
+ * Return - The number of descriptors allocated.
+ */
+static int fsl_dma_alloc_chan_resources(struct dma_chan *chan,
+ struct dma_client *client)
+{
+ struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
+
+ /* Has this channel already been allocated? */
+ if (fsl_chan->desc_pool)
+ return 1;
+
+ /* We need the descriptor to be aligned to 32bytes
+ * for meeting FSL DMA specification requirement.
+ */
+ fsl_chan->desc_pool = dma_pool_create("fsl_dma_engine_desc_pool",
+ fsl_chan->dev, sizeof(struct fsl_desc_sw),
+ 32, 0);
+ if (!fsl_chan->desc_pool) {
+ dev_err(fsl_chan->dev, "No memory for channel %d "
+ "descriptor dma pool.\n", fsl_chan->id);
+ return 0;
+ }
+
+ return 1;
+}
+
+/**
+ * fsl_dma_free_chan_resources - Free all resources of the channel.
+ * @fsl_chan : Freescale DMA channel
+ */
+static void fsl_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
+ struct fsl_desc_sw *desc, *_desc;
+ unsigned long flags;
+
+ dev_dbg(fsl_chan->dev, "Free all channel resources.\n");
+ spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+ list_for_each_entry_safe(desc, _desc, &fsl_chan->ld_queue, node) {
+#ifdef FSL_DMA_LD_DEBUG
+ dev_dbg(fsl_chan->dev,
+ "LD %p will be released.\n", desc);
+#endif
+ list_del(&desc->node);
+ /* free link descriptor */
+ dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys);
+ }
+ spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+ dma_pool_destroy(fsl_chan->desc_pool);
+
+ fsl_chan->desc_pool = NULL;
+}
+
+static struct dma_async_tx_descriptor *
+fsl_dma_prep_interrupt(struct dma_chan *chan, unsigned long flags)
+{
+ struct fsl_dma_chan *fsl_chan;
+ struct fsl_desc_sw *new;
+
+ if (!chan)
+ return NULL;
+
+ fsl_chan = to_fsl_chan(chan);
+
+ new = fsl_dma_alloc_descriptor(fsl_chan);
+ if (!new) {
+ dev_err(fsl_chan->dev, "No free memory for link descriptor\n");
+ return NULL;
+ }
+
+ new->async_tx.cookie = -EBUSY;
+ new->async_tx.flags = flags;
+
+ /* Insert the link descriptor to the LD ring */
+ list_add_tail(&new->node, &new->async_tx.tx_list);
+
+ /* Set End-of-link to the last link descriptor of new list*/
+ set_ld_eol(fsl_chan, new);
+
+ return &new->async_tx;
+}
+
+static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
+ struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src,
+ size_t len, unsigned long flags)
+{
+ struct fsl_dma_chan *fsl_chan;
+ struct fsl_desc_sw *first = NULL, *prev = NULL, *new;
+ size_t copy;
+ LIST_HEAD(link_chain);
+
+ if (!chan)
+ return NULL;
+
+ if (!len)
+ return NULL;
+
+ fsl_chan = to_fsl_chan(chan);
+
+ do {
+
+ /* Allocate the link descriptor from DMA pool */
+ new = fsl_dma_alloc_descriptor(fsl_chan);
+ if (!new) {
+ dev_err(fsl_chan->dev,
+ "No free memory for link descriptor\n");
+ return NULL;
+ }
+#ifdef FSL_DMA_LD_DEBUG
+ dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new);
+#endif
+
+ copy = min(len, (size_t)FSL_DMA_BCR_MAX_CNT);
+
+ set_desc_cnt(fsl_chan, &new->hw, copy);
+ set_desc_src(fsl_chan, &new->hw, dma_src);
+ set_desc_dest(fsl_chan, &new->hw, dma_dest);
+
+ if (!first)
+ first = new;
+ else
+ set_desc_next(fsl_chan, &prev->hw, new->async_tx.phys);
+
+ new->async_tx.cookie = 0;
+ async_tx_ack(&new->async_tx);
+
+ prev = new;
+ len -= copy;
+ dma_src += copy;
+ dma_dest += copy;
+
+ /* Insert the link descriptor to the LD ring */
+ list_add_tail(&new->node, &first->async_tx.tx_list);
+ } while (len);
+
+ new->async_tx.flags = flags; /* client is in control of this ack */
+ new->async_tx.cookie = -EBUSY;
+
+ /* Set End-of-link to the last link descriptor of new list*/
+ set_ld_eol(fsl_chan, new);
+
+ return first ? &first->async_tx : NULL;
+}
+
+/**
+ * fsl_dma_update_completed_cookie - Update the completed cookie.
+ * @fsl_chan : Freescale DMA channel
+ */
+static void fsl_dma_update_completed_cookie(struct fsl_dma_chan *fsl_chan)
+{
+ struct fsl_desc_sw *cur_desc, *desc;
+ dma_addr_t ld_phy;
+
+ ld_phy = get_cdar(fsl_chan) & FSL_DMA_NLDA_MASK;
+
+ if (ld_phy) {
+ cur_desc = NULL;
+ list_for_each_entry(desc, &fsl_chan->ld_queue, node)
+ if (desc->async_tx.phys == ld_phy) {
+ cur_desc = desc;
+ break;
+ }
+
+ if (cur_desc && cur_desc->async_tx.cookie) {
+ if (dma_is_idle(fsl_chan))
+ fsl_chan->completed_cookie =
+ cur_desc->async_tx.cookie;
+ else
+ fsl_chan->completed_cookie =
+ cur_desc->async_tx.cookie - 1;
+ }
+ }
+}
+
+/**
+ * fsl_chan_ld_cleanup - Clean up link descriptors
+ * @fsl_chan : Freescale DMA channel
+ *
+ * This function clean up the ld_queue of DMA channel.
+ * If 'in_intr' is set, the function will move the link descriptor to
+ * the recycle list. Otherwise, free it directly.
+ */
+static void fsl_chan_ld_cleanup(struct fsl_dma_chan *fsl_chan)
+{
+ struct fsl_desc_sw *desc, *_desc;
+ unsigned long flags;
+
+ spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+
+ dev_dbg(fsl_chan->dev, "chan completed_cookie = %d\n",
+ fsl_chan->completed_cookie);
+ list_for_each_entry_safe(desc, _desc, &fsl_chan->ld_queue, node) {
+ dma_async_tx_callback callback;
+ void *callback_param;
+
+ if (dma_async_is_complete(desc->async_tx.cookie,
+ fsl_chan->completed_cookie, fsl_chan->common.cookie)
+ == DMA_IN_PROGRESS)
+ break;
+
+ callback = desc->async_tx.callback;
+ callback_param = desc->async_tx.callback_param;
+
+ /* Remove from ld_queue list */
+ list_del(&desc->node);
+
+ dev_dbg(fsl_chan->dev, "link descriptor %p will be recycle.\n",
+ desc);
+ dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys);
+
+ /* Run the link descriptor callback function */
+ if (callback) {
+ spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+ dev_dbg(fsl_chan->dev, "link descriptor %p callback\n",
+ desc);
+ callback(callback_param);
+ spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+ }
+ }
+ spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+}
+
+/**
+ * fsl_chan_xfer_ld_queue - Transfer link descriptors in channel ld_queue.
+ * @fsl_chan : Freescale DMA channel
+ */
+static void fsl_chan_xfer_ld_queue(struct fsl_dma_chan *fsl_chan)
+{
+ struct list_head *ld_node;
+ dma_addr_t next_dest_addr;
+ unsigned long flags;
+
+ if (!dma_is_idle(fsl_chan))
+ return;
+
+ dma_halt(fsl_chan);
+
+ /* If there are some link descriptors
+ * not transfered in queue. We need to start it.
+ */
+ spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+
+ /* Find the first un-transfer desciptor */
+ for (ld_node = fsl_chan->ld_queue.next;
+ (ld_node != &fsl_chan->ld_queue)
+ && (dma_async_is_complete(
+ to_fsl_desc(ld_node)->async_tx.cookie,
+ fsl_chan->completed_cookie,
+ fsl_chan->common.cookie) == DMA_SUCCESS);
+ ld_node = ld_node->next);
+
+ spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+
+ if (ld_node != &fsl_chan->ld_queue) {
+ /* Get the ld start address from ld_queue */
+ next_dest_addr = to_fsl_desc(ld_node)->async_tx.phys;
+ dev_dbg(fsl_chan->dev, "xfer LDs staring from %p\n",
+ (void *)next_dest_addr);
+ set_cdar(fsl_chan, next_dest_addr);
+ dma_start(fsl_chan);
+ } else {
+ set_cdar(fsl_chan, 0);
+ set_ndar(fsl_chan, 0);
+ }
+}
+
+/**
+ * fsl_dma_memcpy_issue_pending - Issue the DMA start command
+ * @fsl_chan : Freescale DMA channel
+ */
+static void fsl_dma_memcpy_issue_pending(struct dma_chan *chan)
+{
+ struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
+
+#ifdef FSL_DMA_LD_DEBUG
+ struct fsl_desc_sw *ld;
+ unsigned long flags;
+
+ spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+ if (list_empty(&fsl_chan->ld_queue)) {
+ spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+ return;
+ }
+
+ dev_dbg(fsl_chan->dev, "--memcpy issue--\n");
+ list_for_each_entry(ld, &fsl_chan->ld_queue, node) {
+ int i;
+ dev_dbg(fsl_chan->dev, "Ch %d, LD %08x\n",
+ fsl_chan->id, ld->async_tx.phys);
+ for (i = 0; i < 8; i++)
+ dev_dbg(fsl_chan->dev, "LD offset %d: %08x\n",
+ i, *(((u32 *)&ld->hw) + i));
+ }
+ dev_dbg(fsl_chan->dev, "----------------\n");
+ spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+#endif
+
+ fsl_chan_xfer_ld_queue(fsl_chan);
+}
+
+/**
+ * fsl_dma_is_complete - Determine the DMA status
+ * @fsl_chan : Freescale DMA channel
+ */
+static enum dma_status fsl_dma_is_complete(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ dma_cookie_t *done,
+ dma_cookie_t *used)
+{
+ struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+
+ fsl_chan_ld_cleanup(fsl_chan);
+
+ last_used = chan->cookie;
+ last_complete = fsl_chan->completed_cookie;
+
+ if (done)
+ *done = last_complete;
+
+ if (used)
+ *used = last_used;
+
+ return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static irqreturn_t fsl_dma_chan_do_interrupt(int irq, void *data)
+{
+ struct fsl_dma_chan *fsl_chan = (struct fsl_dma_chan *)data;
+ u32 stat;
+ int update_cookie = 0;
+ int xfer_ld_q = 0;
+
+ stat = get_sr(fsl_chan);
+ dev_dbg(fsl_chan->dev, "event: channel %d, stat = 0x%x\n",
+ fsl_chan->id, stat);
+ set_sr(fsl_chan, stat); /* Clear the event register */
+
+ stat &= ~(FSL_DMA_SR_CB | FSL_DMA_SR_CH);
+ if (!stat)
+ return IRQ_NONE;
+
+ if (stat & FSL_DMA_SR_TE)
+ dev_err(fsl_chan->dev, "Transfer Error!\n");
+
+ /* Programming Error
+ * The DMA_INTERRUPT async_tx is a NULL transfer, which will
+ * triger a PE interrupt.
+ */
+ if (stat & FSL_DMA_SR_PE) {
+ dev_dbg(fsl_chan->dev, "event: Programming Error INT\n");
+ if (get_bcr(fsl_chan) == 0) {
+ /* BCR register is 0, this is a DMA_INTERRUPT async_tx.
+ * Now, update the completed cookie, and continue the
+ * next uncompleted transfer.
+ */
+ update_cookie = 1;
+ xfer_ld_q = 1;
+ }
+ stat &= ~FSL_DMA_SR_PE;
+ }
+
+ /* If the link descriptor segment transfer finishes,
+ * we will recycle the used descriptor.
+ */
+ if (stat & FSL_DMA_SR_EOSI) {
+ dev_dbg(fsl_chan->dev, "event: End-of-segments INT\n");
+ dev_dbg(fsl_chan->dev, "event: clndar %p, nlndar %p\n",
+ (void *)get_cdar(fsl_chan), (void *)get_ndar(fsl_chan));
+ stat &= ~FSL_DMA_SR_EOSI;
+ update_cookie = 1;
+ }
+
+ /* For MPC8349, EOCDI event need to update cookie
+ * and start the next transfer if it exist.
+ */
+ if (stat & FSL_DMA_SR_EOCDI) {
+ dev_dbg(fsl_chan->dev, "event: End-of-Chain link INT\n");
+ stat &= ~FSL_DMA_SR_EOCDI;
+ update_cookie = 1;
+ xfer_ld_q = 1;
+ }
+
+ /* If it current transfer is the end-of-transfer,
+ * we should clear the Channel Start bit for
+ * prepare next transfer.
+ */
+ if (stat & FSL_DMA_SR_EOLNI) {
+ dev_dbg(fsl_chan->dev, "event: End-of-link INT\n");
+ stat &= ~FSL_DMA_SR_EOLNI;
+ xfer_ld_q = 1;
+ }
+
+ if (update_cookie)
+ fsl_dma_update_completed_cookie(fsl_chan);
+ if (xfer_ld_q)
+ fsl_chan_xfer_ld_queue(fsl_chan);
+ if (stat)
+ dev_dbg(fsl_chan->dev, "event: unhandled sr 0x%02x\n",
+ stat);
+
+ dev_dbg(fsl_chan->dev, "event: Exit\n");
+ tasklet_schedule(&fsl_chan->tasklet);
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t fsl_dma_do_interrupt(int irq, void *data)
+{
+ struct fsl_dma_device *fdev = (struct fsl_dma_device *)data;
+ u32 gsr;
+ int ch_nr;
+
+ gsr = (fdev->feature & FSL_DMA_BIG_ENDIAN) ? in_be32(fdev->reg_base)
+ : in_le32(fdev->reg_base);
+ ch_nr = (32 - ffs(gsr)) / 8;
+
+ return fdev->chan[ch_nr] ? fsl_dma_chan_do_interrupt(irq,
+ fdev->chan[ch_nr]) : IRQ_NONE;
+}
+
+static void dma_do_tasklet(unsigned long data)
+{
+ struct fsl_dma_chan *fsl_chan = (struct fsl_dma_chan *)data;
+ fsl_chan_ld_cleanup(fsl_chan);
+}
+
+static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev,
+ struct device_node *node, u32 feature, const char *compatible)
+{
+ struct fsl_dma_chan *new_fsl_chan;
+ int err;
+
+ /* alloc channel */
+ new_fsl_chan = kzalloc(sizeof(struct fsl_dma_chan), GFP_KERNEL);
+ if (!new_fsl_chan) {
+ dev_err(fdev->dev, "No free memory for allocating "
+ "dma channels!\n");
+ return -ENOMEM;
+ }
+
+ /* get dma channel register base */
+ err = of_address_to_resource(node, 0, &new_fsl_chan->reg);
+ if (err) {
+ dev_err(fdev->dev, "Can't get %s property 'reg'\n",
+ node->full_name);
+ goto err_no_reg;
+ }
+
+ new_fsl_chan->feature = feature;
+
+ if (!fdev->feature)
+ fdev->feature = new_fsl_chan->feature;
+
+ /* If the DMA device's feature is different than its channels',
+ * report the bug.
+ */
+ WARN_ON(fdev->feature != new_fsl_chan->feature);
+
+ new_fsl_chan->dev = &new_fsl_chan->common.dev;
+ new_fsl_chan->reg_base = ioremap(new_fsl_chan->reg.start,
+ new_fsl_chan->reg.end - new_fsl_chan->reg.start + 1);
+
+ new_fsl_chan->id = ((new_fsl_chan->reg.start - 0x100) & 0xfff) >> 7;
+ if (new_fsl_chan->id > FSL_DMA_MAX_CHANS_PER_DEVICE) {
+ dev_err(fdev->dev, "There is no %d channel!\n",
+ new_fsl_chan->id);
+ err = -EINVAL;
+ goto err_no_chan;
+ }
+ fdev->chan[new_fsl_chan->id] = new_fsl_chan;
+ tasklet_init(&new_fsl_chan->tasklet, dma_do_tasklet,
+ (unsigned long)new_fsl_chan);
+
+ /* Init the channel */
+ dma_init(new_fsl_chan);
+
+ /* Clear cdar registers */
+ set_cdar(new_fsl_chan, 0);
+
+ switch (new_fsl_chan->feature & FSL_DMA_IP_MASK) {
+ case FSL_DMA_IP_85XX:
+ new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start;
+ new_fsl_chan->toggle_ext_pause = fsl_chan_toggle_ext_pause;
+ case FSL_DMA_IP_83XX:
+ new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size;
+ new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size;
+ }
+
+ spin_lock_init(&new_fsl_chan->desc_lock);
+ INIT_LIST_HEAD(&new_fsl_chan->ld_queue);
+
+ new_fsl_chan->common.device = &fdev->common;
+
+ /* Add the channel to DMA device channel list */
+ list_add_tail(&new_fsl_chan->common.device_node,
+ &fdev->common.channels);
+ fdev->common.chancnt++;
+
+ new_fsl_chan->irq = irq_of_parse_and_map(node, 0);
+ if (new_fsl_chan->irq != NO_IRQ) {
+ err = request_irq(new_fsl_chan->irq,
+ &fsl_dma_chan_do_interrupt, IRQF_SHARED,
+ "fsldma-channel", new_fsl_chan);
+ if (err) {
+ dev_err(fdev->dev, "DMA channel %s request_irq error "
+ "with return %d\n", node->full_name, err);
+ goto err_no_irq;
+ }
+ }
+
+ dev_info(fdev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id,
+ compatible, new_fsl_chan->irq);
+
+ return 0;
+
+err_no_irq:
+ list_del(&new_fsl_chan->common.device_node);
+err_no_chan:
+ iounmap(new_fsl_chan->reg_base);
+err_no_reg:
+ kfree(new_fsl_chan);
+ return err;
+}
+
+static void fsl_dma_chan_remove(struct fsl_dma_chan *fchan)
+{
+ free_irq(fchan->irq, fchan);
+ list_del(&fchan->common.device_node);
+ iounmap(fchan->reg_base);
+ kfree(fchan);
+}
+
+static int __devinit of_fsl_dma_probe(struct of_device *dev,
+ const struct of_device_id *match)
+{
+ int err;
+ struct fsl_dma_device *fdev;
+ struct device_node *child;
+
+ fdev = kzalloc(sizeof(struct fsl_dma_device), GFP_KERNEL);
+ if (!fdev) {
+ dev_err(&dev->dev, "No enough memory for 'priv'\n");
+ return -ENOMEM;
+ }
+ fdev->dev = &dev->dev;
+ INIT_LIST_HEAD(&fdev->common.channels);
+
+ /* get DMA controller register base */
+ err = of_address_to_resource(dev->node, 0, &fdev->reg);
+ if (err) {
+ dev_err(&dev->dev, "Can't get %s property 'reg'\n",
+ dev->node->full_name);
+ goto err_no_reg;
+ }
+
+ dev_info(&dev->dev, "Probe the Freescale DMA driver for %s "
+ "controller at %p...\n",
+ match->compatible, (void *)fdev->reg.start);
+ fdev->reg_base = ioremap(fdev->reg.start, fdev->reg.end
+ - fdev->reg.start + 1);
+
+ dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
+ dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
+ fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
+ fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
+ fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt;
+ fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
+ fdev->common.device_is_tx_complete = fsl_dma_is_complete;
+ fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
+ fdev->common.dev = &dev->dev;
+
+ fdev->irq = irq_of_parse_and_map(dev->node, 0);
+ if (fdev->irq != NO_IRQ) {
+ err = request_irq(fdev->irq, &fsl_dma_do_interrupt, IRQF_SHARED,
+ "fsldma-device", fdev);
+ if (err) {
+ dev_err(&dev->dev, "DMA device request_irq error "
+ "with return %d\n", err);
+ goto err;
+ }
+ }
+
+ dev_set_drvdata(&(dev->dev), fdev);
+
+ /* We cannot use of_platform_bus_probe() because there is no
+ * of_platform_bus_remove. Instead, we manually instantiate every DMA
+ * channel object.
+ */
+ for_each_child_of_node(dev->node, child) {
+ if (of_device_is_compatible(child, "fsl,eloplus-dma-channel"))
+ fsl_dma_chan_probe(fdev, child,
+ FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN,
+ "fsl,eloplus-dma-channel");
+ if (of_device_is_compatible(child, "fsl,elo-dma-channel"))
+ fsl_dma_chan_probe(fdev, child,
+ FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN,
+ "fsl,elo-dma-channel");
+ }
+
+ dma_async_device_register(&fdev->common);
+ return 0;
+
+err:
+ iounmap(fdev->reg_base);
+err_no_reg:
+ kfree(fdev);
+ return err;
+}
+
+static int of_fsl_dma_remove(struct of_device *of_dev)
+{
+ struct fsl_dma_device *fdev;
+ unsigned int i;
+
+ fdev = dev_get_drvdata(&of_dev->dev);
+
+ dma_async_device_unregister(&fdev->common);
+
+ for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++)
+ if (fdev->chan[i])
+ fsl_dma_chan_remove(fdev->chan[i]);
+
+ if (fdev->irq != NO_IRQ)
+ free_irq(fdev->irq, fdev);
+
+ iounmap(fdev->reg_base);
+
+ kfree(fdev);
+ dev_set_drvdata(&of_dev->dev, NULL);
+
+ return 0;
+}
+
+static struct of_device_id of_fsl_dma_ids[] = {
+ { .compatible = "fsl,eloplus-dma", },
+ { .compatible = "fsl,elo-dma", },
+ {}
+};
+
+static struct of_platform_driver of_fsl_dma_driver = {
+ .name = "fsl-elo-dma",
+ .match_table = of_fsl_dma_ids,
+ .probe = of_fsl_dma_probe,
+ .remove = of_fsl_dma_remove,
+};
+
+static __init int of_fsl_dma_init(void)
+{
+ int ret;
+
+ pr_info("Freescale Elo / Elo Plus DMA driver\n");
+
+ ret = of_register_platform_driver(&of_fsl_dma_driver);
+ if (ret)
+ pr_err("fsldma: failed to register platform driver\n");
+
+ return ret;
+}
+
+static void __exit of_fsl_dma_exit(void)
+{
+ of_unregister_platform_driver(&of_fsl_dma_driver);
+}
+
+subsys_initcall(of_fsl_dma_init);
+module_exit(of_fsl_dma_exit);
+
+MODULE_DESCRIPTION("Freescale Elo / Elo Plus DMA driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
new file mode 100644
index 0000000..4f21a51
--- /dev/null
+++ b/drivers/dma/fsldma.h
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author:
+ * Zhang Wei <wei.zhang@freescale.com>, Jul 2007
+ * Ebony Zhu <ebony.zhu@freescale.com>, May 2007
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+#ifndef __DMA_FSLDMA_H
+#define __DMA_FSLDMA_H
+
+#include <linux/device.h>
+#include <linux/dmapool.h>
+#include <linux/dmaengine.h>
+
+/* Define data structures needed by Freescale
+ * MPC8540 and MPC8349 DMA controller.
+ */
+#define FSL_DMA_MR_CS 0x00000001
+#define FSL_DMA_MR_CC 0x00000002
+#define FSL_DMA_MR_CA 0x00000008
+#define FSL_DMA_MR_EIE 0x00000040
+#define FSL_DMA_MR_XFE 0x00000020
+#define FSL_DMA_MR_EOLNIE 0x00000100
+#define FSL_DMA_MR_EOLSIE 0x00000080
+#define FSL_DMA_MR_EOSIE 0x00000200
+#define FSL_DMA_MR_CDSM 0x00000010
+#define FSL_DMA_MR_CTM 0x00000004
+#define FSL_DMA_MR_EMP_EN 0x00200000
+#define FSL_DMA_MR_EMS_EN 0x00040000
+#define FSL_DMA_MR_DAHE 0x00002000
+#define FSL_DMA_MR_SAHE 0x00001000
+
+/* Special MR definition for MPC8349 */
+#define FSL_DMA_MR_EOTIE 0x00000080
+
+#define FSL_DMA_SR_CH 0x00000020
+#define FSL_DMA_SR_PE 0x00000010
+#define FSL_DMA_SR_CB 0x00000004
+#define FSL_DMA_SR_TE 0x00000080
+#define FSL_DMA_SR_EOSI 0x00000002
+#define FSL_DMA_SR_EOLSI 0x00000001
+#define FSL_DMA_SR_EOCDI 0x00000001
+#define FSL_DMA_SR_EOLNI 0x00000008
+
+#define FSL_DMA_SATR_SBPATMU 0x20000000
+#define FSL_DMA_SATR_STRANSINT_RIO 0x00c00000
+#define FSL_DMA_SATR_SREADTYPE_SNOOP_READ 0x00050000
+#define FSL_DMA_SATR_SREADTYPE_BP_IORH 0x00020000
+#define FSL_DMA_SATR_SREADTYPE_BP_NREAD 0x00040000
+#define FSL_DMA_SATR_SREADTYPE_BP_MREAD 0x00070000
+
+#define FSL_DMA_DATR_DBPATMU 0x20000000
+#define FSL_DMA_DATR_DTRANSINT_RIO 0x00c00000
+#define FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE 0x00050000
+#define FSL_DMA_DATR_DWRITETYPE_BP_FLUSH 0x00010000
+
+#define FSL_DMA_EOL ((u64)0x1)
+#define FSL_DMA_SNEN ((u64)0x10)
+#define FSL_DMA_EOSIE 0x8
+#define FSL_DMA_NLDA_MASK (~(u64)0x1f)
+
+#define FSL_DMA_BCR_MAX_CNT 0x03ffffffu
+
+#define FSL_DMA_DGSR_TE 0x80
+#define FSL_DMA_DGSR_CH 0x20
+#define FSL_DMA_DGSR_PE 0x10
+#define FSL_DMA_DGSR_EOLNI 0x08
+#define FSL_DMA_DGSR_CB 0x04
+#define FSL_DMA_DGSR_EOSI 0x02
+#define FSL_DMA_DGSR_EOLSI 0x01
+
+typedef u64 __bitwise v64;
+typedef u32 __bitwise v32;
+
+struct fsl_dma_ld_hw {
+ v64 src_addr;
+ v64 dst_addr;
+ v64 next_ln_addr;
+ v32 count;
+ v32 reserve;
+} __attribute__((aligned(32)));
+
+struct fsl_desc_sw {
+ struct fsl_dma_ld_hw hw;
+ struct list_head node;
+ struct dma_async_tx_descriptor async_tx;
+ struct list_head *ld;
+ void *priv;
+} __attribute__((aligned(32)));
+
+struct fsl_dma_chan_regs {
+ u32 mr; /* 0x00 - Mode Register */
+ u32 sr; /* 0x04 - Status Register */
+ u64 cdar; /* 0x08 - Current descriptor address register */
+ u64 sar; /* 0x10 - Source Address Register */
+ u64 dar; /* 0x18 - Destination Address Register */
+ u32 bcr; /* 0x20 - Byte Count Register */
+ u64 ndar; /* 0x24 - Next Descriptor Address Register */
+};
+
+struct fsl_dma_chan;
+#define FSL_DMA_MAX_CHANS_PER_DEVICE 4
+
+struct fsl_dma_device {
+ void __iomem *reg_base; /* DGSR register base */
+ struct resource reg; /* Resource for register */
+ struct device *dev;
+ struct dma_device common;
+ struct fsl_dma_chan *chan[FSL_DMA_MAX_CHANS_PER_DEVICE];
+ u32 feature; /* The same as DMA channels */
+ int irq; /* Channel IRQ */
+};
+
+/* Define macros for fsl_dma_chan->feature property */
+#define FSL_DMA_LITTLE_ENDIAN 0x00000000
+#define FSL_DMA_BIG_ENDIAN 0x00000001
+
+#define FSL_DMA_IP_MASK 0x00000ff0
+#define FSL_DMA_IP_85XX 0x00000010
+#define FSL_DMA_IP_83XX 0x00000020
+
+#define FSL_DMA_CHAN_PAUSE_EXT 0x00001000
+#define FSL_DMA_CHAN_START_EXT 0x00002000
+
+struct fsl_dma_chan {
+ struct fsl_dma_chan_regs __iomem *reg_base;
+ dma_cookie_t completed_cookie; /* The maximum cookie completed */
+ spinlock_t desc_lock; /* Descriptor operation lock */
+ struct list_head ld_queue; /* Link descriptors queue */
+ struct dma_chan common; /* DMA common channel */
+ struct dma_pool *desc_pool; /* Descriptors pool */
+ struct device *dev; /* Channel device */
+ struct resource reg; /* Resource for register */
+ int irq; /* Channel IRQ */
+ int id; /* Raw id of this channel */
+ struct tasklet_struct tasklet;
+ u32 feature;
+
+ void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int size);
+ void (*toggle_ext_start)(struct fsl_dma_chan *fsl_chan, int enable);
+ void (*set_src_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
+ void (*set_dest_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
+};
+
+#define to_fsl_chan(chan) container_of(chan, struct fsl_dma_chan, common)
+#define to_fsl_desc(lh) container_of(lh, struct fsl_desc_sw, node)
+#define tx_to_fsl_desc(tx) container_of(tx, struct fsl_desc_sw, async_tx)
+
+#ifndef __powerpc64__
+static u64 in_be64(const u64 __iomem *addr)
+{
+ return ((u64)in_be32((u32 __iomem *)addr) << 32) |
+ (in_be32((u32 __iomem *)addr + 1));
+}
+
+static void out_be64(u64 __iomem *addr, u64 val)
+{
+ out_be32((u32 __iomem *)addr, val >> 32);
+ out_be32((u32 __iomem *)addr + 1, (u32)val);
+}
+
+/* There is no asm instructions for 64 bits reverse loads and stores */
+static u64 in_le64(const u64 __iomem *addr)
+{
+ return ((u64)in_le32((u32 __iomem *)addr + 1) << 32) |
+ (in_le32((u32 __iomem *)addr));
+}
+
+static void out_le64(u64 __iomem *addr, u64 val)
+{
+ out_le32((u32 __iomem *)addr + 1, val >> 32);
+ out_le32((u32 __iomem *)addr, (u32)val);
+}
+#endif
+
+#define DMA_IN(fsl_chan, addr, width) \
+ (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \
+ in_be##width(addr) : in_le##width(addr))
+#define DMA_OUT(fsl_chan, addr, val, width) \
+ (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \
+ out_be##width(addr, val) : out_le##width(addr, val))
+
+#define DMA_TO_CPU(fsl_chan, d, width) \
+ (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \
+ be##width##_to_cpu((__force __be##width)(v##width)d) : \
+ le##width##_to_cpu((__force __le##width)(v##width)d))
+#define CPU_TO_DMA(fsl_chan, c, width) \
+ (((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ? \
+ (__force v##width)cpu_to_be##width(c) : \
+ (__force v##width)cpu_to_le##width(c))
+
+#endif /* __DMA_FSLDMA_H */
diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c
new file mode 100644
index 0000000..9b16a3a
--- /dev/null
+++ b/drivers/dma/ioat.c
@@ -0,0 +1,226 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2007 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dca.h>
+#include "ioatdma.h"
+#include "ioatdma_registers.h"
+#include "ioatdma_hw.h"
+
+MODULE_VERSION(IOAT_DMA_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Intel Corporation");
+
+static struct pci_device_id ioat_pci_tbl[] = {
+ /* I/OAT v1 platforms */
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
+ { PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
+
+ /* I/OAT v2 platforms */
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
+
+ /* I/OAT v3 platforms */
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
+ { PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
+ { 0, }
+};
+
+struct ioat_device {
+ struct pci_dev *pdev;
+ void __iomem *iobase;
+ struct ioatdma_device *dma;
+ struct dca_provider *dca;
+};
+
+static int __devinit ioat_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id);
+static void __devexit ioat_remove(struct pci_dev *pdev);
+
+static int ioat_dca_enabled = 1;
+module_param(ioat_dca_enabled, int, 0644);
+MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
+
+static int ioat_setup_functionality(struct pci_dev *pdev, void __iomem *iobase)
+{
+ struct ioat_device *device = pci_get_drvdata(pdev);
+ u8 version;
+ int err = 0;
+
+ version = readb(iobase + IOAT_VER_OFFSET);
+ switch (version) {
+ case IOAT_VER_1_2:
+ device->dma = ioat_dma_probe(pdev, iobase);
+ if (device->dma && ioat_dca_enabled)
+ device->dca = ioat_dca_init(pdev, iobase);
+ break;
+ case IOAT_VER_2_0:
+ device->dma = ioat_dma_probe(pdev, iobase);
+ if (device->dma && ioat_dca_enabled)
+ device->dca = ioat2_dca_init(pdev, iobase);
+ break;
+ case IOAT_VER_3_0:
+ device->dma = ioat_dma_probe(pdev, iobase);
+ if (device->dma && ioat_dca_enabled)
+ device->dca = ioat3_dca_init(pdev, iobase);
+ break;
+ default:
+ err = -ENODEV;
+ break;
+ }
+ if (!device->dma)
+ err = -ENODEV;
+ return err;
+}
+
+static void ioat_shutdown_functionality(struct pci_dev *pdev)
+{
+ struct ioat_device *device = pci_get_drvdata(pdev);
+
+ dev_err(&pdev->dev, "Removing dma and dca services\n");
+ if (device->dca) {
+ unregister_dca_provider(device->dca);
+ free_dca_provider(device->dca);
+ device->dca = NULL;
+ }
+
+ if (device->dma) {
+ ioat_dma_remove(device->dma);
+ device->dma = NULL;
+ }
+}
+
+static struct pci_driver ioat_pci_driver = {
+ .name = "ioatdma",
+ .id_table = ioat_pci_tbl,
+ .probe = ioat_probe,
+ .shutdown = ioat_shutdown_functionality,
+ .remove = __devexit_p(ioat_remove),
+};
+
+static int __devinit ioat_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+{
+ void __iomem *iobase;
+ struct ioat_device *device;
+ unsigned long mmio_start, mmio_len;
+ int err;
+
+ err = pci_enable_device(pdev);
+ if (err)
+ goto err_enable_device;
+
+ err = pci_request_regions(pdev, ioat_pci_driver.name);
+ if (err)
+ goto err_request_regions;
+
+ err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+ if (err)
+ err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+ if (err)
+ goto err_set_dma_mask;
+
+ err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
+ if (err)
+ err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+ if (err)
+ goto err_set_dma_mask;
+
+ mmio_start = pci_resource_start(pdev, 0);
+ mmio_len = pci_resource_len(pdev, 0);
+ iobase = ioremap(mmio_start, mmio_len);
+ if (!iobase) {
+ err = -ENOMEM;
+ goto err_ioremap;
+ }
+
+ device = kzalloc(sizeof(*device), GFP_KERNEL);
+ if (!device) {
+ err = -ENOMEM;
+ goto err_kzalloc;
+ }
+ device->pdev = pdev;
+ pci_set_drvdata(pdev, device);
+ device->iobase = iobase;
+
+ pci_set_master(pdev);
+
+ err = ioat_setup_functionality(pdev, iobase);
+ if (err)
+ goto err_version;
+
+ return 0;
+
+err_version:
+ kfree(device);
+err_kzalloc:
+ iounmap(iobase);
+err_ioremap:
+err_set_dma_mask:
+ pci_release_regions(pdev);
+ pci_disable_device(pdev);
+err_request_regions:
+err_enable_device:
+ return err;
+}
+
+/*
+ * It is unsafe to remove this module: if removed while a requested
+ * dma is outstanding, esp. from tcp, it is possible to hang while
+ * waiting for something that will never finish. However, if you're
+ * feeling lucky, this usually works just fine.
+ */
+static void __devexit ioat_remove(struct pci_dev *pdev)
+{
+ struct ioat_device *device = pci_get_drvdata(pdev);
+
+ ioat_shutdown_functionality(pdev);
+
+ kfree(device);
+}
+
+static int __init ioat_init_module(void)
+{
+ return pci_register_driver(&ioat_pci_driver);
+}
+module_init(ioat_init_module);
+
+static void __exit ioat_exit_module(void)
+{
+ pci_unregister_driver(&ioat_pci_driver);
+}
+module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat_dca.c
new file mode 100644
index 0000000..6cf622d
--- /dev/null
+++ b/drivers/dma/ioat_dca.c
@@ -0,0 +1,657 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2007 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/dca.h>
+
+/* either a kernel change is needed, or we need something like this in kernel */
+#ifndef CONFIG_SMP
+#include <asm/smp.h>
+#undef cpu_physical_id
+#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24)
+#endif
+
+#include "ioatdma.h"
+#include "ioatdma_registers.h"
+
+/*
+ * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
+ * contain the bit number of the APIC ID to map into the DCA tag. If the valid
+ * bit is not set, then the value must be 0 or 1 and defines the bit in the tag.
+ */
+#define DCA_TAG_MAP_VALID 0x80
+
+#define DCA3_TAG_MAP_BIT_TO_INV 0x80
+#define DCA3_TAG_MAP_BIT_TO_SEL 0x40
+#define DCA3_TAG_MAP_LITERAL_VAL 0x1
+
+#define DCA_TAG_MAP_MASK 0xDF
+
+/*
+ * "Legacy" DCA systems do not implement the DCA register set in the
+ * I/OAT device. Software needs direct support for their tag mappings.
+ */
+
+#define APICID_BIT(x) (DCA_TAG_MAP_VALID | (x))
+#define IOAT_TAG_MAP_LEN 8
+
+static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = {
+ 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
+static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = {
+ 1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
+static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = {
+ 1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), };
+static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 };
+
+/* pack PCI B/D/F into a u16 */
+static inline u16 dcaid_from_pcidev(struct pci_dev *pci)
+{
+ return (pci->bus->number << 8) | pci->devfn;
+}
+
+static int dca_enabled_in_bios(struct pci_dev *pdev)
+{
+ /* CPUID level 9 returns DCA configuration */
+ /* Bit 0 indicates DCA enabled by the BIOS */
+ unsigned long cpuid_level_9;
+ int res;
+
+ cpuid_level_9 = cpuid_eax(9);
+ res = test_bit(0, &cpuid_level_9);
+ if (!res)
+ dev_err(&pdev->dev, "DCA is disabled in BIOS\n");
+
+ return res;
+}
+
+static int system_has_dca_enabled(struct pci_dev *pdev)
+{
+ if (boot_cpu_has(X86_FEATURE_DCA))
+ return dca_enabled_in_bios(pdev);
+
+ dev_err(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n");
+ return 0;
+}
+
+struct ioat_dca_slot {
+ struct pci_dev *pdev; /* requester device */
+ u16 rid; /* requester id, as used by IOAT */
+};
+
+#define IOAT_DCA_MAX_REQ 6
+#define IOAT3_DCA_MAX_REQ 2
+
+struct ioat_dca_priv {
+ void __iomem *iobase;
+ void __iomem *dca_base;
+ int max_requesters;
+ int requester_count;
+ u8 tag_map[IOAT_TAG_MAP_LEN];
+ struct ioat_dca_slot req_slots[0];
+};
+
+/* 5000 series chipset DCA Port Requester ID Table Entry Format
+ * [15:8] PCI-Express Bus Number
+ * [7:3] PCI-Express Device Number
+ * [2:0] PCI-Express Function Number
+ *
+ * 5000 series chipset DCA control register format
+ * [7:1] Reserved (0)
+ * [0] Ignore Function Number
+ */
+
+static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+ u16 id;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+ id = dcaid_from_pcidev(pdev);
+
+ if (ioatdca->requester_count == ioatdca->max_requesters)
+ return -ENODEV;
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == NULL) {
+ /* found an empty slot */
+ ioatdca->requester_count++;
+ ioatdca->req_slots[i].pdev = pdev;
+ ioatdca->req_slots[i].rid = id;
+ writew(id, ioatdca->dca_base + (i * 4));
+ /* make sure the ignore function bit is off */
+ writeb(0, ioatdca->dca_base + (i * 4) + 2);
+ return i;
+ }
+ }
+ /* Error, ioatdma->requester_count is out of whack */
+ return -EFAULT;
+}
+
+static int ioat_dca_remove_requester(struct dca_provider *dca,
+ struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == pdev) {
+ writew(0, ioatdca->dca_base + (i * 4));
+ ioatdca->req_slots[i].pdev = NULL;
+ ioatdca->req_slots[i].rid = 0;
+ ioatdca->requester_count--;
+ return i;
+ }
+ }
+ return -ENODEV;
+}
+
+static u8 ioat_dca_get_tag(struct dca_provider *dca,
+ struct device *dev,
+ int cpu)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ int i, apic_id, bit, value;
+ u8 entry, tag;
+
+ tag = 0;
+ apic_id = cpu_physical_id(cpu);
+
+ for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
+ entry = ioatdca->tag_map[i];
+ if (entry & DCA_TAG_MAP_VALID) {
+ bit = entry & ~DCA_TAG_MAP_VALID;
+ value = (apic_id & (1 << bit)) ? 1 : 0;
+ } else {
+ value = entry ? 1 : 0;
+ }
+ tag |= (value << i);
+ }
+ return tag;
+}
+
+static int ioat_dca_dev_managed(struct dca_provider *dca,
+ struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+
+ pdev = to_pci_dev(dev);
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == pdev)
+ return 1;
+ }
+ return 0;
+}
+
+static struct dca_ops ioat_dca_ops = {
+ .add_requester = ioat_dca_add_requester,
+ .remove_requester = ioat_dca_remove_requester,
+ .get_tag = ioat_dca_get_tag,
+ .dev_managed = ioat_dca_dev_managed,
+};
+
+
+struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+ struct dca_provider *dca;
+ struct ioat_dca_priv *ioatdca;
+ u8 *tag_map = NULL;
+ int i;
+ int err;
+ u8 version;
+ u8 max_requesters;
+
+ if (!system_has_dca_enabled(pdev))
+ return NULL;
+
+ /* I/OAT v1 systems must have a known tag_map to support DCA */
+ switch (pdev->vendor) {
+ case PCI_VENDOR_ID_INTEL:
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_INTEL_IOAT:
+ tag_map = ioat_tag_map_BNB;
+ break;
+ case PCI_DEVICE_ID_INTEL_IOAT_CNB:
+ tag_map = ioat_tag_map_CNB;
+ break;
+ case PCI_DEVICE_ID_INTEL_IOAT_SCNB:
+ tag_map = ioat_tag_map_SCNB;
+ break;
+ }
+ break;
+ case PCI_VENDOR_ID_UNISYS:
+ switch (pdev->device) {
+ case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR:
+ tag_map = ioat_tag_map_UNISYS;
+ break;
+ }
+ break;
+ }
+ if (tag_map == NULL)
+ return NULL;
+
+ version = readb(iobase + IOAT_VER_OFFSET);
+ if (version == IOAT_VER_3_0)
+ max_requesters = IOAT3_DCA_MAX_REQ;
+ else
+ max_requesters = IOAT_DCA_MAX_REQ;
+
+ dca = alloc_dca_provider(&ioat_dca_ops,
+ sizeof(*ioatdca) +
+ (sizeof(struct ioat_dca_slot) * max_requesters));
+ if (!dca)
+ return NULL;
+
+ ioatdca = dca_priv(dca);
+ ioatdca->max_requesters = max_requesters;
+ ioatdca->dca_base = iobase + 0x54;
+
+ /* copy over the APIC ID to DCA tag mapping */
+ for (i = 0; i < IOAT_TAG_MAP_LEN; i++)
+ ioatdca->tag_map[i] = tag_map[i];
+
+ err = register_dca_provider(dca, &pdev->dev);
+ if (err) {
+ free_dca_provider(dca);
+ return NULL;
+ }
+
+ return dca;
+}
+
+
+static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+ u16 id;
+ u16 global_req_table;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+ id = dcaid_from_pcidev(pdev);
+
+ if (ioatdca->requester_count == ioatdca->max_requesters)
+ return -ENODEV;
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == NULL) {
+ /* found an empty slot */
+ ioatdca->requester_count++;
+ ioatdca->req_slots[i].pdev = pdev;
+ ioatdca->req_slots[i].rid = id;
+ global_req_table =
+ readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
+ writel(id | IOAT_DCA_GREQID_VALID,
+ ioatdca->iobase + global_req_table + (i * 4));
+ return i;
+ }
+ }
+ /* Error, ioatdma->requester_count is out of whack */
+ return -EFAULT;
+}
+
+static int ioat2_dca_remove_requester(struct dca_provider *dca,
+ struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+ u16 global_req_table;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == pdev) {
+ global_req_table =
+ readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
+ writel(0, ioatdca->iobase + global_req_table + (i * 4));
+ ioatdca->req_slots[i].pdev = NULL;
+ ioatdca->req_slots[i].rid = 0;
+ ioatdca->requester_count--;
+ return i;
+ }
+ }
+ return -ENODEV;
+}
+
+static u8 ioat2_dca_get_tag(struct dca_provider *dca,
+ struct device *dev,
+ int cpu)
+{
+ u8 tag;
+
+ tag = ioat_dca_get_tag(dca, dev, cpu);
+ tag = (~tag) & 0x1F;
+ return tag;
+}
+
+static struct dca_ops ioat2_dca_ops = {
+ .add_requester = ioat2_dca_add_requester,
+ .remove_requester = ioat2_dca_remove_requester,
+ .get_tag = ioat2_dca_get_tag,
+ .dev_managed = ioat_dca_dev_managed,
+};
+
+static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
+{
+ int slots = 0;
+ u32 req;
+ u16 global_req_table;
+
+ global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET);
+ if (global_req_table == 0)
+ return 0;
+ do {
+ req = readl(iobase + global_req_table + (slots * sizeof(u32)));
+ slots++;
+ } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
+
+ return slots;
+}
+
+struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+ struct dca_provider *dca;
+ struct ioat_dca_priv *ioatdca;
+ int slots;
+ int i;
+ int err;
+ u32 tag_map;
+ u16 dca_offset;
+ u16 csi_fsb_control;
+ u16 pcie_control;
+ u8 bit;
+
+ if (!system_has_dca_enabled(pdev))
+ return NULL;
+
+ dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
+ if (dca_offset == 0)
+ return NULL;
+
+ slots = ioat2_dca_count_dca_slots(iobase, dca_offset);
+ if (slots == 0)
+ return NULL;
+
+ dca = alloc_dca_provider(&ioat2_dca_ops,
+ sizeof(*ioatdca)
+ + (sizeof(struct ioat_dca_slot) * slots));
+ if (!dca)
+ return NULL;
+
+ ioatdca = dca_priv(dca);
+ ioatdca->iobase = iobase;
+ ioatdca->dca_base = iobase + dca_offset;
+ ioatdca->max_requesters = slots;
+
+ /* some bios might not know to turn these on */
+ csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
+ if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) {
+ csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH;
+ writew(csi_fsb_control,
+ ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
+ }
+ pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
+ if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) {
+ pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR;
+ writew(pcie_control,
+ ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
+ }
+
+
+ /* TODO version, compatibility and configuration checks */
+
+ /* copy out the APIC to DCA tag map */
+ tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET);
+ for (i = 0; i < 5; i++) {
+ bit = (tag_map >> (4 * i)) & 0x0f;
+ if (bit < 8)
+ ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID;
+ else
+ ioatdca->tag_map[i] = 0;
+ }
+
+ err = register_dca_provider(dca, &pdev->dev);
+ if (err) {
+ free_dca_provider(dca);
+ return NULL;
+ }
+
+ return dca;
+}
+
+static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+ u16 id;
+ u16 global_req_table;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+ id = dcaid_from_pcidev(pdev);
+
+ if (ioatdca->requester_count == ioatdca->max_requesters)
+ return -ENODEV;
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == NULL) {
+ /* found an empty slot */
+ ioatdca->requester_count++;
+ ioatdca->req_slots[i].pdev = pdev;
+ ioatdca->req_slots[i].rid = id;
+ global_req_table =
+ readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+ writel(id | IOAT_DCA_GREQID_VALID,
+ ioatdca->iobase + global_req_table + (i * 4));
+ return i;
+ }
+ }
+ /* Error, ioatdma->requester_count is out of whack */
+ return -EFAULT;
+}
+
+static int ioat3_dca_remove_requester(struct dca_provider *dca,
+ struct device *dev)
+{
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ struct pci_dev *pdev;
+ int i;
+ u16 global_req_table;
+
+ /* This implementation only supports PCI-Express */
+ if (dev->bus != &pci_bus_type)
+ return -ENODEV;
+ pdev = to_pci_dev(dev);
+
+ for (i = 0; i < ioatdca->max_requesters; i++) {
+ if (ioatdca->req_slots[i].pdev == pdev) {
+ global_req_table =
+ readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+ writel(0, ioatdca->iobase + global_req_table + (i * 4));
+ ioatdca->req_slots[i].pdev = NULL;
+ ioatdca->req_slots[i].rid = 0;
+ ioatdca->requester_count--;
+ return i;
+ }
+ }
+ return -ENODEV;
+}
+
+static u8 ioat3_dca_get_tag(struct dca_provider *dca,
+ struct device *dev,
+ int cpu)
+{
+ u8 tag;
+
+ struct ioat_dca_priv *ioatdca = dca_priv(dca);
+ int i, apic_id, bit, value;
+ u8 entry;
+
+ tag = 0;
+ apic_id = cpu_physical_id(cpu);
+
+ for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
+ entry = ioatdca->tag_map[i];
+ if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
+ bit = entry &
+ ~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV);
+ value = (apic_id & (1 << bit)) ? 1 : 0;
+ } else if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
+ bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
+ value = (apic_id & (1 << bit)) ? 0 : 1;
+ } else {
+ value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0;
+ }
+ tag |= (value << i);
+ }
+
+ return tag;
+}
+
+static struct dca_ops ioat3_dca_ops = {
+ .add_requester = ioat3_dca_add_requester,
+ .remove_requester = ioat3_dca_remove_requester,
+ .get_tag = ioat3_dca_get_tag,
+ .dev_managed = ioat_dca_dev_managed,
+};
+
+static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset)
+{
+ int slots = 0;
+ u32 req;
+ u16 global_req_table;
+
+ global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET);
+ if (global_req_table == 0)
+ return 0;
+
+ do {
+ req = readl(iobase + global_req_table + (slots * sizeof(u32)));
+ slots++;
+ } while ((req & IOAT_DCA_GREQID_LASTID) == 0);
+
+ return slots;
+}
+
+struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+ struct dca_provider *dca;
+ struct ioat_dca_priv *ioatdca;
+ int slots;
+ int i;
+ int err;
+ u16 dca_offset;
+ u16 csi_fsb_control;
+ u16 pcie_control;
+ u8 bit;
+
+ union {
+ u64 full;
+ struct {
+ u32 low;
+ u32 high;
+ };
+ } tag_map;
+
+ if (!system_has_dca_enabled(pdev))
+ return NULL;
+
+ dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
+ if (dca_offset == 0)
+ return NULL;
+
+ slots = ioat3_dca_count_dca_slots(iobase, dca_offset);
+ if (slots == 0)
+ return NULL;
+
+ dca = alloc_dca_provider(&ioat3_dca_ops,
+ sizeof(*ioatdca)
+ + (sizeof(struct ioat_dca_slot) * slots));
+ if (!dca)
+ return NULL;
+
+ ioatdca = dca_priv(dca);
+ ioatdca->iobase = iobase;
+ ioatdca->dca_base = iobase + dca_offset;
+ ioatdca->max_requesters = slots;
+
+ /* some bios might not know to turn these on */
+ csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+ if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) {
+ csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH;
+ writew(csi_fsb_control,
+ ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+ }
+ pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+ if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) {
+ pcie_control |= IOAT3_PCI_CONTROL_MEMWR;
+ writew(pcie_control,
+ ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+ }
+
+
+ /* TODO version, compatibility and configuration checks */
+
+ /* copy out the APIC to DCA tag map */
+ tag_map.low =
+ readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW);
+ tag_map.high =
+ readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH);
+ for (i = 0; i < 8; i++) {
+ bit = tag_map.full >> (8 * i);
+ ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK;
+ }
+
+ err = register_dca_provider(dca, &pdev->dev);
+ if (err) {
+ free_dca_provider(dca);
+ return NULL;
+ }
+
+ return dca;
+}
diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
new file mode 100644
index 0000000..f2ed033
--- /dev/null
+++ b/drivers/dma/ioat_dma.c
@@ -0,0 +1,1723 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2007 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/i7300_idle.h>
+#include "ioatdma.h"
+#include "ioatdma_registers.h"
+#include "ioatdma_hw.h"
+
+#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
+#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
+#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
+#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)
+
+#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
+static int ioat_pending_level = 4;
+module_param(ioat_pending_level, int, 0644);
+MODULE_PARM_DESC(ioat_pending_level,
+ "high-water mark for pushing ioat descriptors (default: 4)");
+
+#define RESET_DELAY msecs_to_jiffies(100)
+#define WATCHDOG_DELAY round_jiffies(msecs_to_jiffies(2000))
+static void ioat_dma_chan_reset_part2(struct work_struct *work);
+static void ioat_dma_chan_watchdog(struct work_struct *work);
+
+/*
+ * workaround for IOAT ver.3.0 null descriptor issue
+ * (channel returns error when size is 0)
+ */
+#define NULL_DESC_BUFFER_SIZE 1
+
+/* internal functions */
+static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
+static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
+
+static struct ioat_desc_sw *
+ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
+static struct ioat_desc_sw *
+ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
+
+static inline struct ioat_dma_chan *ioat_lookup_chan_by_index(
+ struct ioatdma_device *device,
+ int index)
+{
+ return device->idx[index];
+}
+
+/**
+ * ioat_dma_do_interrupt - handler used for single vector interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
+{
+ struct ioatdma_device *instance = data;
+ struct ioat_dma_chan *ioat_chan;
+ unsigned long attnstatus;
+ int bit;
+ u8 intrctrl;
+
+ intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
+
+ if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
+ return IRQ_NONE;
+
+ if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
+ writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+ return IRQ_NONE;
+ }
+
+ attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
+ for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
+ ioat_chan = ioat_lookup_chan_by_index(instance, bit);
+ tasklet_schedule(&ioat_chan->cleanup_task);
+ }
+
+ writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+ return IRQ_HANDLED;
+}
+
+/**
+ * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
+{
+ struct ioat_dma_chan *ioat_chan = data;
+
+ tasklet_schedule(&ioat_chan->cleanup_task);
+
+ return IRQ_HANDLED;
+}
+
+static void ioat_dma_cleanup_tasklet(unsigned long data);
+
+/**
+ * ioat_dma_enumerate_channels - find and initialize the device's channels
+ * @device: the device to be enumerated
+ */
+static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
+{
+ u8 xfercap_scale;
+ u32 xfercap;
+ int i;
+ struct ioat_dma_chan *ioat_chan;
+
+ /*
+ * IOAT ver.3 workarounds
+ */
+ if (device->version == IOAT_VER_3_0) {
+ u32 chan_err_mask;
+ u16 dev_id;
+ u32 dmauncerrsts;
+
+ /*
+ * Write CHANERRMSK_INT with 3E07h to mask out the errors
+ * that can cause stability issues for IOAT ver.3
+ */
+ chan_err_mask = 0x3E07;
+ pci_write_config_dword(device->pdev,
+ IOAT_PCI_CHANERRMASK_INT_OFFSET,
+ chan_err_mask);
+
+ /*
+ * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
+ * (workaround for spurious config parity error after restart)
+ */
+ pci_read_config_word(device->pdev,
+ IOAT_PCI_DEVICE_ID_OFFSET,
+ &dev_id);
+ if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
+ dmauncerrsts = 0x10;
+ pci_write_config_dword(device->pdev,
+ IOAT_PCI_DMAUNCERRSTS_OFFSET,
+ dmauncerrsts);
+ }
+ }
+
+ device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
+ xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
+ xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
+
+#ifdef CONFIG_I7300_IDLE_IOAT_CHANNEL
+ if (i7300_idle_platform_probe(NULL, NULL) == 0) {
+ device->common.chancnt--;
+ }
+#endif
+ for (i = 0; i < device->common.chancnt; i++) {
+ ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL);
+ if (!ioat_chan) {
+ device->common.chancnt = i;
+ break;
+ }
+
+ ioat_chan->device = device;
+ ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
+ ioat_chan->xfercap = xfercap;
+ ioat_chan->desccount = 0;
+ INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2);
+ if (ioat_chan->device->version != IOAT_VER_1_2) {
+ writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE
+ | IOAT_DMA_DCA_ANY_CPU,
+ ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
+ }
+ spin_lock_init(&ioat_chan->cleanup_lock);
+ spin_lock_init(&ioat_chan->desc_lock);
+ INIT_LIST_HEAD(&ioat_chan->free_desc);
+ INIT_LIST_HEAD(&ioat_chan->used_desc);
+ /* This should be made common somewhere in dmaengine.c */
+ ioat_chan->common.device = &device->common;
+ list_add_tail(&ioat_chan->common.device_node,
+ &device->common.channels);
+ device->idx[i] = ioat_chan;
+ tasklet_init(&ioat_chan->cleanup_task,
+ ioat_dma_cleanup_tasklet,
+ (unsigned long) ioat_chan);
+ tasklet_disable(&ioat_chan->cleanup_task);
+ }
+ return device->common.chancnt;
+}
+
+/**
+ * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
+ * descriptors to hw
+ * @chan: DMA channel handle
+ */
+static inline void __ioat1_dma_memcpy_issue_pending(
+ struct ioat_dma_chan *ioat_chan)
+{
+ ioat_chan->pending = 0;
+ writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET);
+}
+
+static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
+{
+ struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+
+ if (ioat_chan->pending > 0) {
+ spin_lock_bh(&ioat_chan->desc_lock);
+ __ioat1_dma_memcpy_issue_pending(ioat_chan);
+ spin_unlock_bh(&ioat_chan->desc_lock);
+ }
+}
+
+static inline void __ioat2_dma_memcpy_issue_pending(
+ struct ioat_dma_chan *ioat_chan)
+{
+ ioat_chan->pending = 0;
+ writew(ioat_chan->dmacount,
+ ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
+}
+
+static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan)
+{
+ struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+
+ if (ioat_chan->pending > 0) {
+ spin_lock_bh(&ioat_chan->desc_lock);
+ __ioat2_dma_memcpy_issue_pending(ioat_chan);
+ spin_unlock_bh(&ioat_chan->desc_lock);
+ }
+}
+
+
+/**
+ * ioat_dma_chan_reset_part2 - reinit the channel after a reset
+ */
+static void ioat_dma_chan_reset_part2(struct work_struct *work)
+{
+ struct ioat_dma_chan *ioat_chan =
+ container_of(work, struct ioat_dma_chan, work.work);
+ struct ioat_desc_sw *desc;
+
+ spin_lock_bh(&ioat_chan->cleanup_lock);
+ spin_lock_bh(&ioat_chan->desc_lock);
+
+ ioat_chan->completion_virt->low = 0;
+ ioat_chan->completion_virt->high = 0;
+ ioat_chan->pending = 0;
+
+ /*
+ * count the descriptors waiting, and be sure to do it
+ * right for both the CB1 line and the CB2 ring
+ */
+ ioat_chan->dmacount = 0;
+ if (ioat_chan->used_desc.prev) {
+ desc = to_ioat_desc(ioat_chan->used_desc.prev);
+ do {
+ ioat_chan->dmacount++;
+ desc = to_ioat_desc(desc->node.next);
+ } while (&desc->node != ioat_chan->used_desc.next);
+ }
+
+ /*
+ * write the new starting descriptor address
+ * this puts channel engine into ARMED state
+ */
+ desc = to_ioat_desc(ioat_chan->used_desc.prev);
+ switch (ioat_chan->device->version) {
+ case IOAT_VER_1_2:
+ writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
+ ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
+ writel(((u64) desc->async_tx.phys) >> 32,
+ ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
+
+ writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
+ + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
+ break;
+ case IOAT_VER_2_0:
+ writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
+ ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
+ writel(((u64) desc->async_tx.phys) >> 32,
+ ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
+
+ /* tell the engine to go with what's left to be done */
+ writew(ioat_chan->dmacount,
+ ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
+
+ break;
+ }
+ dev_err(&ioat_chan->device->pdev->dev,
+ "chan%d reset - %d descs waiting, %d total desc\n",
+ chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
+
+ spin_unlock_bh(&ioat_chan->desc_lock);
+ spin_unlock_bh(&ioat_chan->cleanup_lock);
+}
+
+/**
+ * ioat_dma_reset_channel - restart a channel
+ * @ioat_chan: IOAT DMA channel handle
+ */
+static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan)
+{
+ u32 chansts, chanerr;
+
+ if (!ioat_chan->used_desc.prev)
+ return;
+
+ chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+ chansts = (ioat_chan->completion_virt->low
+ & IOAT_CHANSTS_DMA_TRANSFER_STATUS);
+ if (chanerr) {
+ dev_err(&ioat_chan->device->pdev->dev,
+ "chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
+ chan_num(ioat_chan), chansts, chanerr);
+ writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+ }
+
+ /*
+ * whack it upside the head with a reset
+ * and wait for things to settle out.
+ * force the pending count to a really big negative
+ * to make sure no one forces an issue_pending
+ * while we're waiting.
+ */
+
+ spin_lock_bh(&ioat_chan->desc_lock);
+ ioat_chan->pending = INT_MIN;
+ writeb(IOAT_CHANCMD_RESET,
+ ioat_chan->reg_base
+ + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
+ spin_unlock_bh(&ioat_chan->desc_lock);
+
+ /* schedule the 2nd half instead of sleeping a long time */
+ schedule_delayed_work(&ioat_chan->work, RESET_DELAY);
+}
+
+/**
+ * ioat_dma_chan_watchdog - watch for stuck channels
+ */
+static void ioat_dma_chan_watchdog(struct work_struct *work)
+{
+ struct ioatdma_device *device =
+ container_of(work, struct ioatdma_device, work.work);
+ struct ioat_dma_chan *ioat_chan;
+ int i;
+
+ union {
+ u64 full;
+ struct {
+ u32 low;
+ u32 high;
+ };
+ } completion_hw;
+ unsigned long compl_desc_addr_hw;
+
+ for (i = 0; i < device->common.chancnt; i++) {
+ ioat_chan = ioat_lookup_chan_by_index(device, i);
+
+ if (ioat_chan->device->version == IOAT_VER_1_2
+ /* have we started processing anything yet */
+ && ioat_chan->last_completion
+ /* have we completed any since last watchdog cycle? */
+ && (ioat_chan->last_completion ==
+ ioat_chan->watchdog_completion)
+ /* has TCP stuck on one cookie since last watchdog? */
+ && (ioat_chan->watchdog_tcp_cookie ==
+ ioat_chan->watchdog_last_tcp_cookie)
+ && (ioat_chan->watchdog_tcp_cookie !=
+ ioat_chan->completed_cookie)
+ /* is there something in the chain to be processed? */
+ /* CB1 chain always has at least the last one processed */
+ && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next)
+ && ioat_chan->pending == 0) {
+
+ /*
+ * check CHANSTS register for completed
+ * descriptor address.
+ * if it is different than completion writeback,
+ * it is not zero
+ * and it has changed since the last watchdog
+ * we can assume that channel
+ * is still working correctly
+ * and the problem is in completion writeback.
+ * update completion writeback
+ * with actual CHANSTS value
+ * else
+ * try resetting the channel
+ */
+
+ completion_hw.low = readl(ioat_chan->reg_base +
+ IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version));
+ completion_hw.high = readl(ioat_chan->reg_base +
+ IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version));
+#if (BITS_PER_LONG == 64)
+ compl_desc_addr_hw =
+ completion_hw.full
+ & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+#else
+ compl_desc_addr_hw =
+ completion_hw.low & IOAT_LOW_COMPLETION_MASK;
+#endif
+
+ if ((compl_desc_addr_hw != 0)
+ && (compl_desc_addr_hw != ioat_chan->watchdog_completion)
+ && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) {
+ ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw;
+ ioat_chan->completion_virt->low = completion_hw.low;
+ ioat_chan->completion_virt->high = completion_hw.high;
+ } else {
+ ioat_dma_reset_channel(ioat_chan);
+ ioat_chan->watchdog_completion = 0;
+ ioat_chan->last_compl_desc_addr_hw = 0;
+ }
+
+ /*
+ * for version 2.0 if there are descriptors yet to be processed
+ * and the last completed hasn't changed since the last watchdog
+ * if they haven't hit the pending level
+ * issue the pending to push them through
+ * else
+ * try resetting the channel
+ */
+ } else if (ioat_chan->device->version == IOAT_VER_2_0
+ && ioat_chan->used_desc.prev
+ && ioat_chan->last_completion
+ && ioat_chan->last_completion == ioat_chan->watchdog_completion) {
+
+ if (ioat_chan->pending < ioat_pending_level)
+ ioat2_dma_memcpy_issue_pending(&ioat_chan->common);
+ else {
+ ioat_dma_reset_channel(ioat_chan);
+ ioat_chan->watchdog_completion = 0;
+ }
+ } else {
+ ioat_chan->last_compl_desc_addr_hw = 0;
+ ioat_chan->watchdog_completion
+ = ioat_chan->last_completion;
+ }
+
+ ioat_chan->watchdog_last_tcp_cookie =
+ ioat_chan->watchdog_tcp_cookie;
+ }
+
+ schedule_delayed_work(&device->work, WATCHDOG_DELAY);
+}
+
+static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
+ struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
+ struct ioat_desc_sw *prev, *new;
+ struct ioat_dma_descriptor *hw;
+ dma_cookie_t cookie;
+ LIST_HEAD(new_chain);
+ u32 copy;
+ size_t len;
+ dma_addr_t src, dst;
+ unsigned long orig_flags;
+ unsigned int desc_count = 0;
+
+ /* src and dest and len are stored in the initial descriptor */
+ len = first->len;
+ src = first->src;
+ dst = first->dst;
+ orig_flags = first->async_tx.flags;
+ new = first;
+
+ spin_lock_bh(&ioat_chan->desc_lock);
+ prev = to_ioat_desc(ioat_chan->used_desc.prev);
+ prefetch(prev->hw);
+ do {
+ copy = min_t(size_t, len, ioat_chan->xfercap);
+
+ async_tx_ack(&new->async_tx);
+
+ hw = new->hw;
+ hw->size = copy;
+ hw->ctl = 0;
+ hw->src_addr = src;
+ hw->dst_addr = dst;
+ hw->next = 0;
+
+ /* chain together the physical address list for the HW */
+ wmb();
+ prev->hw->next = (u64) new->async_tx.phys;
+
+ len -= copy;
+ dst += copy;
+ src += copy;
+
+ list_add_tail(&new->node, &new_chain);
+ desc_count++;
+ prev = new;
+ } while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan)));
+
+ if (!new) {
+ dev_err(&ioat_chan->device->pdev->dev,
+ "tx submit failed\n");
+ spin_unlock_bh(&ioat_chan->desc_lock);
+ return -ENOMEM;
+ }
+
+ hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
+ if (first->async_tx.callback) {
+ hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
+ if (first != new) {
+ /* move callback into to last desc */
+ new->async_tx.callback = first->async_tx.callback;
+ new->async_tx.callback_param
+ = first->async_tx.callback_param;
+ first->async_tx.callback = NULL;
+ first->async_tx.callback_param = NULL;
+ }
+ }
+
+ new->tx_cnt = desc_count;
+ new->async_tx.flags = orig_flags; /* client is in control of this ack */
+
+ /* store the original values for use in later cleanup */
+ if (new != first) {
+ new->src = first->src;
+ new->dst = first->dst;
+ new->len = first->len;
+ }
+
+ /* cookie incr and addition to used_list must be atomic */
+ cookie = ioat_chan->common.cookie;
+ cookie++;
+ if (cookie < 0)
+ cookie = 1;
+ ioat_chan->common.cookie = new->async_tx.cookie = cookie;
+
+ /* write address into NextDescriptor field of last desc in chain */
+ to_ioat_desc(ioat_chan->used_desc.prev)->hw->next =
+ first->async_tx.phys;
+ list_splice_tail(&new_chain, &ioat_chan->used_desc);
+
+ ioat_chan->dmacount += desc_count;
+ ioat_chan->pending += desc_count;
+ if (ioat_chan->pending >= ioat_pending_level)
+ __ioat1_dma_memcpy_issue_pending(ioat_chan);
+ spin_unlock_bh(&ioat_chan->desc_lock);
+
+ return cookie;
+}
+
+static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
+ struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
+ struct ioat_desc_sw *new;
+ struct ioat_dma_descriptor *hw;
+ dma_cookie_t cookie;
+ u32 copy;
+ size_t len;
+ dma_addr_t src, dst;
+ unsigned long orig_flags;
+ unsigned int desc_count = 0;
+
+ /* src and dest and len are stored in the initial descriptor */
+ len = first->len;
+ src = first->src;
+ dst = first->dst;
+ orig_flags = first->async_tx.flags;
+ new = first;
+
+ /*
+ * ioat_chan->desc_lock is still in force in version 2 path
+ * it gets unlocked at end of this function
+ */
+ do {
+ copy = min_t(size_t, len, ioat_chan->xfercap);
+
+ async_tx_ack(&new->async_tx);
+
+ hw = new->hw;
+ hw->size = copy;
+ hw->ctl = 0;
+ hw->src_addr = src;
+ hw->dst_addr = dst;
+
+ len -= copy;
+ dst += copy;
+ src += copy;
+ desc_count++;
+ } while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan)));
+
+ if (!new) {
+ dev_err(&ioat_chan->device->pdev->dev,
+ "tx submit failed\n");
+ spin_unlock_bh(&ioat_chan->desc_lock);
+ return -ENOMEM;
+ }
+
+ hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
+ if (first->async_tx.callback) {
+ hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
+ if (first != new) {
+ /* move callback into to last desc */
+ new->async_tx.callback = first->async_tx.callback;
+ new->async_tx.callback_param
+ = first->async_tx.callback_param;
+ first->async_tx.callback = NULL;
+ first->async_tx.callback_param = NULL;
+ }
+ }
+
+ new->tx_cnt = desc_count;
+ new->async_tx.flags = orig_flags; /* client is in control of this ack */
+
+ /* store the original values for use in later cleanup */
+ if (new != first) {
+ new->src = first->src;
+ new->dst = first->dst;
+ new->len = first->len;
+ }
+
+ /* cookie incr and addition to used_list must be atomic */
+ cookie = ioat_chan->common.cookie;
+ cookie++;
+ if (cookie < 0)
+ cookie = 1;
+ ioat_chan->common.cookie = new->async_tx.cookie = cookie;
+
+ ioat_chan->dmacount += desc_count;
+ ioat_chan->pending += desc_count;
+ if (ioat_chan->pending >= ioat_pending_level)
+ __ioat2_dma_memcpy_issue_pending(ioat_chan);
+ spin_unlock_bh(&ioat_chan->desc_lock);
+
+ return cookie;
+}
+
+/**
+ * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
+ * @ioat_chan: the channel supplying the memory pool for the descriptors
+ * @flags: allocation flags
+ */
+static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
+ struct ioat_dma_chan *ioat_chan,
+ gfp_t flags)
+{
+ struct ioat_dma_descriptor *desc;
+ struct ioat_desc_sw *desc_sw;
+ struct ioatdma_device *ioatdma_device;
+ dma_addr_t phys;
+
+ ioatdma_device = to_ioatdma_device(ioat_chan->common.device);
+ desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
+ if (unlikely(!desc))
+ return NULL;
+
+ desc_sw = kzalloc(sizeof(*desc_sw), flags);
+ if (unlikely(!desc_sw)) {
+ pci_pool_free(ioatdma_device->dma_pool, desc, phys);
+ return NULL;
+ }
+
+ memset(desc, 0, sizeof(*desc));
+ dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common);
+ switch (ioat_chan->device->version) {
+ case IOAT_VER_1_2:
+ desc_sw->async_tx.tx_submit = ioat1_tx_submit;
+ break;
+ case IOAT_VER_2_0:
+ case IOAT_VER_3_0:
+ desc_sw->async_tx.tx_submit = ioat2_tx_submit;
+ break;
+ }
+ INIT_LIST_HEAD(&desc_sw->async_tx.tx_list);
+
+ desc_sw->hw = desc;
+ desc_sw->async_tx.phys = phys;
+
+ return desc_sw;
+}
+
+static int ioat_initial_desc_count = 256;
+module_param(ioat_initial_desc_count, int, 0644);
+MODULE_PARM_DESC(ioat_initial_desc_count,
+ "initial descriptors per channel (default: 256)");
+
+/**
+ * ioat2_dma_massage_chan_desc - link the descriptors into a circle
+ * @ioat_chan: the channel to be massaged
+ */
+static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan)
+{
+ struct ioat_desc_sw *desc, *_desc;
+
+ /* setup used_desc */
+ ioat_chan->used_desc.next = ioat_chan->free_desc.next;
+ ioat_chan->used_desc.prev = NULL;
+
+ /* pull free_desc out of the circle so that every node is a hw
+ * descriptor, but leave it pointing to the list
+ */
+ ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next;
+ ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev;
+
+ /* circle link the hw descriptors */
+ desc = to_ioat_desc(ioat_chan->free_desc.next);
+ desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
+ list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) {
+ desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
+ }
+}
+
+/**
+ * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors
+ * @chan: the channel to be filled out
+ */
+static int ioat_dma_alloc_chan_resources(struct dma_chan *chan,
+ struct dma_client *client)
+{
+ struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+ struct ioat_desc_sw *desc;
+ u16 chanctrl;
+ u32 chanerr;
+ int i;
+ LIST_HEAD(tmp_list);
+
+ /* have we already been set up? */
+ if (!list_empty(&ioat_chan->free_desc))
+ return ioat_chan->desccount;
+
+ /* Setup register to interrupt and write completion status on error */
+ chanctrl = IOAT_CHANCTRL_ERR_INT_EN |
+ IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
+ IOAT_CHANCTRL_ERR_COMPLETION_EN;
+ writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
+
+ chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+ if (chanerr) {
+ dev_err(&ioat_chan->device->pdev->dev,
+ "CHANERR = %x, clearing\n", chanerr);
+ writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+ }
+
+ /* Allocate descriptors */
+ for (i = 0; i < ioat_initial_desc_count; i++) {
+ desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
+ if (!desc) {
+ dev_err(&ioat_chan->device->pdev->dev,
+ "Only %d initial descriptors\n", i);
+ break;
+ }
+ list_add_tail(&desc->node, &tmp_list);
+ }
+ spin_lock_bh(&ioat_chan->desc_lock);
+ ioat_chan->desccount = i;
+ list_splice(&tmp_list, &ioat_chan->free_desc);
+ if (ioat_chan->device->version != IOAT_VER_1_2)
+ ioat2_dma_massage_chan_desc(ioat_chan);
+ spin_unlock_bh(&ioat_chan->desc_lock);
+
+ /* allocate a completion writeback area */
+ /* doing 2 32bit writes to mmio since 1 64b write doesn't work */
+ ioat_chan->completion_virt =
+ pci_pool_alloc(ioat_chan->device->completion_pool,
+ GFP_KERNEL,
+ &ioat_chan->completion_addr);
+ memset(ioat_chan->completion_virt, 0,
+ sizeof(*ioat_chan->completion_virt));
+ writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF,
+ ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+ writel(((u64) ioat_chan->completion_addr) >> 32,
+ ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+ tasklet_enable(&ioat_chan->cleanup_task);
+ ioat_dma_start_null_desc(ioat_chan); /* give chain to dma device */
+ return ioat_chan->desccount;
+}
+
+/**
+ * ioat_dma_free_chan_resources - release all the descriptors
+ * @chan: the channel to be cleaned
+ */
+static void ioat_dma_free_chan_resources(struct dma_chan *chan)
+{
+ struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+ struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device);
+ struct ioat_desc_sw *desc, *_desc;
+ int in_use_descs = 0;
+
+ /* Before freeing channel resources first check
+ * if they have been previously allocated for this channel.
+ */
+ if (ioat_chan->desccount == 0)
+ return;
+
+ tasklet_disable(&ioat_chan->cleanup_task);
+ ioat_dma_memcpy_cleanup(ioat_chan);
+
+ /* Delay 100ms after reset to allow internal DMA logic to quiesce
+ * before removing DMA descriptor resources.
+ */
+ writeb(IOAT_CHANCMD_RESET,
+ ioat_chan->reg_base
+ + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
+ mdelay(100);
+
+ spin_lock_bh(&ioat_chan->desc_lock);
+ switch (ioat_chan->device->version) {
+ case IOAT_VER_1_2:
+ list_for_each_entry_safe(desc, _desc,
+ &ioat_chan->used_desc, node) {
+ in_use_descs++;
+ list_del(&desc->node);
+ pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+ desc->async_tx.phys);
+ kfree(desc);
+ }
+ list_for_each_entry_safe(desc, _desc,
+ &ioat_chan->free_desc, node) {
+ list_del(&desc->node);
+ pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+ desc->async_tx.phys);
+ kfree(desc);
+ }
+ break;
+ case IOAT_VER_2_0:
+ case IOAT_VER_3_0:
+ list_for_each_entry_safe(desc, _desc,
+ ioat_chan->free_desc.next, node) {
+ list_del(&desc->node);
+ pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+ desc->async_tx.phys);
+ kfree(desc);
+ }
+ desc = to_ioat_desc(ioat_chan->free_desc.next);
+ pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+ desc->async_tx.phys);
+ kfree(desc);
+ INIT_LIST_HEAD(&ioat_chan->free_desc);
+ INIT_LIST_HEAD(&ioat_chan->used_desc);
+ break;
+ }
+ spin_unlock_bh(&ioat_chan->desc_lock);
+
+ pci_pool_free(ioatdma_device->completion_pool,
+ ioat_chan->completion_virt,
+ ioat_chan->completion_addr);
+
+ /* one is ok since we left it on there on purpose */
+ if (in_use_descs > 1)
+ dev_err(&ioat_chan->device->pdev->dev,
+ "Freeing %d in use descriptors!\n",
+ in_use_descs - 1);
+
+ ioat_chan->last_completion = ioat_chan->completion_addr = 0;
+ ioat_chan->pending = 0;
+ ioat_chan->dmacount = 0;
+ ioat_chan->desccount = 0;
+ ioat_chan->watchdog_completion = 0;
+ ioat_chan->last_compl_desc_addr_hw = 0;
+ ioat_chan->watchdog_tcp_cookie =
+ ioat_chan->watchdog_last_tcp_cookie = 0;
+}
+
+/**
+ * ioat_dma_get_next_descriptor - return the next available descriptor
+ * @ioat_chan: IOAT DMA channel handle
+ *
+ * Gets the next descriptor from the chain, and must be called with the
+ * channel's desc_lock held. Allocates more descriptors if the channel
+ * has run out.
+ */
+static struct ioat_desc_sw *
+ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
+{
+ struct ioat_desc_sw *new;
+
+ if (!list_empty(&ioat_chan->free_desc)) {
+ new = to_ioat_desc(ioat_chan->free_desc.next);
+ list_del(&new->node);
+ } else {
+ /* try to get another desc */
+ new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
+ if (!new) {
+ dev_err(&ioat_chan->device->pdev->dev,
+ "alloc failed\n");
+ return NULL;
+ }
+ }
+
+ prefetch(new->hw);
+ return new;
+}
+
+static struct ioat_desc_sw *
+ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
+{
+ struct ioat_desc_sw *new;
+
+ /*
+ * used.prev points to where to start processing
+ * used.next points to next free descriptor
+ * if used.prev == NULL, there are none waiting to be processed
+ * if used.next == used.prev.prev, there is only one free descriptor,
+ * and we need to use it to as a noop descriptor before
+ * linking in a new set of descriptors, since the device
+ * has probably already read the pointer to it
+ */
+ if (ioat_chan->used_desc.prev &&
+ ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) {
+
+ struct ioat_desc_sw *desc;
+ struct ioat_desc_sw *noop_desc;
+ int i;
+
+ /* set up the noop descriptor */
+ noop_desc = to_ioat_desc(ioat_chan->used_desc.next);
+ /* set size to non-zero value (channel returns error when size is 0) */
+ noop_desc->hw->size = NULL_DESC_BUFFER_SIZE;
+ noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
+ noop_desc->hw->src_addr = 0;
+ noop_desc->hw->dst_addr = 0;
+
+ ioat_chan->used_desc.next = ioat_chan->used_desc.next->next;
+ ioat_chan->pending++;
+ ioat_chan->dmacount++;
+
+ /* try to get a few more descriptors */
+ for (i = 16; i; i--) {
+ desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
+ if (!desc) {
+ dev_err(&ioat_chan->device->pdev->dev,
+ "alloc failed\n");
+ break;
+ }
+ list_add_tail(&desc->node, ioat_chan->used_desc.next);
+
+ desc->hw->next
+ = to_ioat_desc(desc->node.next)->async_tx.phys;
+ to_ioat_desc(desc->node.prev)->hw->next
+ = desc->async_tx.phys;
+ ioat_chan->desccount++;
+ }
+
+ ioat_chan->used_desc.next = noop_desc->node.next;
+ }
+ new = to_ioat_desc(ioat_chan->used_desc.next);
+ prefetch(new);
+ ioat_chan->used_desc.next = new->node.next;
+
+ if (ioat_chan->used_desc.prev == NULL)
+ ioat_chan->used_desc.prev = &new->node;
+
+ prefetch(new->hw);
+ return new;
+}
+
+static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
+ struct ioat_dma_chan *ioat_chan)
+{
+ if (!ioat_chan)
+ return NULL;
+
+ switch (ioat_chan->device->version) {
+ case IOAT_VER_1_2:
+ return ioat1_dma_get_next_descriptor(ioat_chan);
+ case IOAT_VER_2_0:
+ case IOAT_VER_3_0:
+ return ioat2_dma_get_next_descriptor(ioat_chan);
+ }
+ return NULL;
+}
+
+static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy(
+ struct dma_chan *chan,
+ dma_addr_t dma_dest,
+ dma_addr_t dma_src,
+ size_t len,
+ unsigned long flags)
+{
+ struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+ struct ioat_desc_sw *new;
+
+ spin_lock_bh(&ioat_chan->desc_lock);
+ new = ioat_dma_get_next_descriptor(ioat_chan);
+ spin_unlock_bh(&ioat_chan->desc_lock);
+
+ if (new) {
+ new->len = len;
+ new->dst = dma_dest;
+ new->src = dma_src;
+ new->async_tx.flags = flags;
+ return &new->async_tx;
+ } else {
+ dev_err(&ioat_chan->device->pdev->dev,
+ "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
+ chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
+ return NULL;
+ }
+}
+
+static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
+ struct dma_chan *chan,
+ dma_addr_t dma_dest,
+ dma_addr_t dma_src,
+ size_t len,
+ unsigned long flags)
+{
+ struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+ struct ioat_desc_sw *new;
+
+ spin_lock_bh(&ioat_chan->desc_lock);
+ new = ioat2_dma_get_next_descriptor(ioat_chan);
+
+ /*
+ * leave ioat_chan->desc_lock set in ioat 2 path
+ * it will get unlocked at end of tx_submit
+ */
+
+ if (new) {
+ new->len = len;
+ new->dst = dma_dest;
+ new->src = dma_src;
+ new->async_tx.flags = flags;
+ return &new->async_tx;
+ } else {
+ spin_unlock_bh(&ioat_chan->desc_lock);
+ dev_err(&ioat_chan->device->pdev->dev,
+ "chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
+ chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
+ return NULL;
+ }
+}
+
+static void ioat_dma_cleanup_tasklet(unsigned long data)
+{
+ struct ioat_dma_chan *chan = (void *)data;
+ ioat_dma_memcpy_cleanup(chan);
+ writew(IOAT_CHANCTRL_INT_DISABLE,
+ chan->reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+static void
+ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc)
+{
+ /*
+ * yes we are unmapping both _page and _single
+ * alloc'd regions with unmap_page. Is this
+ * *really* that bad?
+ */
+ if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP))
+ pci_unmap_page(ioat_chan->device->pdev,
+ pci_unmap_addr(desc, dst),
+ pci_unmap_len(desc, len),
+ PCI_DMA_FROMDEVICE);
+
+ if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP))
+ pci_unmap_page(ioat_chan->device->pdev,
+ pci_unmap_addr(desc, src),
+ pci_unmap_len(desc, len),
+ PCI_DMA_TODEVICE);
+}
+
+/**
+ * ioat_dma_memcpy_cleanup - cleanup up finished descriptors
+ * @chan: ioat channel to be cleaned up
+ */
+static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
+{
+ unsigned long phys_complete;
+ struct ioat_desc_sw *desc, *_desc;
+ dma_cookie_t cookie = 0;
+ unsigned long desc_phys;
+ struct ioat_desc_sw *latest_desc;
+
+ prefetch(ioat_chan->completion_virt);
+
+ if (!spin_trylock_bh(&ioat_chan->cleanup_lock))
+ return;
+
+ /* The completion writeback can happen at any time,
+ so reads by the driver need to be atomic operations
+ The descriptor physical addresses are limited to 32-bits
+ when the CPU can only do a 32-bit mov */
+
+#if (BITS_PER_LONG == 64)
+ phys_complete =
+ ioat_chan->completion_virt->full
+ & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+#else
+ phys_complete =
+ ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
+#endif
+
+ if ((ioat_chan->completion_virt->full
+ & IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
+ IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
+ dev_err(&ioat_chan->device->pdev->dev,
+ "Channel halted, chanerr = %x\n",
+ readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET));
+
+ /* TODO do something to salvage the situation */
+ }
+
+ if (phys_complete == ioat_chan->last_completion) {
+ spin_unlock_bh(&ioat_chan->cleanup_lock);
+ /*
+ * perhaps we're stuck so hard that the watchdog can't go off?
+ * try to catch it after 2 seconds
+ */
+ if (ioat_chan->device->version != IOAT_VER_3_0) {
+ if (time_after(jiffies,
+ ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) {
+ ioat_dma_chan_watchdog(&(ioat_chan->device->work.work));
+ ioat_chan->last_completion_time = jiffies;
+ }
+ }
+ return;
+ }
+ ioat_chan->last_completion_time = jiffies;
+
+ cookie = 0;
+ if (!spin_trylock_bh(&ioat_chan->desc_lock)) {
+ spin_unlock_bh(&ioat_chan->cleanup_lock);
+ return;
+ }
+
+ switch (ioat_chan->device->version) {
+ case IOAT_VER_1_2:
+ list_for_each_entry_safe(desc, _desc,
+ &ioat_chan->used_desc, node) {
+
+ /*
+ * Incoming DMA requests may use multiple descriptors,
+ * due to exceeding xfercap, perhaps. If so, only the
+ * last one will have a cookie, and require unmapping.
+ */
+ if (desc->async_tx.cookie) {
+ cookie = desc->async_tx.cookie;
+ ioat_dma_unmap(ioat_chan, desc);
+ if (desc->async_tx.callback) {
+ desc->async_tx.callback(desc->async_tx.callback_param);
+ desc->async_tx.callback = NULL;
+ }
+ }
+
+ if (desc->async_tx.phys != phys_complete) {
+ /*
+ * a completed entry, but not the last, so clean
+ * up if the client is done with the descriptor
+ */
+ if (async_tx_test_ack(&desc->async_tx)) {
+ list_del(&desc->node);
+ list_add_tail(&desc->node,
+ &ioat_chan->free_desc);
+ } else
+ desc->async_tx.cookie = 0;
+ } else {
+ /*
+ * last used desc. Do not remove, so we can
+ * append from it, but don't look at it next
+ * time, either
+ */
+ desc->async_tx.cookie = 0;
+
+ /* TODO check status bits? */
+ break;
+ }
+ }
+ break;
+ case IOAT_VER_2_0:
+ case IOAT_VER_3_0:
+ /* has some other thread has already cleaned up? */
+ if (ioat_chan->used_desc.prev == NULL)
+ break;
+
+ /* work backwards to find latest finished desc */
+ desc = to_ioat_desc(ioat_chan->used_desc.next);
+ latest_desc = NULL;
+ do {
+ desc = to_ioat_desc(desc->node.prev);
+ desc_phys = (unsigned long)desc->async_tx.phys
+ & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+ if (desc_phys == phys_complete) {
+ latest_desc = desc;
+ break;
+ }
+ } while (&desc->node != ioat_chan->used_desc.prev);
+
+ if (latest_desc != NULL) {
+
+ /* work forwards to clear finished descriptors */
+ for (desc = to_ioat_desc(ioat_chan->used_desc.prev);
+ &desc->node != latest_desc->node.next &&
+ &desc->node != ioat_chan->used_desc.next;
+ desc = to_ioat_desc(desc->node.next)) {
+ if (desc->async_tx.cookie) {
+ cookie = desc->async_tx.cookie;
+ desc->async_tx.cookie = 0;
+ ioat_dma_unmap(ioat_chan, desc);
+ if (desc->async_tx.callback) {
+ desc->async_tx.callback(desc->async_tx.callback_param);
+ desc->async_tx.callback = NULL;
+ }
+ }
+ }
+
+ /* move used.prev up beyond those that are finished */
+ if (&desc->node == ioat_chan->used_desc.next)
+ ioat_chan->used_desc.prev = NULL;
+ else
+ ioat_chan->used_desc.prev = &desc->node;
+ }
+ break;
+ }
+
+ spin_unlock_bh(&ioat_chan->desc_lock);
+
+ ioat_chan->last_completion = phys_complete;
+ if (cookie != 0)
+ ioat_chan->completed_cookie = cookie;
+
+ spin_unlock_bh(&ioat_chan->cleanup_lock);
+}
+
+/**
+ * ioat_dma_is_complete - poll the status of a IOAT DMA transaction
+ * @chan: IOAT DMA channel handle
+ * @cookie: DMA transaction identifier
+ * @done: if not %NULL, updated with last completed transaction
+ * @used: if not %NULL, updated with last used transaction
+ */
+static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ dma_cookie_t *done,
+ dma_cookie_t *used)
+{
+ struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+ enum dma_status ret;
+
+ last_used = chan->cookie;
+ last_complete = ioat_chan->completed_cookie;
+ ioat_chan->watchdog_tcp_cookie = cookie;
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+ if (ret == DMA_SUCCESS)
+ return ret;
+
+ ioat_dma_memcpy_cleanup(ioat_chan);
+
+ last_used = chan->cookie;
+ last_complete = ioat_chan->completed_cookie;
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
+{
+ struct ioat_desc_sw *desc;
+
+ spin_lock_bh(&ioat_chan->desc_lock);
+
+ desc = ioat_dma_get_next_descriptor(ioat_chan);
+
+ if (!desc) {
+ dev_err(&ioat_chan->device->pdev->dev,
+ "Unable to start null desc - get next desc failed\n");
+ spin_unlock_bh(&ioat_chan->desc_lock);
+ return;
+ }
+
+ desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
+ | IOAT_DMA_DESCRIPTOR_CTL_INT_GN
+ | IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
+ /* set size to non-zero value (channel returns error when size is 0) */
+ desc->hw->size = NULL_DESC_BUFFER_SIZE;
+ desc->hw->src_addr = 0;
+ desc->hw->dst_addr = 0;
+ async_tx_ack(&desc->async_tx);
+ switch (ioat_chan->device->version) {
+ case IOAT_VER_1_2:
+ desc->hw->next = 0;
+ list_add_tail(&desc->node, &ioat_chan->used_desc);
+
+ writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
+ ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
+ writel(((u64) desc->async_tx.phys) >> 32,
+ ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
+
+ writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
+ + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
+ break;
+ case IOAT_VER_2_0:
+ case IOAT_VER_3_0:
+ writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
+ ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
+ writel(((u64) desc->async_tx.phys) >> 32,
+ ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
+
+ ioat_chan->dmacount++;
+ __ioat2_dma_memcpy_issue_pending(ioat_chan);
+ break;
+ }
+ spin_unlock_bh(&ioat_chan->desc_lock);
+}
+
+/*
+ * Perform a IOAT transaction to verify the HW works.
+ */
+#define IOAT_TEST_SIZE 2000
+
+static void ioat_dma_test_callback(void *dma_async_param)
+{
+ struct completion *cmp = dma_async_param;
+
+ complete(cmp);
+}
+
+/**
+ * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works.
+ * @device: device to be tested
+ */
+static int ioat_dma_self_test(struct ioatdma_device *device)
+{
+ int i;
+ u8 *src;
+ u8 *dest;
+ struct dma_chan *dma_chan;
+ struct dma_async_tx_descriptor *tx;
+ dma_addr_t dma_dest, dma_src;
+ dma_cookie_t cookie;
+ int err = 0;
+ struct completion cmp;
+
+ src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
+ if (!src)
+ return -ENOMEM;
+ dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
+ if (!dest) {
+ kfree(src);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffer */
+ for (i = 0; i < IOAT_TEST_SIZE; i++)
+ src[i] = (u8)i;
+
+ /* Start copy, using first DMA channel */
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (device->common.device_alloc_chan_resources(dma_chan, NULL) < 1) {
+ dev_err(&device->pdev->dev,
+ "selftest cannot allocate chan resource\n");
+ err = -ENODEV;
+ goto out;
+ }
+
+ dma_src = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
+ DMA_TO_DEVICE);
+ dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
+ DMA_FROM_DEVICE);
+ tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
+ IOAT_TEST_SIZE, 0);
+ if (!tx) {
+ dev_err(&device->pdev->dev,
+ "Self-test prep failed, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ async_tx_ack(tx);
+ init_completion(&cmp);
+ tx->callback = ioat_dma_test_callback;
+ tx->callback_param = &cmp;
+ cookie = tx->tx_submit(tx);
+ if (cookie < 0) {
+ dev_err(&device->pdev->dev,
+ "Self-test setup failed, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ device->common.device_issue_pending(dma_chan);
+
+ wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+ if (device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL)
+ != DMA_SUCCESS) {
+ dev_err(&device->pdev->dev,
+ "Self-test copy timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ if (memcmp(src, dest, IOAT_TEST_SIZE)) {
+ dev_err(&device->pdev->dev,
+ "Self-test copy failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ device->common.device_free_chan_resources(dma_chan);
+out:
+ kfree(src);
+ kfree(dest);
+ return err;
+}
+
+static char ioat_interrupt_style[32] = "msix";
+module_param_string(ioat_interrupt_style, ioat_interrupt_style,
+ sizeof(ioat_interrupt_style), 0644);
+MODULE_PARM_DESC(ioat_interrupt_style,
+ "set ioat interrupt style: msix (default), "
+ "msix-single-vector, msi, intx)");
+
+/**
+ * ioat_dma_setup_interrupts - setup interrupt handler
+ * @device: ioat device
+ */
+static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
+{
+ struct ioat_dma_chan *ioat_chan;
+ int err, i, j, msixcnt;
+ u8 intrctrl = 0;
+
+ if (!strcmp(ioat_interrupt_style, "msix"))
+ goto msix;
+ if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
+ goto msix_single_vector;
+ if (!strcmp(ioat_interrupt_style, "msi"))
+ goto msi;
+ if (!strcmp(ioat_interrupt_style, "intx"))
+ goto intx;
+ dev_err(&device->pdev->dev, "invalid ioat_interrupt_style %s\n",
+ ioat_interrupt_style);
+ goto err_no_irq;
+
+msix:
+ /* The number of MSI-X vectors should equal the number of channels */
+ msixcnt = device->common.chancnt;
+ for (i = 0; i < msixcnt; i++)
+ device->msix_entries[i].entry = i;
+
+ err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt);
+ if (err < 0)
+ goto msi;
+ if (err > 0)
+ goto msix_single_vector;
+
+ for (i = 0; i < msixcnt; i++) {
+ ioat_chan = ioat_lookup_chan_by_index(device, i);
+ err = request_irq(device->msix_entries[i].vector,
+ ioat_dma_do_interrupt_msix,
+ 0, "ioat-msix", ioat_chan);
+ if (err) {
+ for (j = 0; j < i; j++) {
+ ioat_chan =
+ ioat_lookup_chan_by_index(device, j);
+ free_irq(device->msix_entries[j].vector,
+ ioat_chan);
+ }
+ goto msix_single_vector;
+ }
+ }
+ intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
+ device->irq_mode = msix_multi_vector;
+ goto done;
+
+msix_single_vector:
+ device->msix_entries[0].entry = 0;
+ err = pci_enable_msix(device->pdev, device->msix_entries, 1);
+ if (err)
+ goto msi;
+
+ err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt,
+ 0, "ioat-msix", device);
+ if (err) {
+ pci_disable_msix(device->pdev);
+ goto msi;
+ }
+ device->irq_mode = msix_single_vector;
+ goto done;
+
+msi:
+ err = pci_enable_msi(device->pdev);
+ if (err)
+ goto intx;
+
+ err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
+ 0, "ioat-msi", device);
+ if (err) {
+ pci_disable_msi(device->pdev);
+ goto intx;
+ }
+ /*
+ * CB 1.2 devices need a bit set in configuration space to enable MSI
+ */
+ if (device->version == IOAT_VER_1_2) {
+ u32 dmactrl;
+ pci_read_config_dword(device->pdev,
+ IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
+ dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
+ pci_write_config_dword(device->pdev,
+ IOAT_PCI_DMACTRL_OFFSET, dmactrl);
+ }
+ device->irq_mode = msi;
+ goto done;
+
+intx:
+ err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
+ IRQF_SHARED, "ioat-intx", device);
+ if (err)
+ goto err_no_irq;
+ device->irq_mode = intx;
+
+done:
+ intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
+ writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
+ return 0;
+
+err_no_irq:
+ /* Disable all interrupt generation */
+ writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
+ dev_err(&device->pdev->dev, "no usable interrupts\n");
+ device->irq_mode = none;
+ return -1;
+}
+
+/**
+ * ioat_dma_remove_interrupts - remove whatever interrupts were set
+ * @device: ioat device
+ */
+static void ioat_dma_remove_interrupts(struct ioatdma_device *device)
+{
+ struct ioat_dma_chan *ioat_chan;
+ int i;
+
+ /* Disable all interrupt generation */
+ writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
+
+ switch (device->irq_mode) {
+ case msix_multi_vector:
+ for (i = 0; i < device->common.chancnt; i++) {
+ ioat_chan = ioat_lookup_chan_by_index(device, i);
+ free_irq(device->msix_entries[i].vector, ioat_chan);
+ }
+ pci_disable_msix(device->pdev);
+ break;
+ case msix_single_vector:
+ free_irq(device->msix_entries[0].vector, device);
+ pci_disable_msix(device->pdev);
+ break;
+ case msi:
+ free_irq(device->pdev->irq, device);
+ pci_disable_msi(device->pdev);
+ break;
+ case intx:
+ free_irq(device->pdev->irq, device);
+ break;
+ case none:
+ dev_warn(&device->pdev->dev,
+ "call to %s without interrupts setup\n", __func__);
+ }
+ device->irq_mode = none;
+}
+
+struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
+ void __iomem *iobase)
+{
+ int err;
+ struct ioatdma_device *device;
+
+ device = kzalloc(sizeof(*device), GFP_KERNEL);
+ if (!device) {
+ err = -ENOMEM;
+ goto err_kzalloc;
+ }
+ device->pdev = pdev;
+ device->reg_base = iobase;
+ device->version = readb(device->reg_base + IOAT_VER_OFFSET);
+
+ /* DMA coherent memory pool for DMA descriptor allocations */
+ device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
+ sizeof(struct ioat_dma_descriptor),
+ 64, 0);
+ if (!device->dma_pool) {
+ err = -ENOMEM;
+ goto err_dma_pool;
+ }
+
+ device->completion_pool = pci_pool_create("completion_pool", pdev,
+ sizeof(u64), SMP_CACHE_BYTES,
+ SMP_CACHE_BYTES);
+ if (!device->completion_pool) {
+ err = -ENOMEM;
+ goto err_completion_pool;
+ }
+
+ INIT_LIST_HEAD(&device->common.channels);
+ ioat_dma_enumerate_channels(device);
+
+ device->common.device_alloc_chan_resources =
+ ioat_dma_alloc_chan_resources;
+ device->common.device_free_chan_resources =
+ ioat_dma_free_chan_resources;
+ device->common.dev = &pdev->dev;
+
+ dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
+ device->common.device_is_tx_complete = ioat_dma_is_complete;
+ switch (device->version) {
+ case IOAT_VER_1_2:
+ device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
+ device->common.device_issue_pending =
+ ioat1_dma_memcpy_issue_pending;
+ break;
+ case IOAT_VER_2_0:
+ case IOAT_VER_3_0:
+ device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy;
+ device->common.device_issue_pending =
+ ioat2_dma_memcpy_issue_pending;
+ break;
+ }
+
+ dev_err(&device->pdev->dev,
+ "Intel(R) I/OAT DMA Engine found,"
+ " %d channels, device version 0x%02x, driver version %s\n",
+ device->common.chancnt, device->version, IOAT_DMA_VERSION);
+
+ err = ioat_dma_setup_interrupts(device);
+ if (err)
+ goto err_setup_interrupts;
+
+ err = ioat_dma_self_test(device);
+ if (err)
+ goto err_self_test;
+
+ ioat_set_tcp_copy_break(device);
+
+ dma_async_device_register(&device->common);
+
+ if (device->version != IOAT_VER_3_0) {
+ INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
+ schedule_delayed_work(&device->work,
+ WATCHDOG_DELAY);
+ }
+
+ return device;
+
+err_self_test:
+ ioat_dma_remove_interrupts(device);
+err_setup_interrupts:
+ pci_pool_destroy(device->completion_pool);
+err_completion_pool:
+ pci_pool_destroy(device->dma_pool);
+err_dma_pool:
+ kfree(device);
+err_kzalloc:
+ dev_err(&pdev->dev,
+ "Intel(R) I/OAT DMA Engine initialization failed\n");
+ return NULL;
+}
+
+void ioat_dma_remove(struct ioatdma_device *device)
+{
+ struct dma_chan *chan, *_chan;
+ struct ioat_dma_chan *ioat_chan;
+
+ ioat_dma_remove_interrupts(device);
+
+ dma_async_device_unregister(&device->common);
+
+ pci_pool_destroy(device->dma_pool);
+ pci_pool_destroy(device->completion_pool);
+
+ iounmap(device->reg_base);
+ pci_release_regions(device->pdev);
+ pci_disable_device(device->pdev);
+
+ if (device->version != IOAT_VER_3_0) {
+ cancel_delayed_work(&device->work);
+ }
+
+ list_for_each_entry_safe(chan, _chan,
+ &device->common.channels, device_node) {
+ ioat_chan = to_ioat_chan(chan);
+ list_del(&chan->device_node);
+ kfree(ioat_chan);
+ }
+ kfree(device);
+}
+
diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h
new file mode 100644
index 0000000..a3306d0
--- /dev/null
+++ b/drivers/dma/ioatdma.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright(c) 2004 - 2007 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef IOATDMA_H
+#define IOATDMA_H
+
+#include <linux/dmaengine.h>
+#include "ioatdma_hw.h"
+#include <linux/init.h>
+#include <linux/dmapool.h>
+#include <linux/cache.h>
+#include <linux/pci_ids.h>
+#include <net/tcp.h>
+
+#define IOAT_DMA_VERSION "3.30"
+
+enum ioat_interrupt {
+ none = 0,
+ msix_multi_vector = 1,
+ msix_single_vector = 2,
+ msi = 3,
+ intx = 4,
+};
+
+#define IOAT_LOW_COMPLETION_MASK 0xffffffc0
+#define IOAT_DMA_DCA_ANY_CPU ~0
+#define IOAT_WATCHDOG_PERIOD (2 * HZ)
+
+
+/**
+ * struct ioatdma_device - internal representation of a IOAT device
+ * @pdev: PCI-Express device
+ * @reg_base: MMIO register space base address
+ * @dma_pool: for allocating DMA descriptors
+ * @common: embedded struct dma_device
+ * @version: version of ioatdma device
+ * @irq_mode: which style irq to use
+ * @msix_entries: irq handlers
+ * @idx: per channel data
+ */
+
+struct ioatdma_device {
+ struct pci_dev *pdev;
+ void __iomem *reg_base;
+ struct pci_pool *dma_pool;
+ struct pci_pool *completion_pool;
+ struct dma_device common;
+ u8 version;
+ enum ioat_interrupt irq_mode;
+ struct delayed_work work;
+ struct msix_entry msix_entries[4];
+ struct ioat_dma_chan *idx[4];
+};
+
+/**
+ * struct ioat_dma_chan - internal representation of a DMA channel
+ */
+struct ioat_dma_chan {
+
+ void __iomem *reg_base;
+
+ dma_cookie_t completed_cookie;
+ unsigned long last_completion;
+ unsigned long last_completion_time;
+
+ size_t xfercap; /* XFERCAP register value expanded out */
+
+ spinlock_t cleanup_lock;
+ spinlock_t desc_lock;
+ struct list_head free_desc;
+ struct list_head used_desc;
+ unsigned long watchdog_completion;
+ int watchdog_tcp_cookie;
+ u32 watchdog_last_tcp_cookie;
+ struct delayed_work work;
+
+ int pending;
+ int dmacount;
+ int desccount;
+
+ struct ioatdma_device *device;
+ struct dma_chan common;
+
+ dma_addr_t completion_addr;
+ union {
+ u64 full; /* HW completion writeback */
+ struct {
+ u32 low;
+ u32 high;
+ };
+ } *completion_virt;
+ unsigned long last_compl_desc_addr_hw;
+ struct tasklet_struct cleanup_task;
+};
+
+/* wrapper around hardware descriptor format + additional software fields */
+
+/**
+ * struct ioat_desc_sw - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor
+ * @node: this descriptor will either be on the free list,
+ * or attached to a transaction list (async_tx.tx_list)
+ * @tx_cnt: number of descriptors required to complete the transaction
+ * @async_tx: the generic software descriptor for all engines
+ */
+struct ioat_desc_sw {
+ struct ioat_dma_descriptor *hw;
+ struct list_head node;
+ int tx_cnt;
+ size_t len;
+ dma_addr_t src;
+ dma_addr_t dst;
+ struct dma_async_tx_descriptor async_tx;
+};
+
+static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev)
+{
+ #ifdef CONFIG_NET_DMA
+ switch (dev->version) {
+ case IOAT_VER_1_2:
+ case IOAT_VER_3_0:
+ sysctl_tcp_dma_copybreak = 4096;
+ break;
+ case IOAT_VER_2_0:
+ sysctl_tcp_dma_copybreak = 2048;
+ break;
+ }
+ #endif
+}
+
+#if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE)
+struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
+ void __iomem *iobase);
+void ioat_dma_remove(struct ioatdma_device *device);
+struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+#else
+#define ioat_dma_probe(pdev, iobase) NULL
+#define ioat_dma_remove(device) do { } while (0)
+#define ioat_dca_init(pdev, iobase) NULL
+#define ioat2_dca_init(pdev, iobase) NULL
+#define ioat3_dca_init(pdev, iobase) NULL
+#endif
+
+#endif /* IOATDMA_H */
diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h
new file mode 100644
index 0000000..f1ae2c7
--- /dev/null
+++ b/drivers/dma/ioatdma_hw.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright(c) 2004 - 2007 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_HW_H_
+#define _IOAT_HW_H_
+
+/* PCI Configuration Space Values */
+#define IOAT_PCI_VID 0x8086
+
+/* CB device ID's */
+#define IOAT_PCI_DID_5000 0x1A38
+#define IOAT_PCI_DID_CNB 0x360B
+#define IOAT_PCI_DID_SCNB 0x65FF
+#define IOAT_PCI_DID_SNB 0x402F
+
+#define IOAT_PCI_RID 0x00
+#define IOAT_PCI_SVID 0x8086
+#define IOAT_PCI_SID 0x8086
+#define IOAT_VER_1_2 0x12 /* Version 1.2 */
+#define IOAT_VER_2_0 0x20 /* Version 2.0 */
+#define IOAT_VER_3_0 0x30 /* Version 3.0 */
+
+struct ioat_dma_descriptor {
+ uint32_t size;
+ uint32_t ctl;
+ uint64_t src_addr;
+ uint64_t dst_addr;
+ uint64_t next;
+ uint64_t rsv1;
+ uint64_t rsv2;
+ uint64_t user1;
+ uint64_t user2;
+};
+
+#define IOAT_DMA_DESCRIPTOR_CTL_INT_GN 0x00000001
+#define IOAT_DMA_DESCRIPTOR_CTL_SRC_SN 0x00000002
+#define IOAT_DMA_DESCRIPTOR_CTL_DST_SN 0x00000004
+#define IOAT_DMA_DESCRIPTOR_CTL_CP_STS 0x00000008
+#define IOAT_DMA_DESCRIPTOR_CTL_FRAME 0x00000010
+#define IOAT_DMA_DESCRIPTOR_NUL 0x00000020
+#define IOAT_DMA_DESCRIPTOR_CTL_SP_BRK 0x00000040
+#define IOAT_DMA_DESCRIPTOR_CTL_DP_BRK 0x00000080
+#define IOAT_DMA_DESCRIPTOR_CTL_BNDL 0x00000100
+#define IOAT_DMA_DESCRIPTOR_CTL_DCA 0x00000200
+#define IOAT_DMA_DESCRIPTOR_CTL_BUFHINT 0x00000400
+
+#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_CONTEXT 0xFF000000
+#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_DMA 0x00000000
+
+#define IOAT_DMA_DESCRIPTOR_CTL_CONTEXT_DCA 0x00000001
+#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_MASK 0xFF000000
+
+#endif
diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioatdma_registers.h
new file mode 100644
index 0000000..827cb50
--- /dev/null
+++ b/drivers/dma/ioatdma_registers.h
@@ -0,0 +1,226 @@
+/*
+ * Copyright(c) 2004 - 2007 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_REGISTERS_H_
+#define _IOAT_REGISTERS_H_
+
+#define IOAT_PCI_DMACTRL_OFFSET 0x48
+#define IOAT_PCI_DMACTRL_DMA_EN 0x00000001
+#define IOAT_PCI_DMACTRL_MSI_EN 0x00000002
+
+#define IOAT_PCI_DEVICE_ID_OFFSET 0x02
+#define IOAT_PCI_DMAUNCERRSTS_OFFSET 0x148
+#define IOAT_PCI_CHANERRMASK_INT_OFFSET 0x184
+
+/* MMIO Device Registers */
+#define IOAT_CHANCNT_OFFSET 0x00 /* 8-bit */
+
+#define IOAT_XFERCAP_OFFSET 0x01 /* 8-bit */
+#define IOAT_XFERCAP_4KB 12
+#define IOAT_XFERCAP_8KB 13
+#define IOAT_XFERCAP_16KB 14
+#define IOAT_XFERCAP_32KB 15
+#define IOAT_XFERCAP_32GB 0
+
+#define IOAT_GENCTRL_OFFSET 0x02 /* 8-bit */
+#define IOAT_GENCTRL_DEBUG_EN 0x01
+
+#define IOAT_INTRCTRL_OFFSET 0x03 /* 8-bit */
+#define IOAT_INTRCTRL_MASTER_INT_EN 0x01 /* Master Interrupt Enable */
+#define IOAT_INTRCTRL_INT_STATUS 0x02 /* ATTNSTATUS -or- Channel Int */
+#define IOAT_INTRCTRL_INT 0x04 /* INT_STATUS -and- MASTER_INT_EN */
+#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL 0x08 /* Enable all MSI-X vectors */
+
+#define IOAT_ATTNSTATUS_OFFSET 0x04 /* Each bit is a channel */
+
+#define IOAT_VER_OFFSET 0x08 /* 8-bit */
+#define IOAT_VER_MAJOR_MASK 0xF0
+#define IOAT_VER_MINOR_MASK 0x0F
+#define GET_IOAT_VER_MAJOR(x) (((x) & IOAT_VER_MAJOR_MASK) >> 4)
+#define GET_IOAT_VER_MINOR(x) ((x) & IOAT_VER_MINOR_MASK)
+
+#define IOAT_PERPORTOFFSET_OFFSET 0x0A /* 16-bit */
+
+#define IOAT_INTRDELAY_OFFSET 0x0C /* 16-bit */
+#define IOAT_INTRDELAY_INT_DELAY_MASK 0x3FFF /* Interrupt Delay Time */
+#define IOAT_INTRDELAY_COALESE_SUPPORT 0x8000 /* Interrupt Coalescing Supported */
+
+#define IOAT_DEVICE_STATUS_OFFSET 0x0E /* 16-bit */
+#define IOAT_DEVICE_STATUS_DEGRADED_MODE 0x0001
+
+#define IOAT_CHANNEL_MMIO_SIZE 0x80 /* Each Channel MMIO space is this size */
+
+/* DMA Channel Registers */
+#define IOAT_CHANCTRL_OFFSET 0x00 /* 16-bit Channel Control Register */
+#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK 0xF000
+#define IOAT_CHANCTRL_CHANNEL_IN_USE 0x0100
+#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL 0x0020
+#define IOAT_CHANCTRL_ERR_INT_EN 0x0010
+#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN 0x0008
+#define IOAT_CHANCTRL_ERR_COMPLETION_EN 0x0004
+#define IOAT_CHANCTRL_INT_DISABLE 0x0001
+
+#define IOAT_DMA_COMP_OFFSET 0x02 /* 16-bit DMA channel compatibility */
+#define IOAT_DMA_COMP_V1 0x0001 /* Compatibility with DMA version 1 */
+#define IOAT_DMA_COMP_V2 0x0002 /* Compatibility with DMA version 2 */
+
+
+#define IOAT1_CHANSTS_OFFSET 0x04 /* 64-bit Channel Status Register */
+#define IOAT2_CHANSTS_OFFSET 0x08 /* 64-bit Channel Status Register */
+#define IOAT_CHANSTS_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET)
+#define IOAT1_CHANSTS_OFFSET_LOW 0x04
+#define IOAT2_CHANSTS_OFFSET_LOW 0x08
+#define IOAT_CHANSTS_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW)
+#define IOAT1_CHANSTS_OFFSET_HIGH 0x08
+#define IOAT2_CHANSTS_OFFSET_HIGH 0x0C
+#define IOAT_CHANSTS_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR ~0x3F
+#define IOAT_CHANSTS_SOFT_ERR 0x0000000000000010
+#define IOAT_CHANSTS_UNAFFILIATED_ERR 0x0000000000000008
+#define IOAT_CHANSTS_DMA_TRANSFER_STATUS 0x0000000000000007
+#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE 0x0
+#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE 0x1
+#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED 0x2
+#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED 0x3
+
+
+
+#define IOAT_CHAN_DMACOUNT_OFFSET 0x06 /* 16-bit DMA Count register */
+
+#define IOAT_DCACTRL_OFFSET 0x30 /* 32 bit Direct Cache Access Control Register */
+#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000
+#define IOAT_DCACTRL_TARGET_CPU_MASK 0xFFFF /* APIC ID */
+
+/* CB DCA Memory Space Registers */
+#define IOAT_DCAOFFSET_OFFSET 0x14
+/* CB_BAR + IOAT_DCAOFFSET value */
+#define IOAT_DCA_VER_OFFSET 0x00
+#define IOAT_DCA_VER_MAJOR_MASK 0xF0
+#define IOAT_DCA_VER_MINOR_MASK 0x0F
+
+#define IOAT_DCA_COMP_OFFSET 0x02
+#define IOAT_DCA_COMP_V1 0x1
+
+#define IOAT_FSB_CAPABILITY_OFFSET 0x04
+#define IOAT_FSB_CAPABILITY_PREFETCH 0x1
+
+#define IOAT_PCI_CAPABILITY_OFFSET 0x06
+#define IOAT_PCI_CAPABILITY_MEMWR 0x1
+
+#define IOAT_FSB_CAP_ENABLE_OFFSET 0x08
+#define IOAT_FSB_CAP_ENABLE_PREFETCH 0x1
+
+#define IOAT_PCI_CAP_ENABLE_OFFSET 0x0A
+#define IOAT_PCI_CAP_ENABLE_MEMWR 0x1
+
+#define IOAT_APICID_TAG_MAP_OFFSET 0x0C
+#define IOAT_APICID_TAG_MAP_TAG0 0x0000000F
+#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0
+#define IOAT_APICID_TAG_MAP_TAG1 0x000000F0
+#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4
+#define IOAT_APICID_TAG_MAP_TAG2 0x00000F00
+#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8
+#define IOAT_APICID_TAG_MAP_TAG3 0x0000F000
+#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12
+#define IOAT_APICID_TAG_MAP_TAG4 0x000F0000
+#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16
+#define IOAT_APICID_TAG_CB2_VALID 0x8080808080
+
+#define IOAT_DCA_GREQID_OFFSET 0x10
+#define IOAT_DCA_GREQID_SIZE 0x04
+#define IOAT_DCA_GREQID_MASK 0xFFFF
+#define IOAT_DCA_GREQID_IGNOREFUN 0x10000000
+#define IOAT_DCA_GREQID_VALID 0x20000000
+#define IOAT_DCA_GREQID_LASTID 0x80000000
+
+#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
+#define IOAT3_CSI_CAPABILITY_PREFETCH 0x1
+
+#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
+#define IOAT3_PCI_CAPABILITY_MEMWR 0x1
+
+#define IOAT3_CSI_CONTROL_OFFSET 0x0C
+#define IOAT3_CSI_CONTROL_PREFETCH 0x1
+
+#define IOAT3_PCI_CONTROL_OFFSET 0x0E
+#define IOAT3_PCI_CONTROL_MEMWR 0x1
+
+#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_LOW 0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
+
+#define IOAT3_DCA_GREQID_OFFSET 0x02
+
+#define IOAT1_CHAINADDR_OFFSET 0x0C /* 64-bit Descriptor Chain Address Register */
+#define IOAT2_CHAINADDR_OFFSET 0x10 /* 64-bit Descriptor Chain Address Register */
+#define IOAT_CHAINADDR_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
+#define IOAT1_CHAINADDR_OFFSET_LOW 0x0C
+#define IOAT2_CHAINADDR_OFFSET_LOW 0x10
+#define IOAT_CHAINADDR_OFFSET_LOW(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
+#define IOAT1_CHAINADDR_OFFSET_HIGH 0x10
+#define IOAT2_CHAINADDR_OFFSET_HIGH 0x14
+#define IOAT_CHAINADDR_OFFSET_HIGH(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)
+
+#define IOAT1_CHANCMD_OFFSET 0x14 /* 8-bit DMA Channel Command Register */
+#define IOAT2_CHANCMD_OFFSET 0x04 /* 8-bit DMA Channel Command Register */
+#define IOAT_CHANCMD_OFFSET(ver) ((ver) < IOAT_VER_2_0 \
+ ? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
+#define IOAT_CHANCMD_RESET 0x20
+#define IOAT_CHANCMD_RESUME 0x10
+#define IOAT_CHANCMD_ABORT 0x08
+#define IOAT_CHANCMD_SUSPEND 0x04
+#define IOAT_CHANCMD_APPEND 0x02
+#define IOAT_CHANCMD_START 0x01
+
+#define IOAT_CHANCMP_OFFSET 0x18 /* 64-bit Channel Completion Address Register */
+#define IOAT_CHANCMP_OFFSET_LOW 0x18
+#define IOAT_CHANCMP_OFFSET_HIGH 0x1C
+
+#define IOAT_CDAR_OFFSET 0x20 /* 64-bit Current Descriptor Address Register */
+#define IOAT_CDAR_OFFSET_LOW 0x20
+#define IOAT_CDAR_OFFSET_HIGH 0x24
+
+#define IOAT_CHANERR_OFFSET 0x28 /* 32-bit Channel Error Register */
+#define IOAT_CHANERR_DMA_TRANSFER_SRC_ADDR_ERR 0x0001
+#define IOAT_CHANERR_DMA_TRANSFER_DEST_ADDR_ERR 0x0002
+#define IOAT_CHANERR_NEXT_DESCRIPTOR_ADDR_ERR 0x0004
+#define IOAT_CHANERR_NEXT_DESCRIPTOR_ALIGNMENT_ERR 0x0008
+#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR 0x0010
+#define IOAT_CHANERR_CHANCMD_ERR 0x0020
+#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0040
+#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR 0x0080
+#define IOAT_CHANERR_READ_DATA_ERR 0x0100
+#define IOAT_CHANERR_WRITE_DATA_ERR 0x0200
+#define IOAT_CHANERR_DESCRIPTOR_CONTROL_ERR 0x0400
+#define IOAT_CHANERR_DESCRIPTOR_LENGTH_ERR 0x0800
+#define IOAT_CHANERR_COMPLETION_ADDR_ERR 0x1000
+#define IOAT_CHANERR_INT_CONFIGURATION_ERR 0x2000
+#define IOAT_CHANERR_SOFT_ERR 0x4000
+#define IOAT_CHANERR_UNAFFILIATED_ERR 0x8000
+
+#define IOAT_CHANERR_MASK_OFFSET 0x2C /* 32-bit Channel Error Register */
+
+#endif /* _IOAT_REGISTERS_H_ */
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
new file mode 100644
index 0000000..6be3172
--- /dev/null
+++ b/drivers/dma/iop-adma.c
@@ -0,0 +1,1448 @@
+/*
+ * offload engine driver for the Intel Xscale series of i/o processors
+ * Copyright © 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+/*
+ * This driver supports the asynchrounous DMA copy and RAID engines available
+ * on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/async_tx.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/memory.h>
+#include <linux/ioport.h>
+
+#include <mach/adma.h>
+
+#define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common)
+#define to_iop_adma_device(dev) \
+ container_of(dev, struct iop_adma_device, common)
+#define tx_to_iop_adma_slot(tx) \
+ container_of(tx, struct iop_adma_desc_slot, async_tx)
+
+/**
+ * iop_adma_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &iop_chan->lock while calling this function
+ */
+static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
+{
+ int stride = slot->slots_per_op;
+
+ while (stride--) {
+ slot->slots_per_op = 0;
+ slot = list_entry(slot->slot_node.next,
+ struct iop_adma_desc_slot,
+ slot_node);
+ }
+}
+
+static dma_cookie_t
+iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
+ struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
+{
+ BUG_ON(desc->async_tx.cookie < 0);
+ if (desc->async_tx.cookie > 0) {
+ cookie = desc->async_tx.cookie;
+ desc->async_tx.cookie = 0;
+
+ /* call the callback (must not sleep or submit new
+ * operations to this channel)
+ */
+ if (desc->async_tx.callback)
+ desc->async_tx.callback(
+ desc->async_tx.callback_param);
+
+ /* unmap dma addresses
+ * (unmap_single vs unmap_page?)
+ */
+ if (desc->group_head && desc->unmap_len) {
+ struct iop_adma_desc_slot *unmap = desc->group_head;
+ struct device *dev =
+ &iop_chan->device->pdev->dev;
+ u32 len = unmap->unmap_len;
+ enum dma_ctrl_flags flags = desc->async_tx.flags;
+ u32 src_cnt;
+ dma_addr_t addr;
+ dma_addr_t dest;
+
+ src_cnt = unmap->unmap_src_cnt;
+ dest = iop_desc_get_dest_addr(unmap, iop_chan);
+ if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+ enum dma_data_direction dir;
+
+ if (src_cnt > 1) /* is xor? */
+ dir = DMA_BIDIRECTIONAL;
+ else
+ dir = DMA_FROM_DEVICE;
+
+ dma_unmap_page(dev, dest, len, dir);
+ }
+
+ if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ while (src_cnt--) {
+ addr = iop_desc_get_src_addr(unmap,
+ iop_chan,
+ src_cnt);
+ if (addr == dest)
+ continue;
+ dma_unmap_page(dev, addr, len,
+ DMA_TO_DEVICE);
+ }
+ }
+ desc->group_head = NULL;
+ }
+ }
+
+ /* run dependent operations */
+ async_tx_run_dependencies(&desc->async_tx);
+
+ return cookie;
+}
+
+static int
+iop_adma_clean_slot(struct iop_adma_desc_slot *desc,
+ struct iop_adma_chan *iop_chan)
+{
+ /* the client is allowed to attach dependent operations
+ * until 'ack' is set
+ */
+ if (!async_tx_test_ack(&desc->async_tx))
+ return 0;
+
+ /* leave the last descriptor in the chain
+ * so we can append to it
+ */
+ if (desc->chain_node.next == &iop_chan->chain)
+ return 1;
+
+ dev_dbg(iop_chan->device->common.dev,
+ "\tfree slot: %d slots_per_op: %d\n",
+ desc->idx, desc->slots_per_op);
+
+ list_del(&desc->chain_node);
+ iop_adma_free_slots(desc);
+
+ return 0;
+}
+
+static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
+{
+ struct iop_adma_desc_slot *iter, *_iter, *grp_start = NULL;
+ dma_cookie_t cookie = 0;
+ u32 current_desc = iop_chan_get_current_descriptor(iop_chan);
+ int busy = iop_chan_is_busy(iop_chan);
+ int seen_current = 0, slot_cnt = 0, slots_per_op = 0;
+
+ dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+ /* free completed slots from the chain starting with
+ * the oldest descriptor
+ */
+ list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
+ chain_node) {
+ pr_debug("\tcookie: %d slot: %d busy: %d "
+ "this_desc: %#x next_desc: %#x ack: %d\n",
+ iter->async_tx.cookie, iter->idx, busy,
+ iter->async_tx.phys, iop_desc_get_next_desc(iter),
+ async_tx_test_ack(&iter->async_tx));
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+
+ /* do not advance past the current descriptor loaded into the
+ * hardware channel, subsequent descriptors are either in
+ * process or have not been submitted
+ */
+ if (seen_current)
+ break;
+
+ /* stop the search if we reach the current descriptor and the
+ * channel is busy, or if it appears that the current descriptor
+ * needs to be re-read (i.e. has been appended to)
+ */
+ if (iter->async_tx.phys == current_desc) {
+ BUG_ON(seen_current++);
+ if (busy || iop_desc_get_next_desc(iter))
+ break;
+ }
+
+ /* detect the start of a group transaction */
+ if (!slot_cnt && !slots_per_op) {
+ slot_cnt = iter->slot_cnt;
+ slots_per_op = iter->slots_per_op;
+ if (slot_cnt <= slots_per_op) {
+ slot_cnt = 0;
+ slots_per_op = 0;
+ }
+ }
+
+ if (slot_cnt) {
+ pr_debug("\tgroup++\n");
+ if (!grp_start)
+ grp_start = iter;
+ slot_cnt -= slots_per_op;
+ }
+
+ /* all the members of a group are complete */
+ if (slots_per_op != 0 && slot_cnt == 0) {
+ struct iop_adma_desc_slot *grp_iter, *_grp_iter;
+ int end_of_chain = 0;
+ pr_debug("\tgroup end\n");
+
+ /* collect the total results */
+ if (grp_start->xor_check_result) {
+ u32 zero_sum_result = 0;
+ slot_cnt = grp_start->slot_cnt;
+ grp_iter = grp_start;
+
+ list_for_each_entry_from(grp_iter,
+ &iop_chan->chain, chain_node) {
+ zero_sum_result |=
+ iop_desc_get_zero_result(grp_iter);
+ pr_debug("\titer%d result: %d\n",
+ grp_iter->idx, zero_sum_result);
+ slot_cnt -= slots_per_op;
+ if (slot_cnt == 0)
+ break;
+ }
+ pr_debug("\tgrp_start->xor_check_result: %p\n",
+ grp_start->xor_check_result);
+ *grp_start->xor_check_result = zero_sum_result;
+ }
+
+ /* clean up the group */
+ slot_cnt = grp_start->slot_cnt;
+ grp_iter = grp_start;
+ list_for_each_entry_safe_from(grp_iter, _grp_iter,
+ &iop_chan->chain, chain_node) {
+ cookie = iop_adma_run_tx_complete_actions(
+ grp_iter, iop_chan, cookie);
+
+ slot_cnt -= slots_per_op;
+ end_of_chain = iop_adma_clean_slot(grp_iter,
+ iop_chan);
+
+ if (slot_cnt == 0 || end_of_chain)
+ break;
+ }
+
+ /* the group should be complete at this point */
+ BUG_ON(slot_cnt);
+
+ slots_per_op = 0;
+ grp_start = NULL;
+ if (end_of_chain)
+ break;
+ else
+ continue;
+ } else if (slots_per_op) /* wait for group completion */
+ continue;
+
+ /* write back zero sum results (single descriptor case) */
+ if (iter->xor_check_result && iter->async_tx.cookie)
+ *iter->xor_check_result =
+ iop_desc_get_zero_result(iter);
+
+ cookie = iop_adma_run_tx_complete_actions(
+ iter, iop_chan, cookie);
+
+ if (iop_adma_clean_slot(iter, iop_chan))
+ break;
+ }
+
+ BUG_ON(!seen_current);
+
+ if (cookie > 0) {
+ iop_chan->completed_cookie = cookie;
+ pr_debug("\tcompleted cookie %d\n", cookie);
+ }
+}
+
+static void
+iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
+{
+ spin_lock_bh(&iop_chan->lock);
+ __iop_adma_slot_cleanup(iop_chan);
+ spin_unlock_bh(&iop_chan->lock);
+}
+
+static void iop_adma_tasklet(unsigned long data)
+{
+ struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data;
+
+ spin_lock(&iop_chan->lock);
+ __iop_adma_slot_cleanup(iop_chan);
+ spin_unlock(&iop_chan->lock);
+}
+
+static struct iop_adma_desc_slot *
+iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots,
+ int slots_per_op)
+{
+ struct iop_adma_desc_slot *iter, *_iter, *alloc_start = NULL;
+ LIST_HEAD(chain);
+ int slots_found, retry = 0;
+
+ /* start search from the last allocated descrtiptor
+ * if a contiguous allocation can not be found start searching
+ * from the beginning of the list
+ */
+retry:
+ slots_found = 0;
+ if (retry == 0)
+ iter = iop_chan->last_used;
+ else
+ iter = list_entry(&iop_chan->all_slots,
+ struct iop_adma_desc_slot,
+ slot_node);
+
+ list_for_each_entry_safe_continue(
+ iter, _iter, &iop_chan->all_slots, slot_node) {
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+ if (iter->slots_per_op) {
+ /* give up after finding the first busy slot
+ * on the second pass through the list
+ */
+ if (retry)
+ break;
+
+ slots_found = 0;
+ continue;
+ }
+
+ /* start the allocation if the slot is correctly aligned */
+ if (!slots_found++) {
+ if (iop_desc_is_aligned(iter, slots_per_op))
+ alloc_start = iter;
+ else {
+ slots_found = 0;
+ continue;
+ }
+ }
+
+ if (slots_found == num_slots) {
+ struct iop_adma_desc_slot *alloc_tail = NULL;
+ struct iop_adma_desc_slot *last_used = NULL;
+ iter = alloc_start;
+ while (num_slots) {
+ int i;
+ dev_dbg(iop_chan->device->common.dev,
+ "allocated slot: %d "
+ "(desc %p phys: %#x) slots_per_op %d\n",
+ iter->idx, iter->hw_desc,
+ iter->async_tx.phys, slots_per_op);
+
+ /* pre-ack all but the last descriptor */
+ if (num_slots != slots_per_op)
+ async_tx_ack(&iter->async_tx);
+
+ list_add_tail(&iter->chain_node, &chain);
+ alloc_tail = iter;
+ iter->async_tx.cookie = 0;
+ iter->slot_cnt = num_slots;
+ iter->xor_check_result = NULL;
+ for (i = 0; i < slots_per_op; i++) {
+ iter->slots_per_op = slots_per_op - i;
+ last_used = iter;
+ iter = list_entry(iter->slot_node.next,
+ struct iop_adma_desc_slot,
+ slot_node);
+ }
+ num_slots -= slots_per_op;
+ }
+ alloc_tail->group_head = alloc_start;
+ alloc_tail->async_tx.cookie = -EBUSY;
+ list_splice(&chain, &alloc_tail->async_tx.tx_list);
+ iop_chan->last_used = last_used;
+ iop_desc_clear_next_desc(alloc_start);
+ iop_desc_clear_next_desc(alloc_tail);
+ return alloc_tail;
+ }
+ }
+ if (!retry++)
+ goto retry;
+
+ /* perform direct reclaim if the allocation fails */
+ __iop_adma_slot_cleanup(iop_chan);
+
+ return NULL;
+}
+
+static dma_cookie_t
+iop_desc_assign_cookie(struct iop_adma_chan *iop_chan,
+ struct iop_adma_desc_slot *desc)
+{
+ dma_cookie_t cookie = iop_chan->common.cookie;
+ cookie++;
+ if (cookie < 0)
+ cookie = 1;
+ iop_chan->common.cookie = desc->async_tx.cookie = cookie;
+ return cookie;
+}
+
+static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan)
+{
+ dev_dbg(iop_chan->device->common.dev, "pending: %d\n",
+ iop_chan->pending);
+
+ if (iop_chan->pending >= IOP_ADMA_THRESHOLD) {
+ iop_chan->pending = 0;
+ iop_chan_append(iop_chan);
+ }
+}
+
+static dma_cookie_t
+iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan);
+ struct iop_adma_desc_slot *grp_start, *old_chain_tail;
+ int slot_cnt;
+ int slots_per_op;
+ dma_cookie_t cookie;
+ dma_addr_t next_dma;
+
+ grp_start = sw_desc->group_head;
+ slot_cnt = grp_start->slot_cnt;
+ slots_per_op = grp_start->slots_per_op;
+
+ spin_lock_bh(&iop_chan->lock);
+ cookie = iop_desc_assign_cookie(iop_chan, sw_desc);
+
+ old_chain_tail = list_entry(iop_chan->chain.prev,
+ struct iop_adma_desc_slot, chain_node);
+ list_splice_init(&sw_desc->async_tx.tx_list,
+ &old_chain_tail->chain_node);
+
+ /* fix up the hardware chain */
+ next_dma = grp_start->async_tx.phys;
+ iop_desc_set_next_desc(old_chain_tail, next_dma);
+ BUG_ON(iop_desc_get_next_desc(old_chain_tail) != next_dma); /* flush */
+
+ /* check for pre-chained descriptors */
+ iop_paranoia(iop_desc_get_next_desc(sw_desc));
+
+ /* increment the pending count by the number of slots
+ * memcpy operations have a 1:1 (slot:operation) relation
+ * other operations are heavier and will pop the threshold
+ * more often.
+ */
+ iop_chan->pending += slot_cnt;
+ iop_adma_check_threshold(iop_chan);
+ spin_unlock_bh(&iop_chan->lock);
+
+ dev_dbg(iop_chan->device->common.dev, "%s cookie: %d slot: %d\n",
+ __func__, sw_desc->async_tx.cookie, sw_desc->idx);
+
+ return cookie;
+}
+
+static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan);
+static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan);
+
+/**
+ * iop_adma_alloc_chan_resources - returns the number of allocated descriptors
+ * @chan - allocate descriptor resources for this channel
+ * @client - current client requesting the channel be ready for requests
+ *
+ * Note: We keep the slots for 1 operation on iop_chan->chain at all times. To
+ * avoid deadlock, via async_xor, num_descs_in_pool must at a minimum be
+ * greater than 2x the number slots needed to satisfy a device->max_xor
+ * request.
+ * */
+static int iop_adma_alloc_chan_resources(struct dma_chan *chan,
+ struct dma_client *client)
+{
+ char *hw_desc;
+ int idx;
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *slot = NULL;
+ int init = iop_chan->slots_allocated ? 0 : 1;
+ struct iop_adma_platform_data *plat_data =
+ iop_chan->device->pdev->dev.platform_data;
+ int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE;
+
+ /* Allocate descriptor slots */
+ do {
+ idx = iop_chan->slots_allocated;
+ if (idx == num_descs_in_pool)
+ break;
+
+ slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+ if (!slot) {
+ printk(KERN_INFO "IOP ADMA Channel only initialized"
+ " %d descriptor slots", idx);
+ break;
+ }
+ hw_desc = (char *) iop_chan->device->dma_desc_pool_virt;
+ slot->hw_desc = (void *) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
+
+ dma_async_tx_descriptor_init(&slot->async_tx, chan);
+ slot->async_tx.tx_submit = iop_adma_tx_submit;
+ INIT_LIST_HEAD(&slot->chain_node);
+ INIT_LIST_HEAD(&slot->slot_node);
+ INIT_LIST_HEAD(&slot->async_tx.tx_list);
+ hw_desc = (char *) iop_chan->device->dma_desc_pool;
+ slot->async_tx.phys =
+ (dma_addr_t) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
+ slot->idx = idx;
+
+ spin_lock_bh(&iop_chan->lock);
+ iop_chan->slots_allocated++;
+ list_add_tail(&slot->slot_node, &iop_chan->all_slots);
+ spin_unlock_bh(&iop_chan->lock);
+ } while (iop_chan->slots_allocated < num_descs_in_pool);
+
+ if (idx && !iop_chan->last_used)
+ iop_chan->last_used = list_entry(iop_chan->all_slots.next,
+ struct iop_adma_desc_slot,
+ slot_node);
+
+ dev_dbg(iop_chan->device->common.dev,
+ "allocated %d descriptor slots last_used: %p\n",
+ iop_chan->slots_allocated, iop_chan->last_used);
+
+ /* initialize the channel and the chain with a null operation */
+ if (init) {
+ if (dma_has_cap(DMA_MEMCPY,
+ iop_chan->device->common.cap_mask))
+ iop_chan_start_null_memcpy(iop_chan);
+ else if (dma_has_cap(DMA_XOR,
+ iop_chan->device->common.cap_mask))
+ iop_chan_start_null_xor(iop_chan);
+ else
+ BUG();
+ }
+
+ return (idx > 0) ? idx : -ENOMEM;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ int slot_cnt, slots_per_op;
+
+ dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_interrupt_slot_count(&slots_per_op, iop_chan);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+ iop_desc_init_interrupt(grp_start, iop_chan);
+ grp_start->unmap_len = 0;
+ sw_desc->async_tx.flags = flags;
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
+ dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ int slot_cnt, slots_per_op;
+
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT));
+
+ dev_dbg(iop_chan->device->common.dev, "%s len: %u\n",
+ __func__, len);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_memcpy_slot_count(len, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+ iop_desc_init_memcpy(grp_start, flags);
+ iop_desc_set_byte_count(grp_start, iop_chan, len);
+ iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
+ iop_desc_set_memcpy_src_addr(grp_start, dma_src);
+ sw_desc->unmap_src_cnt = 1;
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_memset(struct dma_chan *chan, dma_addr_t dma_dest,
+ int value, size_t len, unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ int slot_cnt, slots_per_op;
+
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT));
+
+ dev_dbg(iop_chan->device->common.dev, "%s len: %u\n",
+ __func__, len);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_memset_slot_count(len, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+ iop_desc_init_memset(grp_start, flags);
+ iop_desc_set_byte_count(grp_start, iop_chan, len);
+ iop_desc_set_block_fill_val(grp_start, value);
+ iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
+ sw_desc->unmap_src_cnt = 1;
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
+ dma_addr_t *dma_src, unsigned int src_cnt, size_t len,
+ unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ int slot_cnt, slots_per_op;
+
+ if (unlikely(!len))
+ return NULL;
+ BUG_ON(unlikely(len > IOP_ADMA_XOR_MAX_BYTE_COUNT));
+
+ dev_dbg(iop_chan->device->common.dev,
+ "%s src_cnt: %d len: %u flags: %lx\n",
+ __func__, src_cnt, len, flags);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+ iop_desc_init_xor(grp_start, src_cnt, flags);
+ iop_desc_set_byte_count(grp_start, iop_chan, len);
+ iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
+ sw_desc->unmap_src_cnt = src_cnt;
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ while (src_cnt--)
+ iop_desc_set_xor_src_addr(grp_start, src_cnt,
+ dma_src[src_cnt]);
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
+ unsigned int src_cnt, size_t len, u32 *result,
+ unsigned long flags)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ int slot_cnt, slots_per_op;
+
+ if (unlikely(!len))
+ return NULL;
+
+ dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
+ __func__, src_cnt, len);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_zero_sum_slot_count(len, src_cnt, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+ iop_desc_init_zero_sum(grp_start, src_cnt, flags);
+ iop_desc_set_zero_sum_byte_count(grp_start, len);
+ grp_start->xor_check_result = result;
+ pr_debug("\t%s: grp_start->xor_check_result: %p\n",
+ __func__, grp_start->xor_check_result);
+ sw_desc->unmap_src_cnt = src_cnt;
+ sw_desc->unmap_len = len;
+ sw_desc->async_tx.flags = flags;
+ while (src_cnt--)
+ iop_desc_set_zero_sum_src_addr(grp_start, src_cnt,
+ dma_src[src_cnt]);
+ }
+ spin_unlock_bh(&iop_chan->lock);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static void iop_adma_free_chan_resources(struct dma_chan *chan)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ struct iop_adma_desc_slot *iter, *_iter;
+ int in_use_descs = 0;
+
+ iop_adma_slot_cleanup(iop_chan);
+
+ spin_lock_bh(&iop_chan->lock);
+ list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
+ chain_node) {
+ in_use_descs++;
+ list_del(&iter->chain_node);
+ }
+ list_for_each_entry_safe_reverse(
+ iter, _iter, &iop_chan->all_slots, slot_node) {
+ list_del(&iter->slot_node);
+ kfree(iter);
+ iop_chan->slots_allocated--;
+ }
+ iop_chan->last_used = NULL;
+
+ dev_dbg(iop_chan->device->common.dev, "%s slots_allocated %d\n",
+ __func__, iop_chan->slots_allocated);
+ spin_unlock_bh(&iop_chan->lock);
+
+ /* one is ok since we left it on there on purpose */
+ if (in_use_descs > 1)
+ printk(KERN_ERR "IOP: Freeing %d in use descriptors!\n",
+ in_use_descs - 1);
+}
+
+/**
+ * iop_adma_is_complete - poll the status of an ADMA transaction
+ * @chan: ADMA channel handle
+ * @cookie: ADMA transaction identifier
+ */
+static enum dma_status iop_adma_is_complete(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ dma_cookie_t *done,
+ dma_cookie_t *used)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+ enum dma_status ret;
+
+ last_used = chan->cookie;
+ last_complete = iop_chan->completed_cookie;
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+ if (ret == DMA_SUCCESS)
+ return ret;
+
+ iop_adma_slot_cleanup(iop_chan);
+
+ last_used = chan->cookie;
+ last_complete = iop_chan->completed_cookie;
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static irqreturn_t iop_adma_eot_handler(int irq, void *data)
+{
+ struct iop_adma_chan *chan = data;
+
+ dev_dbg(chan->device->common.dev, "%s\n", __func__);
+
+ tasklet_schedule(&chan->irq_tasklet);
+
+ iop_adma_device_clear_eot_status(chan);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t iop_adma_eoc_handler(int irq, void *data)
+{
+ struct iop_adma_chan *chan = data;
+
+ dev_dbg(chan->device->common.dev, "%s\n", __func__);
+
+ tasklet_schedule(&chan->irq_tasklet);
+
+ iop_adma_device_clear_eoc_status(chan);
+
+ return IRQ_HANDLED;
+}
+
+static irqreturn_t iop_adma_err_handler(int irq, void *data)
+{
+ struct iop_adma_chan *chan = data;
+ unsigned long status = iop_chan_get_status(chan);
+
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error ( %s%s%s%s%s%s%s)\n",
+ iop_is_err_int_parity(status, chan) ? "int_parity " : "",
+ iop_is_err_mcu_abort(status, chan) ? "mcu_abort " : "",
+ iop_is_err_int_tabort(status, chan) ? "int_tabort " : "",
+ iop_is_err_int_mabort(status, chan) ? "int_mabort " : "",
+ iop_is_err_pci_tabort(status, chan) ? "pci_tabort " : "",
+ iop_is_err_pci_mabort(status, chan) ? "pci_mabort " : "",
+ iop_is_err_split_tx(status, chan) ? "split_tx " : "");
+
+ iop_adma_device_clear_err_status(chan);
+
+ BUG();
+
+ return IRQ_HANDLED;
+}
+
+static void iop_adma_issue_pending(struct dma_chan *chan)
+{
+ struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+
+ if (iop_chan->pending) {
+ iop_chan->pending = 0;
+ iop_chan_append(iop_chan);
+ }
+}
+
+/*
+ * Perform a transaction to verify the HW works.
+ */
+#define IOP_ADMA_TEST_SIZE 2000
+
+static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device)
+{
+ int i;
+ void *src, *dest;
+ dma_addr_t src_dma, dest_dma;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ struct dma_async_tx_descriptor *tx;
+ int err = 0;
+ struct iop_adma_chan *iop_chan;
+
+ dev_dbg(device->common.dev, "%s\n", __func__);
+
+ src = kmalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL);
+ if (!src)
+ return -ENOMEM;
+ dest = kzalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL);
+ if (!dest) {
+ kfree(src);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffer */
+ for (i = 0; i < IOP_ADMA_TEST_SIZE; i++)
+ ((u8 *) src)[i] = (u8)i;
+
+ /* Start copy, using first DMA channel */
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (iop_adma_alloc_chan_resources(dma_chan, NULL) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ dest_dma = dma_map_single(dma_chan->device->dev, dest,
+ IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
+ src_dma = dma_map_single(dma_chan->device->dev, src,
+ IOP_ADMA_TEST_SIZE, DMA_TO_DEVICE);
+ tx = iop_adma_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
+ IOP_ADMA_TEST_SIZE,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(1);
+
+ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test copy timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ iop_chan = to_iop_adma_chan(dma_chan);
+ dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
+ IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
+ if (memcmp(src, dest, IOP_ADMA_TEST_SIZE)) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test copy failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ iop_adma_free_chan_resources(dma_chan);
+out:
+ kfree(src);
+ kfree(dest);
+ return err;
+}
+
+#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
+static int __devinit
+iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
+{
+ int i, src_idx;
+ struct page *dest;
+ struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST];
+ struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
+ dma_addr_t dma_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
+ dma_addr_t dma_addr, dest_dma;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ u8 cmp_byte = 0;
+ u32 cmp_word;
+ u32 zero_sum_result;
+ int err = 0;
+ struct iop_adma_chan *iop_chan;
+
+ dev_dbg(device->common.dev, "%s\n", __func__);
+
+ for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
+ xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+ if (!xor_srcs[src_idx])
+ while (src_idx--) {
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+ }
+
+ dest = alloc_page(GFP_KERNEL);
+ if (!dest)
+ while (src_idx--) {
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffers */
+ for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
+ u8 *ptr = page_address(xor_srcs[src_idx]);
+ for (i = 0; i < PAGE_SIZE; i++)
+ ptr[i] = (1 << src_idx);
+ }
+
+ for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++)
+ cmp_byte ^= (u8) (1 << src_idx);
+
+ cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+ (cmp_byte << 8) | cmp_byte;
+
+ memset(page_address(dest), 0, PAGE_SIZE);
+
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (iop_adma_alloc_chan_resources(dma_chan, NULL) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* test xor */
+ dest_dma = dma_map_page(dma_chan->device->dev, dest, 0,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+ dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
+ 0, PAGE_SIZE, DMA_TO_DEVICE);
+ tx = iop_adma_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+ IOP_ADMA_NUM_SRC_TEST, PAGE_SIZE,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test xor timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ iop_chan = to_iop_adma_chan(dma_chan);
+ dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+ u32 *ptr = page_address(dest);
+ if (ptr[i] != cmp_word) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test xor failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+ dma_sync_single_for_device(&iop_chan->device->pdev->dev, dest_dma,
+ PAGE_SIZE, DMA_TO_DEVICE);
+
+ /* skip zero sum if the capability is not present */
+ if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask))
+ goto free_resources;
+
+ /* zero sum the sources with the destintation page */
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+ zero_sum_srcs[i] = xor_srcs[i];
+ zero_sum_srcs[i] = dest;
+
+ zero_sum_result = 1;
+
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++)
+ dma_srcs[i] = dma_map_page(dma_chan->device->dev,
+ zero_sum_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
+ IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &zero_sum_result,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test zero sum timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (zero_sum_result != 0) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test zero sum failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ /* test memset */
+ dma_addr = dma_map_page(dma_chan->device->dev, dest, 0,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ tx = iop_adma_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test memset timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
+ u32 *ptr = page_address(dest);
+ if (ptr[i]) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test memset failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+
+ /* test for non-zero parity sum */
+ zero_sum_result = 0;
+ for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++)
+ dma_srcs[i] = dma_map_page(dma_chan->device->dev,
+ zero_sum_srcs[i], 0, PAGE_SIZE,
+ DMA_TO_DEVICE);
+ tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
+ IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+ &zero_sum_result,
+ DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+ cookie = iop_adma_tx_submit(tx);
+ iop_adma_issue_pending(dma_chan);
+ msleep(8);
+
+ if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test non-zero sum timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ if (zero_sum_result != 1) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test non-zero sum failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ iop_adma_free_chan_resources(dma_chan);
+out:
+ src_idx = IOP_ADMA_NUM_SRC_TEST;
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ __free_page(dest);
+ return err;
+}
+
+static int __devexit iop_adma_remove(struct platform_device *dev)
+{
+ struct iop_adma_device *device = platform_get_drvdata(dev);
+ struct dma_chan *chan, *_chan;
+ struct iop_adma_chan *iop_chan;
+ int i;
+ struct iop_adma_platform_data *plat_data = dev->dev.platform_data;
+
+ dma_async_device_unregister(&device->common);
+
+ for (i = 0; i < 3; i++) {
+ unsigned int irq;
+ irq = platform_get_irq(dev, i);
+ free_irq(irq, device);
+ }
+
+ dma_free_coherent(&dev->dev, plat_data->pool_size,
+ device->dma_desc_pool_virt, device->dma_desc_pool);
+
+ do {
+ struct resource *res;
+ res = platform_get_resource(dev, IORESOURCE_MEM, 0);
+ release_mem_region(res->start, res->end - res->start);
+ } while (0);
+
+ list_for_each_entry_safe(chan, _chan, &device->common.channels,
+ device_node) {
+ iop_chan = to_iop_adma_chan(chan);
+ list_del(&chan->device_node);
+ kfree(iop_chan);
+ }
+ kfree(device);
+
+ return 0;
+}
+
+static int __devinit iop_adma_probe(struct platform_device *pdev)
+{
+ struct resource *res;
+ int ret = 0, i;
+ struct iop_adma_device *adev;
+ struct iop_adma_chan *iop_chan;
+ struct dma_device *dma_dev;
+ struct iop_adma_platform_data *plat_data = pdev->dev.platform_data;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
+ if (!devm_request_mem_region(&pdev->dev, res->start,
+ res->end - res->start, pdev->name))
+ return -EBUSY;
+
+ adev = kzalloc(sizeof(*adev), GFP_KERNEL);
+ if (!adev)
+ return -ENOMEM;
+ dma_dev = &adev->common;
+
+ /* allocate coherent memory for hardware descriptors
+ * note: writecombine gives slightly better performance, but
+ * requires that we explicitly flush the writes
+ */
+ if ((adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
+ plat_data->pool_size,
+ &adev->dma_desc_pool,
+ GFP_KERNEL)) == NULL) {
+ ret = -ENOMEM;
+ goto err_free_adev;
+ }
+
+ dev_dbg(&pdev->dev, "%s: allocted descriptor pool virt %p phys %p\n",
+ __func__, adev->dma_desc_pool_virt,
+ (void *) adev->dma_desc_pool);
+
+ adev->id = plat_data->hw_id;
+
+ /* discover transaction capabilites from the platform data */
+ dma_dev->cap_mask = plat_data->cap_mask;
+
+ adev->pdev = pdev;
+ platform_set_drvdata(pdev, adev);
+
+ INIT_LIST_HEAD(&dma_dev->channels);
+
+ /* set base routines */
+ dma_dev->device_alloc_chan_resources = iop_adma_alloc_chan_resources;
+ dma_dev->device_free_chan_resources = iop_adma_free_chan_resources;
+ dma_dev->device_is_tx_complete = iop_adma_is_complete;
+ dma_dev->device_issue_pending = iop_adma_issue_pending;
+ dma_dev->dev = &pdev->dev;
+
+ /* set prep routines based on capability */
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_memcpy = iop_adma_prep_dma_memcpy;
+ if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_memset = iop_adma_prep_dma_memset;
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ dma_dev->max_xor = iop_adma_get_max_xor();
+ dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
+ }
+ if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_zero_sum =
+ iop_adma_prep_dma_zero_sum;
+ if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_interrupt =
+ iop_adma_prep_dma_interrupt;
+
+ iop_chan = kzalloc(sizeof(*iop_chan), GFP_KERNEL);
+ if (!iop_chan) {
+ ret = -ENOMEM;
+ goto err_free_dma;
+ }
+ iop_chan->device = adev;
+
+ iop_chan->mmr_base = devm_ioremap(&pdev->dev, res->start,
+ res->end - res->start);
+ if (!iop_chan->mmr_base) {
+ ret = -ENOMEM;
+ goto err_free_iop_chan;
+ }
+ tasklet_init(&iop_chan->irq_tasklet, iop_adma_tasklet, (unsigned long)
+ iop_chan);
+
+ /* clear errors before enabling interrupts */
+ iop_adma_device_clear_err_status(iop_chan);
+
+ for (i = 0; i < 3; i++) {
+ irq_handler_t handler[] = { iop_adma_eot_handler,
+ iop_adma_eoc_handler,
+ iop_adma_err_handler };
+ int irq = platform_get_irq(pdev, i);
+ if (irq < 0) {
+ ret = -ENXIO;
+ goto err_free_iop_chan;
+ } else {
+ ret = devm_request_irq(&pdev->dev, irq,
+ handler[i], 0, pdev->name, iop_chan);
+ if (ret)
+ goto err_free_iop_chan;
+ }
+ }
+
+ spin_lock_init(&iop_chan->lock);
+ INIT_LIST_HEAD(&iop_chan->chain);
+ INIT_LIST_HEAD(&iop_chan->all_slots);
+ INIT_RCU_HEAD(&iop_chan->common.rcu);
+ iop_chan->common.device = dma_dev;
+ list_add_tail(&iop_chan->common.device_node, &dma_dev->channels);
+
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+ ret = iop_adma_memcpy_self_test(adev);
+ dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
+ if (ret)
+ goto err_free_iop_chan;
+ }
+
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
+ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
+ ret = iop_adma_xor_zero_sum_self_test(adev);
+ dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
+ if (ret)
+ goto err_free_iop_chan;
+ }
+
+ dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
+ "( %s%s%s%s%s%s%s%s%s%s)\n",
+ dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "",
+ dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "",
+ dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "",
+ dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+ dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "",
+ dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "",
+ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
+ dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "",
+ dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
+ dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
+
+ dma_async_device_register(dma_dev);
+ goto out;
+
+ err_free_iop_chan:
+ kfree(iop_chan);
+ err_free_dma:
+ dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
+ adev->dma_desc_pool_virt, adev->dma_desc_pool);
+ err_free_adev:
+ kfree(adev);
+ out:
+ return ret;
+}
+
+static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
+{
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ dma_cookie_t cookie;
+ int slot_cnt, slots_per_op;
+
+ dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_memcpy_slot_count(0, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+
+ list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
+ async_tx_ack(&sw_desc->async_tx);
+ iop_desc_init_memcpy(grp_start, 0);
+ iop_desc_set_byte_count(grp_start, iop_chan, 0);
+ iop_desc_set_dest_addr(grp_start, iop_chan, 0);
+ iop_desc_set_memcpy_src_addr(grp_start, 0);
+
+ cookie = iop_chan->common.cookie;
+ cookie++;
+ if (cookie <= 1)
+ cookie = 2;
+
+ /* initialize the completed cookie to be less than
+ * the most recently used cookie
+ */
+ iop_chan->completed_cookie = cookie - 1;
+ iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
+
+ /* channel should not be busy */
+ BUG_ON(iop_chan_is_busy(iop_chan));
+
+ /* clear any prior error-status bits */
+ iop_adma_device_clear_err_status(iop_chan);
+
+ /* disable operation */
+ iop_chan_disable(iop_chan);
+
+ /* set the descriptor address */
+ iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys);
+
+ /* 1/ don't add pre-chained descriptors
+ * 2/ dummy read to flush next_desc write
+ */
+ BUG_ON(iop_desc_get_next_desc(sw_desc));
+
+ /* run the descriptor */
+ iop_chan_enable(iop_chan);
+ } else
+ dev_printk(KERN_ERR, iop_chan->device->common.dev,
+ "failed to allocate null descriptor\n");
+ spin_unlock_bh(&iop_chan->lock);
+}
+
+static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
+{
+ struct iop_adma_desc_slot *sw_desc, *grp_start;
+ dma_cookie_t cookie;
+ int slot_cnt, slots_per_op;
+
+ dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+
+ spin_lock_bh(&iop_chan->lock);
+ slot_cnt = iop_chan_xor_slot_count(0, 2, &slots_per_op);
+ sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+ if (sw_desc) {
+ grp_start = sw_desc->group_head;
+ list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
+ async_tx_ack(&sw_desc->async_tx);
+ iop_desc_init_null_xor(grp_start, 2, 0);
+ iop_desc_set_byte_count(grp_start, iop_chan, 0);
+ iop_desc_set_dest_addr(grp_start, iop_chan, 0);
+ iop_desc_set_xor_src_addr(grp_start, 0, 0);
+ iop_desc_set_xor_src_addr(grp_start, 1, 0);
+
+ cookie = iop_chan->common.cookie;
+ cookie++;
+ if (cookie <= 1)
+ cookie = 2;
+
+ /* initialize the completed cookie to be less than
+ * the most recently used cookie
+ */
+ iop_chan->completed_cookie = cookie - 1;
+ iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
+
+ /* channel should not be busy */
+ BUG_ON(iop_chan_is_busy(iop_chan));
+
+ /* clear any prior error-status bits */
+ iop_adma_device_clear_err_status(iop_chan);
+
+ /* disable operation */
+ iop_chan_disable(iop_chan);
+
+ /* set the descriptor address */
+ iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys);
+
+ /* 1/ don't add pre-chained descriptors
+ * 2/ dummy read to flush next_desc write
+ */
+ BUG_ON(iop_desc_get_next_desc(sw_desc));
+
+ /* run the descriptor */
+ iop_chan_enable(iop_chan);
+ } else
+ dev_printk(KERN_ERR, iop_chan->device->common.dev,
+ "failed to allocate null descriptor\n");
+ spin_unlock_bh(&iop_chan->lock);
+}
+
+MODULE_ALIAS("platform:iop-adma");
+
+static struct platform_driver iop_adma_driver = {
+ .probe = iop_adma_probe,
+ .remove = iop_adma_remove,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = "iop-adma",
+ },
+};
+
+static int __init iop_adma_init (void)
+{
+ return platform_driver_register(&iop_adma_driver);
+}
+
+/* it's currently unsafe to unload this module */
+#if 0
+static void __exit iop_adma_exit (void)
+{
+ platform_driver_unregister(&iop_adma_driver);
+ return;
+}
+module_exit(iop_adma_exit);
+#endif
+
+module_init(iop_adma_init);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("IOP ADMA Engine Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c
new file mode 100644
index 0000000..9f6fe46
--- /dev/null
+++ b/drivers/dma/iovlock.c
@@ -0,0 +1,269 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ * Portions based on net/core/datagram.c and copyrighted by their authors.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This code allows the net stack to make use of a DMA engine for
+ * skb to iovec copies.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/pagemap.h>
+#include <net/tcp.h> /* for memcpy_toiovec */
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+static int num_pages_spanned(struct iovec *iov)
+{
+ return
+ ((PAGE_ALIGN((unsigned long)iov->iov_base + iov->iov_len) -
+ ((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT);
+}
+
+/*
+ * Pin down all the iovec pages needed for len bytes.
+ * Return a struct dma_pinned_list to keep track of pages pinned down.
+ *
+ * We are allocating a single chunk of memory, and then carving it up into
+ * 3 sections, the latter 2 whose size depends on the number of iovecs and the
+ * total number of pages, respectively.
+ */
+struct dma_pinned_list *dma_pin_iovec_pages(struct iovec *iov, size_t len)
+{
+ struct dma_pinned_list *local_list;
+ struct page **pages;
+ int i;
+ int ret;
+ int nr_iovecs = 0;
+ int iovec_len_used = 0;
+ int iovec_pages_used = 0;
+
+ /* don't pin down non-user-based iovecs */
+ if (segment_eq(get_fs(), KERNEL_DS))
+ return NULL;
+
+ /* determine how many iovecs/pages there are, up front */
+ do {
+ iovec_len_used += iov[nr_iovecs].iov_len;
+ iovec_pages_used += num_pages_spanned(&iov[nr_iovecs]);
+ nr_iovecs++;
+ } while (iovec_len_used < len);
+
+ /* single kmalloc for pinned list, page_list[], and the page arrays */
+ local_list = kmalloc(sizeof(*local_list)
+ + (nr_iovecs * sizeof (struct dma_page_list))
+ + (iovec_pages_used * sizeof (struct page*)), GFP_KERNEL);
+ if (!local_list)
+ goto out;
+
+ /* list of pages starts right after the page list array */
+ pages = (struct page **) &local_list->page_list[nr_iovecs];
+
+ local_list->nr_iovecs = 0;
+
+ for (i = 0; i < nr_iovecs; i++) {
+ struct dma_page_list *page_list = &local_list->page_list[i];
+
+ len -= iov[i].iov_len;
+
+ if (!access_ok(VERIFY_WRITE, iov[i].iov_base, iov[i].iov_len))
+ goto unpin;
+
+ page_list->nr_pages = num_pages_spanned(&iov[i]);
+ page_list->base_address = iov[i].iov_base;
+
+ page_list->pages = pages;
+ pages += page_list->nr_pages;
+
+ /* pin pages down */
+ down_read(&current->mm->mmap_sem);
+ ret = get_user_pages(
+ current,
+ current->mm,
+ (unsigned long) iov[i].iov_base,
+ page_list->nr_pages,
+ 1, /* write */
+ 0, /* force */
+ page_list->pages,
+ NULL);
+ up_read(&current->mm->mmap_sem);
+
+ if (ret != page_list->nr_pages)
+ goto unpin;
+
+ local_list->nr_iovecs = i + 1;
+ }
+
+ return local_list;
+
+unpin:
+ dma_unpin_iovec_pages(local_list);
+out:
+ return NULL;
+}
+
+void dma_unpin_iovec_pages(struct dma_pinned_list *pinned_list)
+{
+ int i, j;
+
+ if (!pinned_list)
+ return;
+
+ for (i = 0; i < pinned_list->nr_iovecs; i++) {
+ struct dma_page_list *page_list = &pinned_list->page_list[i];
+ for (j = 0; j < page_list->nr_pages; j++) {
+ set_page_dirty_lock(page_list->pages[j]);
+ page_cache_release(page_list->pages[j]);
+ }
+ }
+
+ kfree(pinned_list);
+}
+
+
+/*
+ * We have already pinned down the pages we will be using in the iovecs.
+ * Each entry in iov array has corresponding entry in pinned_list->page_list.
+ * Using array indexing to keep iov[] and page_list[] in sync.
+ * Initial elements in iov array's iov->iov_len will be 0 if already copied into
+ * by another call.
+ * iov array length remaining guaranteed to be bigger than len.
+ */
+dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov,
+ struct dma_pinned_list *pinned_list, unsigned char *kdata, size_t len)
+{
+ int iov_byte_offset;
+ int copy;
+ dma_cookie_t dma_cookie = 0;
+ int iovec_idx;
+ int page_idx;
+
+ if (!chan)
+ return memcpy_toiovec(iov, kdata, len);
+
+ iovec_idx = 0;
+ while (iovec_idx < pinned_list->nr_iovecs) {
+ struct dma_page_list *page_list;
+
+ /* skip already used-up iovecs */
+ while (!iov[iovec_idx].iov_len)
+ iovec_idx++;
+
+ page_list = &pinned_list->page_list[iovec_idx];
+
+ iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK);
+ page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK)
+ - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT;
+
+ /* break up copies to not cross page boundary */
+ while (iov[iovec_idx].iov_len) {
+ copy = min_t(int, PAGE_SIZE - iov_byte_offset, len);
+ copy = min_t(int, copy, iov[iovec_idx].iov_len);
+
+ dma_cookie = dma_async_memcpy_buf_to_pg(chan,
+ page_list->pages[page_idx],
+ iov_byte_offset,
+ kdata,
+ copy);
+
+ len -= copy;
+ iov[iovec_idx].iov_len -= copy;
+ iov[iovec_idx].iov_base += copy;
+
+ if (!len)
+ return dma_cookie;
+
+ kdata += copy;
+ iov_byte_offset = 0;
+ page_idx++;
+ }
+ iovec_idx++;
+ }
+
+ /* really bad if we ever run out of iovecs */
+ BUG();
+ return -EFAULT;
+}
+
+dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov,
+ struct dma_pinned_list *pinned_list, struct page *page,
+ unsigned int offset, size_t len)
+{
+ int iov_byte_offset;
+ int copy;
+ dma_cookie_t dma_cookie = 0;
+ int iovec_idx;
+ int page_idx;
+ int err;
+
+ /* this needs as-yet-unimplemented buf-to-buff, so punt. */
+ /* TODO: use dma for this */
+ if (!chan || !pinned_list) {
+ u8 *vaddr = kmap(page);
+ err = memcpy_toiovec(iov, vaddr + offset, len);
+ kunmap(page);
+ return err;
+ }
+
+ iovec_idx = 0;
+ while (iovec_idx < pinned_list->nr_iovecs) {
+ struct dma_page_list *page_list;
+
+ /* skip already used-up iovecs */
+ while (!iov[iovec_idx].iov_len)
+ iovec_idx++;
+
+ page_list = &pinned_list->page_list[iovec_idx];
+
+ iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK);
+ page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK)
+ - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT;
+
+ /* break up copies to not cross page boundary */
+ while (iov[iovec_idx].iov_len) {
+ copy = min_t(int, PAGE_SIZE - iov_byte_offset, len);
+ copy = min_t(int, copy, iov[iovec_idx].iov_len);
+
+ dma_cookie = dma_async_memcpy_pg_to_pg(chan,
+ page_list->pages[page_idx],
+ iov_byte_offset,
+ page,
+ offset,
+ copy);
+
+ len -= copy;
+ iov[iovec_idx].iov_len -= copy;
+ iov[iovec_idx].iov_base += copy;
+
+ if (!len)
+ return dma_cookie;
+
+ offset += copy;
+ iov_byte_offset = 0;
+ page_idx++;
+ }
+ iovec_idx++;
+ }
+
+ /* really bad if we ever run out of iovecs */
+ BUG();
+ return -EFAULT;
+}
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
new file mode 100644
index 0000000..bcda174
--- /dev/null
+++ b/drivers/dma/mv_xor.c
@@ -0,0 +1,1384 @@
+/*
+ * offload engine driver for the Marvell XOR engine
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/async_tx.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/memory.h>
+#include <plat/mv_xor.h>
+#include "mv_xor.h"
+
+static void mv_xor_issue_pending(struct dma_chan *chan);
+
+#define to_mv_xor_chan(chan) \
+ container_of(chan, struct mv_xor_chan, common)
+
+#define to_mv_xor_device(dev) \
+ container_of(dev, struct mv_xor_device, common)
+
+#define to_mv_xor_slot(tx) \
+ container_of(tx, struct mv_xor_desc_slot, async_tx)
+
+static void mv_desc_init(struct mv_xor_desc_slot *desc, unsigned long flags)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+
+ hw_desc->status = (1 << 31);
+ hw_desc->phy_next_desc = 0;
+ hw_desc->desc_command = (1 << 31);
+}
+
+static u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ return hw_desc->phy_dest_addr;
+}
+
+static u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc,
+ int src_idx)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ return hw_desc->phy_src_addr[src_idx];
+}
+
+
+static void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc,
+ u32 byte_count)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->byte_count = byte_count;
+}
+
+static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc,
+ u32 next_desc_addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ BUG_ON(hw_desc->phy_next_desc);
+ hw_desc->phy_next_desc = next_desc_addr;
+}
+
+static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->phy_next_desc = 0;
+}
+
+static void mv_desc_set_block_fill_val(struct mv_xor_desc_slot *desc, u32 val)
+{
+ desc->value = val;
+}
+
+static void mv_desc_set_dest_addr(struct mv_xor_desc_slot *desc,
+ dma_addr_t addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->phy_dest_addr = addr;
+}
+
+static int mv_chan_memset_slot_count(size_t len)
+{
+ return 1;
+}
+
+#define mv_chan_memcpy_slot_count(c) mv_chan_memset_slot_count(c)
+
+static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc,
+ int index, dma_addr_t addr)
+{
+ struct mv_xor_desc *hw_desc = desc->hw_desc;
+ hw_desc->phy_src_addr[index] = addr;
+ if (desc->type == DMA_XOR)
+ hw_desc->desc_command |= (1 << index);
+}
+
+static u32 mv_chan_get_current_desc(struct mv_xor_chan *chan)
+{
+ return __raw_readl(XOR_CURR_DESC(chan));
+}
+
+static void mv_chan_set_next_descriptor(struct mv_xor_chan *chan,
+ u32 next_desc_addr)
+{
+ __raw_writel(next_desc_addr, XOR_NEXT_DESC(chan));
+}
+
+static void mv_chan_set_dest_pointer(struct mv_xor_chan *chan, u32 desc_addr)
+{
+ __raw_writel(desc_addr, XOR_DEST_POINTER(chan));
+}
+
+static void mv_chan_set_block_size(struct mv_xor_chan *chan, u32 block_size)
+{
+ __raw_writel(block_size, XOR_BLOCK_SIZE(chan));
+}
+
+static void mv_chan_set_value(struct mv_xor_chan *chan, u32 value)
+{
+ __raw_writel(value, XOR_INIT_VALUE_LOW(chan));
+ __raw_writel(value, XOR_INIT_VALUE_HIGH(chan));
+}
+
+static void mv_chan_unmask_interrupts(struct mv_xor_chan *chan)
+{
+ u32 val = __raw_readl(XOR_INTR_MASK(chan));
+ val |= XOR_INTR_MASK_VALUE << (chan->idx * 16);
+ __raw_writel(val, XOR_INTR_MASK(chan));
+}
+
+static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan)
+{
+ u32 intr_cause = __raw_readl(XOR_INTR_CAUSE(chan));
+ intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF;
+ return intr_cause;
+}
+
+static int mv_is_err_intr(u32 intr_cause)
+{
+ if (intr_cause & ((1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9)))
+ return 1;
+
+ return 0;
+}
+
+static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan)
+{
+ u32 val = (1 << (1 + (chan->idx * 16)));
+ dev_dbg(chan->device->common.dev, "%s, val 0x%08x\n", __func__, val);
+ __raw_writel(val, XOR_INTR_CAUSE(chan));
+}
+
+static void mv_xor_device_clear_err_status(struct mv_xor_chan *chan)
+{
+ u32 val = 0xFFFF0000 >> (chan->idx * 16);
+ __raw_writel(val, XOR_INTR_CAUSE(chan));
+}
+
+static int mv_can_chain(struct mv_xor_desc_slot *desc)
+{
+ struct mv_xor_desc_slot *chain_old_tail = list_entry(
+ desc->chain_node.prev, struct mv_xor_desc_slot, chain_node);
+
+ if (chain_old_tail->type != desc->type)
+ return 0;
+ if (desc->type == DMA_MEMSET)
+ return 0;
+
+ return 1;
+}
+
+static void mv_set_mode(struct mv_xor_chan *chan,
+ enum dma_transaction_type type)
+{
+ u32 op_mode;
+ u32 config = __raw_readl(XOR_CONFIG(chan));
+
+ switch (type) {
+ case DMA_XOR:
+ op_mode = XOR_OPERATION_MODE_XOR;
+ break;
+ case DMA_MEMCPY:
+ op_mode = XOR_OPERATION_MODE_MEMCPY;
+ break;
+ case DMA_MEMSET:
+ op_mode = XOR_OPERATION_MODE_MEMSET;
+ break;
+ default:
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error: unsupported operation %d.\n",
+ type);
+ BUG();
+ return;
+ }
+
+ config &= ~0x7;
+ config |= op_mode;
+ __raw_writel(config, XOR_CONFIG(chan));
+ chan->current_type = type;
+}
+
+static void mv_chan_activate(struct mv_xor_chan *chan)
+{
+ u32 activation;
+
+ dev_dbg(chan->device->common.dev, " activate chan.\n");
+ activation = __raw_readl(XOR_ACTIVATION(chan));
+ activation |= 0x1;
+ __raw_writel(activation, XOR_ACTIVATION(chan));
+}
+
+static char mv_chan_is_busy(struct mv_xor_chan *chan)
+{
+ u32 state = __raw_readl(XOR_ACTIVATION(chan));
+
+ state = (state >> 4) & 0x3;
+
+ return (state == 1) ? 1 : 0;
+}
+
+static int mv_chan_xor_slot_count(size_t len, int src_cnt)
+{
+ return 1;
+}
+
+/**
+ * mv_xor_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_xor_free_slots(struct mv_xor_chan *mv_chan,
+ struct mv_xor_desc_slot *slot)
+{
+ dev_dbg(mv_chan->device->common.dev, "%s %d slot %p\n",
+ __func__, __LINE__, slot);
+
+ slot->slots_per_op = 0;
+
+}
+
+/*
+ * mv_xor_start_new_chain - program the engine to operate on new chain headed by
+ * sw_desc
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan,
+ struct mv_xor_desc_slot *sw_desc)
+{
+ dev_dbg(mv_chan->device->common.dev, "%s %d: sw_desc %p\n",
+ __func__, __LINE__, sw_desc);
+ if (sw_desc->type != mv_chan->current_type)
+ mv_set_mode(mv_chan, sw_desc->type);
+
+ if (sw_desc->type == DMA_MEMSET) {
+ /* for memset requests we need to program the engine, no
+ * descriptors used.
+ */
+ struct mv_xor_desc *hw_desc = sw_desc->hw_desc;
+ mv_chan_set_dest_pointer(mv_chan, hw_desc->phy_dest_addr);
+ mv_chan_set_block_size(mv_chan, sw_desc->unmap_len);
+ mv_chan_set_value(mv_chan, sw_desc->value);
+ } else {
+ /* set the hardware chain */
+ mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys);
+ }
+ mv_chan->pending += sw_desc->slot_cnt;
+ mv_xor_issue_pending(&mv_chan->common);
+}
+
+static dma_cookie_t
+mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *mv_chan, dma_cookie_t cookie)
+{
+ BUG_ON(desc->async_tx.cookie < 0);
+
+ if (desc->async_tx.cookie > 0) {
+ cookie = desc->async_tx.cookie;
+
+ /* call the callback (must not sleep or submit new
+ * operations to this channel)
+ */
+ if (desc->async_tx.callback)
+ desc->async_tx.callback(
+ desc->async_tx.callback_param);
+
+ /* unmap dma addresses
+ * (unmap_single vs unmap_page?)
+ */
+ if (desc->group_head && desc->unmap_len) {
+ struct mv_xor_desc_slot *unmap = desc->group_head;
+ struct device *dev =
+ &mv_chan->device->pdev->dev;
+ u32 len = unmap->unmap_len;
+ enum dma_ctrl_flags flags = desc->async_tx.flags;
+ u32 src_cnt;
+ dma_addr_t addr;
+ dma_addr_t dest;
+
+ src_cnt = unmap->unmap_src_cnt;
+ dest = mv_desc_get_dest_addr(unmap);
+ if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+ enum dma_data_direction dir;
+
+ if (src_cnt > 1) /* is xor ? */
+ dir = DMA_BIDIRECTIONAL;
+ else
+ dir = DMA_FROM_DEVICE;
+ dma_unmap_page(dev, dest, len, dir);
+ }
+
+ if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+ while (src_cnt--) {
+ addr = mv_desc_get_src_addr(unmap,
+ src_cnt);
+ if (addr == dest)
+ continue;
+ dma_unmap_page(dev, addr, len,
+ DMA_TO_DEVICE);
+ }
+ }
+ desc->group_head = NULL;
+ }
+ }
+
+ /* run dependent operations */
+ async_tx_run_dependencies(&desc->async_tx);
+
+ return cookie;
+}
+
+static int
+mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan)
+{
+ struct mv_xor_desc_slot *iter, *_iter;
+
+ dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
+ list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+ completed_node) {
+
+ if (async_tx_test_ack(&iter->async_tx)) {
+ list_del(&iter->completed_node);
+ mv_xor_free_slots(mv_chan, iter);
+ }
+ }
+ return 0;
+}
+
+static int
+mv_xor_clean_slot(struct mv_xor_desc_slot *desc,
+ struct mv_xor_chan *mv_chan)
+{
+ dev_dbg(mv_chan->device->common.dev, "%s %d: desc %p flags %d\n",
+ __func__, __LINE__, desc, desc->async_tx.flags);
+ list_del(&desc->chain_node);
+ /* the client is allowed to attach dependent operations
+ * until 'ack' is set
+ */
+ if (!async_tx_test_ack(&desc->async_tx)) {
+ /* move this slot to the completed_slots */
+ list_add_tail(&desc->completed_node, &mv_chan->completed_slots);
+ return 0;
+ }
+
+ mv_xor_free_slots(mv_chan, desc);
+ return 0;
+}
+
+static void __mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+ struct mv_xor_desc_slot *iter, *_iter;
+ dma_cookie_t cookie = 0;
+ int busy = mv_chan_is_busy(mv_chan);
+ u32 current_desc = mv_chan_get_current_desc(mv_chan);
+ int seen_current = 0;
+
+ dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
+ dev_dbg(mv_chan->device->common.dev, "current_desc %x\n", current_desc);
+ mv_xor_clean_completed_slots(mv_chan);
+
+ /* free completed slots from the chain starting with
+ * the oldest descriptor
+ */
+
+ list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+ chain_node) {
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+
+ /* do not advance past the current descriptor loaded into the
+ * hardware channel, subsequent descriptors are either in
+ * process or have not been submitted
+ */
+ if (seen_current)
+ break;
+
+ /* stop the search if we reach the current descriptor and the
+ * channel is busy
+ */
+ if (iter->async_tx.phys == current_desc) {
+ seen_current = 1;
+ if (busy)
+ break;
+ }
+
+ cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie);
+
+ if (mv_xor_clean_slot(iter, mv_chan))
+ break;
+ }
+
+ if ((busy == 0) && !list_empty(&mv_chan->chain)) {
+ struct mv_xor_desc_slot *chain_head;
+ chain_head = list_entry(mv_chan->chain.next,
+ struct mv_xor_desc_slot,
+ chain_node);
+
+ mv_xor_start_new_chain(mv_chan, chain_head);
+ }
+
+ if (cookie > 0)
+ mv_chan->completed_cookie = cookie;
+}
+
+static void
+mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+ spin_lock_bh(&mv_chan->lock);
+ __mv_xor_slot_cleanup(mv_chan);
+ spin_unlock_bh(&mv_chan->lock);
+}
+
+static void mv_xor_tasklet(unsigned long data)
+{
+ struct mv_xor_chan *chan = (struct mv_xor_chan *) data;
+ __mv_xor_slot_cleanup(chan);
+}
+
+static struct mv_xor_desc_slot *
+mv_xor_alloc_slots(struct mv_xor_chan *mv_chan, int num_slots,
+ int slots_per_op)
+{
+ struct mv_xor_desc_slot *iter, *_iter, *alloc_start = NULL;
+ LIST_HEAD(chain);
+ int slots_found, retry = 0;
+
+ /* start search from the last allocated descrtiptor
+ * if a contiguous allocation can not be found start searching
+ * from the beginning of the list
+ */
+retry:
+ slots_found = 0;
+ if (retry == 0)
+ iter = mv_chan->last_used;
+ else
+ iter = list_entry(&mv_chan->all_slots,
+ struct mv_xor_desc_slot,
+ slot_node);
+
+ list_for_each_entry_safe_continue(
+ iter, _iter, &mv_chan->all_slots, slot_node) {
+ prefetch(_iter);
+ prefetch(&_iter->async_tx);
+ if (iter->slots_per_op) {
+ /* give up after finding the first busy slot
+ * on the second pass through the list
+ */
+ if (retry)
+ break;
+
+ slots_found = 0;
+ continue;
+ }
+
+ /* start the allocation if the slot is correctly aligned */
+ if (!slots_found++)
+ alloc_start = iter;
+
+ if (slots_found == num_slots) {
+ struct mv_xor_desc_slot *alloc_tail = NULL;
+ struct mv_xor_desc_slot *last_used = NULL;
+ iter = alloc_start;
+ while (num_slots) {
+ int i;
+
+ /* pre-ack all but the last descriptor */
+ async_tx_ack(&iter->async_tx);
+
+ list_add_tail(&iter->chain_node, &chain);
+ alloc_tail = iter;
+ iter->async_tx.cookie = 0;
+ iter->slot_cnt = num_slots;
+ iter->xor_check_result = NULL;
+ for (i = 0; i < slots_per_op; i++) {
+ iter->slots_per_op = slots_per_op - i;
+ last_used = iter;
+ iter = list_entry(iter->slot_node.next,
+ struct mv_xor_desc_slot,
+ slot_node);
+ }
+ num_slots -= slots_per_op;
+ }
+ alloc_tail->group_head = alloc_start;
+ alloc_tail->async_tx.cookie = -EBUSY;
+ list_splice(&chain, &alloc_tail->async_tx.tx_list);
+ mv_chan->last_used = last_used;
+ mv_desc_clear_next_desc(alloc_start);
+ mv_desc_clear_next_desc(alloc_tail);
+ return alloc_tail;
+ }
+ }
+ if (!retry++)
+ goto retry;
+
+ /* try to free some slots if the allocation fails */
+ tasklet_schedule(&mv_chan->irq_tasklet);
+
+ return NULL;
+}
+
+static dma_cookie_t
+mv_desc_assign_cookie(struct mv_xor_chan *mv_chan,
+ struct mv_xor_desc_slot *desc)
+{
+ dma_cookie_t cookie = mv_chan->common.cookie;
+
+ if (++cookie < 0)
+ cookie = 1;
+ mv_chan->common.cookie = desc->async_tx.cookie = cookie;
+ return cookie;
+}
+
+/************************ DMA engine API functions ****************************/
+static dma_cookie_t
+mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+ struct mv_xor_desc_slot *sw_desc = to_mv_xor_slot(tx);
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan);
+ struct mv_xor_desc_slot *grp_start, *old_chain_tail;
+ dma_cookie_t cookie;
+ int new_hw_chain = 1;
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p: async_tx %p\n",
+ __func__, sw_desc, &sw_desc->async_tx);
+
+ grp_start = sw_desc->group_head;
+
+ spin_lock_bh(&mv_chan->lock);
+ cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
+
+ if (list_empty(&mv_chan->chain))
+ list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain);
+ else {
+ new_hw_chain = 0;
+
+ old_chain_tail = list_entry(mv_chan->chain.prev,
+ struct mv_xor_desc_slot,
+ chain_node);
+ list_splice_init(&grp_start->async_tx.tx_list,
+ &old_chain_tail->chain_node);
+
+ if (!mv_can_chain(grp_start))
+ goto submit_done;
+
+ dev_dbg(mv_chan->device->common.dev, "Append to last desc %x\n",
+ old_chain_tail->async_tx.phys);
+
+ /* fix up the hardware chain */
+ mv_desc_set_next_desc(old_chain_tail, grp_start->async_tx.phys);
+
+ /* if the channel is not busy */
+ if (!mv_chan_is_busy(mv_chan)) {
+ u32 current_desc = mv_chan_get_current_desc(mv_chan);
+ /*
+ * and the curren desc is the end of the chain before
+ * the append, then we need to start the channel
+ */
+ if (current_desc == old_chain_tail->async_tx.phys)
+ new_hw_chain = 1;
+ }
+ }
+
+ if (new_hw_chain)
+ mv_xor_start_new_chain(mv_chan, grp_start);
+
+submit_done:
+ spin_unlock_bh(&mv_chan->lock);
+
+ return cookie;
+}
+
+/* returns the number of allocated descriptors */
+static int mv_xor_alloc_chan_resources(struct dma_chan *chan,
+ struct dma_client *client)
+{
+ char *hw_desc;
+ int idx;
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *slot = NULL;
+ struct mv_xor_platform_data *plat_data =
+ mv_chan->device->pdev->dev.platform_data;
+ int num_descs_in_pool = plat_data->pool_size/MV_XOR_SLOT_SIZE;
+
+ /* Allocate descriptor slots */
+ idx = mv_chan->slots_allocated;
+ while (idx < num_descs_in_pool) {
+ slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+ if (!slot) {
+ printk(KERN_INFO "MV XOR Channel only initialized"
+ " %d descriptor slots", idx);
+ break;
+ }
+ hw_desc = (char *) mv_chan->device->dma_desc_pool_virt;
+ slot->hw_desc = (void *) &hw_desc[idx * MV_XOR_SLOT_SIZE];
+
+ dma_async_tx_descriptor_init(&slot->async_tx, chan);
+ slot->async_tx.tx_submit = mv_xor_tx_submit;
+ INIT_LIST_HEAD(&slot->chain_node);
+ INIT_LIST_HEAD(&slot->slot_node);
+ INIT_LIST_HEAD(&slot->async_tx.tx_list);
+ hw_desc = (char *) mv_chan->device->dma_desc_pool;
+ slot->async_tx.phys =
+ (dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];
+ slot->idx = idx++;
+
+ spin_lock_bh(&mv_chan->lock);
+ mv_chan->slots_allocated = idx;
+ list_add_tail(&slot->slot_node, &mv_chan->all_slots);
+ spin_unlock_bh(&mv_chan->lock);
+ }
+
+ if (mv_chan->slots_allocated && !mv_chan->last_used)
+ mv_chan->last_used = list_entry(mv_chan->all_slots.next,
+ struct mv_xor_desc_slot,
+ slot_node);
+
+ dev_dbg(mv_chan->device->common.dev,
+ "allocated %d descriptor slots last_used: %p\n",
+ mv_chan->slots_allocated, mv_chan->last_used);
+
+ return mv_chan->slots_allocated ? : -ENOMEM;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+ size_t len, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *sw_desc, *grp_start;
+ int slot_cnt;
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s dest: %x src %x len: %u flags: %ld\n",
+ __func__, dest, src, len, flags);
+ if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+ return NULL;
+
+ BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+ spin_lock_bh(&mv_chan->lock);
+ slot_cnt = mv_chan_memcpy_slot_count(len);
+ sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+ if (sw_desc) {
+ sw_desc->type = DMA_MEMCPY;
+ sw_desc->async_tx.flags = flags;
+ grp_start = sw_desc->group_head;
+ mv_desc_init(grp_start, flags);
+ mv_desc_set_byte_count(grp_start, len);
+ mv_desc_set_dest_addr(sw_desc->group_head, dest);
+ mv_desc_set_src_addr(grp_start, 0, src);
+ sw_desc->unmap_src_cnt = 1;
+ sw_desc->unmap_len = len;
+ }
+ spin_unlock_bh(&mv_chan->lock);
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p async_tx %p\n",
+ __func__, sw_desc, sw_desc ? &sw_desc->async_tx : 0);
+
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
+ size_t len, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *sw_desc, *grp_start;
+ int slot_cnt;
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s dest: %x len: %u flags: %ld\n",
+ __func__, dest, len, flags);
+ if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+ return NULL;
+
+ BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+ spin_lock_bh(&mv_chan->lock);
+ slot_cnt = mv_chan_memset_slot_count(len);
+ sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+ if (sw_desc) {
+ sw_desc->type = DMA_MEMSET;
+ sw_desc->async_tx.flags = flags;
+ grp_start = sw_desc->group_head;
+ mv_desc_init(grp_start, flags);
+ mv_desc_set_byte_count(grp_start, len);
+ mv_desc_set_dest_addr(sw_desc->group_head, dest);
+ mv_desc_set_block_fill_val(grp_start, value);
+ sw_desc->unmap_src_cnt = 1;
+ sw_desc->unmap_len = len;
+ }
+ spin_unlock_bh(&mv_chan->lock);
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p async_tx %p \n",
+ __func__, sw_desc, &sw_desc->async_tx);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+ unsigned int src_cnt, size_t len, unsigned long flags)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *sw_desc, *grp_start;
+ int slot_cnt;
+
+ if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+ return NULL;
+
+ BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+ dev_dbg(mv_chan->device->common.dev,
+ "%s src_cnt: %d len: dest %x %u flags: %ld\n",
+ __func__, src_cnt, len, dest, flags);
+
+ spin_lock_bh(&mv_chan->lock);
+ slot_cnt = mv_chan_xor_slot_count(len, src_cnt);
+ sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+ if (sw_desc) {
+ sw_desc->type = DMA_XOR;
+ sw_desc->async_tx.flags = flags;
+ grp_start = sw_desc->group_head;
+ mv_desc_init(grp_start, flags);
+ /* the byte count field is the same as in memcpy desc*/
+ mv_desc_set_byte_count(grp_start, len);
+ mv_desc_set_dest_addr(sw_desc->group_head, dest);
+ sw_desc->unmap_src_cnt = src_cnt;
+ sw_desc->unmap_len = len;
+ while (src_cnt--)
+ mv_desc_set_src_addr(grp_start, src_cnt, src[src_cnt]);
+ }
+ spin_unlock_bh(&mv_chan->lock);
+ dev_dbg(mv_chan->device->common.dev,
+ "%s sw_desc %p async_tx %p \n",
+ __func__, sw_desc, &sw_desc->async_tx);
+ return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static void mv_xor_free_chan_resources(struct dma_chan *chan)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ struct mv_xor_desc_slot *iter, *_iter;
+ int in_use_descs = 0;
+
+ mv_xor_slot_cleanup(mv_chan);
+
+ spin_lock_bh(&mv_chan->lock);
+ list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+ chain_node) {
+ in_use_descs++;
+ list_del(&iter->chain_node);
+ }
+ list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+ completed_node) {
+ in_use_descs++;
+ list_del(&iter->completed_node);
+ }
+ list_for_each_entry_safe_reverse(
+ iter, _iter, &mv_chan->all_slots, slot_node) {
+ list_del(&iter->slot_node);
+ kfree(iter);
+ mv_chan->slots_allocated--;
+ }
+ mv_chan->last_used = NULL;
+
+ dev_dbg(mv_chan->device->common.dev, "%s slots_allocated %d\n",
+ __func__, mv_chan->slots_allocated);
+ spin_unlock_bh(&mv_chan->lock);
+
+ if (in_use_descs)
+ dev_err(mv_chan->device->common.dev,
+ "freeing %d in use descriptors!\n", in_use_descs);
+}
+
+/**
+ * mv_xor_is_complete - poll the status of an XOR transaction
+ * @chan: XOR channel handle
+ * @cookie: XOR transaction identifier
+ */
+static enum dma_status mv_xor_is_complete(struct dma_chan *chan,
+ dma_cookie_t cookie,
+ dma_cookie_t *done,
+ dma_cookie_t *used)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+ dma_cookie_t last_used;
+ dma_cookie_t last_complete;
+ enum dma_status ret;
+
+ last_used = chan->cookie;
+ last_complete = mv_chan->completed_cookie;
+ mv_chan->is_complete_cookie = cookie;
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ ret = dma_async_is_complete(cookie, last_complete, last_used);
+ if (ret == DMA_SUCCESS) {
+ mv_xor_clean_completed_slots(mv_chan);
+ return ret;
+ }
+ mv_xor_slot_cleanup(mv_chan);
+
+ last_used = chan->cookie;
+ last_complete = mv_chan->completed_cookie;
+
+ if (done)
+ *done = last_complete;
+ if (used)
+ *used = last_used;
+
+ return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static void mv_dump_xor_regs(struct mv_xor_chan *chan)
+{
+ u32 val;
+
+ val = __raw_readl(XOR_CONFIG(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "config 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_ACTIVATION(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "activation 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_INTR_CAUSE(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "intr cause 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_INTR_MASK(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "intr mask 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_ERROR_CAUSE(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error cause 0x%08x.\n", val);
+
+ val = __raw_readl(XOR_ERROR_ADDR(chan));
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error addr 0x%08x.\n", val);
+}
+
+static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan,
+ u32 intr_cause)
+{
+ if (intr_cause & (1 << 4)) {
+ dev_dbg(chan->device->common.dev,
+ "ignore this error\n");
+ return;
+ }
+
+ dev_printk(KERN_ERR, chan->device->common.dev,
+ "error on chan %d. intr cause 0x%08x.\n",
+ chan->idx, intr_cause);
+
+ mv_dump_xor_regs(chan);
+ BUG();
+}
+
+static irqreturn_t mv_xor_interrupt_handler(int irq, void *data)
+{
+ struct mv_xor_chan *chan = data;
+ u32 intr_cause = mv_chan_get_intr_cause(chan);
+
+ dev_dbg(chan->device->common.dev, "intr cause %x\n", intr_cause);
+
+ if (mv_is_err_intr(intr_cause))
+ mv_xor_err_interrupt_handler(chan, intr_cause);
+
+ tasklet_schedule(&chan->irq_tasklet);
+
+ mv_xor_device_clear_eoc_cause(chan);
+
+ return IRQ_HANDLED;
+}
+
+static void mv_xor_issue_pending(struct dma_chan *chan)
+{
+ struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+
+ if (mv_chan->pending >= MV_XOR_THRESHOLD) {
+ mv_chan->pending = 0;
+ mv_chan_activate(mv_chan);
+ }
+}
+
+/*
+ * Perform a transaction to verify the HW works.
+ */
+#define MV_XOR_TEST_SIZE 2000
+
+static int __devinit mv_xor_memcpy_self_test(struct mv_xor_device *device)
+{
+ int i;
+ void *src, *dest;
+ dma_addr_t src_dma, dest_dma;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ struct dma_async_tx_descriptor *tx;
+ int err = 0;
+ struct mv_xor_chan *mv_chan;
+
+ src = kmalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+ if (!src)
+ return -ENOMEM;
+
+ dest = kzalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+ if (!dest) {
+ kfree(src);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffer */
+ for (i = 0; i < MV_XOR_TEST_SIZE; i++)
+ ((u8 *) src)[i] = (u8)i;
+
+ /* Start copy, using first DMA channel */
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (mv_xor_alloc_chan_resources(dma_chan, NULL) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ dest_dma = dma_map_single(dma_chan->device->dev, dest,
+ MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+
+ src_dma = dma_map_single(dma_chan->device->dev, src,
+ MV_XOR_TEST_SIZE, DMA_TO_DEVICE);
+
+ tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
+ MV_XOR_TEST_SIZE, 0);
+ cookie = mv_xor_tx_submit(tx);
+ mv_xor_issue_pending(dma_chan);
+ async_tx_ack(tx);
+ msleep(1);
+
+ if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test copy timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ mv_chan = to_mv_xor_chan(dma_chan);
+ dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+ MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+ if (memcmp(src, dest, MV_XOR_TEST_SIZE)) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test copy failed compare, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+free_resources:
+ mv_xor_free_chan_resources(dma_chan);
+out:
+ kfree(src);
+ kfree(dest);
+ return err;
+}
+
+#define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */
+static int __devinit
+mv_xor_xor_self_test(struct mv_xor_device *device)
+{
+ int i, src_idx;
+ struct page *dest;
+ struct page *xor_srcs[MV_XOR_NUM_SRC_TEST];
+ dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST];
+ dma_addr_t dest_dma;
+ struct dma_async_tx_descriptor *tx;
+ struct dma_chan *dma_chan;
+ dma_cookie_t cookie;
+ u8 cmp_byte = 0;
+ u32 cmp_word;
+ int err = 0;
+ struct mv_xor_chan *mv_chan;
+
+ for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+ xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+ if (!xor_srcs[src_idx])
+ while (src_idx--) {
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+ }
+
+ dest = alloc_page(GFP_KERNEL);
+ if (!dest)
+ while (src_idx--) {
+ __free_page(xor_srcs[src_idx]);
+ return -ENOMEM;
+ }
+
+ /* Fill in src buffers */
+ for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+ u8 *ptr = page_address(xor_srcs[src_idx]);
+ for (i = 0; i < PAGE_SIZE; i++)
+ ptr[i] = (1 << src_idx);
+ }
+
+ for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++)
+ cmp_byte ^= (u8) (1 << src_idx);
+
+ cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+ (cmp_byte << 8) | cmp_byte;
+
+ memset(page_address(dest), 0, PAGE_SIZE);
+
+ dma_chan = container_of(device->common.channels.next,
+ struct dma_chan,
+ device_node);
+ if (mv_xor_alloc_chan_resources(dma_chan, NULL) < 1) {
+ err = -ENODEV;
+ goto out;
+ }
+
+ /* test xor */
+ dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE,
+ DMA_FROM_DEVICE);
+
+ for (i = 0; i < MV_XOR_NUM_SRC_TEST; i++)
+ dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
+ 0, PAGE_SIZE, DMA_TO_DEVICE);
+
+ tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+ MV_XOR_NUM_SRC_TEST, PAGE_SIZE, 0);
+
+ cookie = mv_xor_tx_submit(tx);
+ mv_xor_issue_pending(dma_chan);
+ async_tx_ack(tx);
+ msleep(8);
+
+ if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
+ DMA_SUCCESS) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test xor timed out, disabling\n");
+ err = -ENODEV;
+ goto free_resources;
+ }
+
+ mv_chan = to_mv_xor_chan(dma_chan);
+ dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+ PAGE_SIZE, DMA_FROM_DEVICE);
+ for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+ u32 *ptr = page_address(dest);
+ if (ptr[i] != cmp_word) {
+ dev_printk(KERN_ERR, dma_chan->device->dev,
+ "Self-test xor failed compare, disabling."
+ " index %d, data %x, expected %x\n", i,
+ ptr[i], cmp_word);
+ err = -ENODEV;
+ goto free_resources;
+ }
+ }
+
+free_resources:
+ mv_xor_free_chan_resources(dma_chan);
+out:
+ src_idx = MV_XOR_NUM_SRC_TEST;
+ while (src_idx--)
+ __free_page(xor_srcs[src_idx]);
+ __free_page(dest);
+ return err;
+}
+
+static int __devexit mv_xor_remove(struct platform_device *dev)
+{
+ struct mv_xor_device *device = platform_get_drvdata(dev);
+ struct dma_chan *chan, *_chan;
+ struct mv_xor_chan *mv_chan;
+ struct mv_xor_platform_data *plat_data = dev->dev.platform_data;
+
+ dma_async_device_unregister(&device->common);
+
+ dma_free_coherent(&dev->dev, plat_data->pool_size,
+ device->dma_desc_pool_virt, device->dma_desc_pool);
+
+ list_for_each_entry_safe(chan, _chan, &device->common.channels,
+ device_node) {
+ mv_chan = to_mv_xor_chan(chan);
+ list_del(&chan->device_node);
+ }
+
+ return 0;
+}
+
+static int __devinit mv_xor_probe(struct platform_device *pdev)
+{
+ int ret = 0;
+ int irq;
+ struct mv_xor_device *adev;
+ struct mv_xor_chan *mv_chan;
+ struct dma_device *dma_dev;
+ struct mv_xor_platform_data *plat_data = pdev->dev.platform_data;
+
+
+ adev = devm_kzalloc(&pdev->dev, sizeof(*adev), GFP_KERNEL);
+ if (!adev)
+ return -ENOMEM;
+
+ dma_dev = &adev->common;
+
+ /* allocate coherent memory for hardware descriptors
+ * note: writecombine gives slightly better performance, but
+ * requires that we explicitly flush the writes
+ */
+ adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
+ plat_data->pool_size,
+ &adev->dma_desc_pool,
+ GFP_KERNEL);
+ if (!adev->dma_desc_pool_virt)
+ return -ENOMEM;
+
+ adev->id = plat_data->hw_id;
+
+ /* discover transaction capabilites from the platform data */
+ dma_dev->cap_mask = plat_data->cap_mask;
+ adev->pdev = pdev;
+ platform_set_drvdata(pdev, adev);
+
+ adev->shared = platform_get_drvdata(plat_data->shared);
+
+ INIT_LIST_HEAD(&dma_dev->channels);
+
+ /* set base routines */
+ dma_dev->device_alloc_chan_resources = mv_xor_alloc_chan_resources;
+ dma_dev->device_free_chan_resources = mv_xor_free_chan_resources;
+ dma_dev->device_is_tx_complete = mv_xor_is_complete;
+ dma_dev->device_issue_pending = mv_xor_issue_pending;
+ dma_dev->dev = &pdev->dev;
+
+ /* set prep routines based on capability */
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy;
+ if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
+ dma_dev->device_prep_dma_memset = mv_xor_prep_dma_memset;
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ dma_dev->max_xor = 8; ;
+ dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor;
+ }
+
+ mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL);
+ if (!mv_chan) {
+ ret = -ENOMEM;
+ goto err_free_dma;
+ }
+ mv_chan->device = adev;
+ mv_chan->idx = plat_data->hw_id;
+ mv_chan->mmr_base = adev->shared->xor_base;
+
+ if (!mv_chan->mmr_base) {
+ ret = -ENOMEM;
+ goto err_free_dma;
+ }
+ tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long)
+ mv_chan);
+
+ /* clear errors before enabling interrupts */
+ mv_xor_device_clear_err_status(mv_chan);
+
+ irq = platform_get_irq(pdev, 0);
+ if (irq < 0) {
+ ret = irq;
+ goto err_free_dma;
+ }
+ ret = devm_request_irq(&pdev->dev, irq,
+ mv_xor_interrupt_handler,
+ 0, dev_name(&pdev->dev), mv_chan);
+ if (ret)
+ goto err_free_dma;
+
+ mv_chan_unmask_interrupts(mv_chan);
+
+ mv_set_mode(mv_chan, DMA_MEMCPY);
+
+ spin_lock_init(&mv_chan->lock);
+ INIT_LIST_HEAD(&mv_chan->chain);
+ INIT_LIST_HEAD(&mv_chan->completed_slots);
+ INIT_LIST_HEAD(&mv_chan->all_slots);
+ INIT_RCU_HEAD(&mv_chan->common.rcu);
+ mv_chan->common.device = dma_dev;
+
+ list_add_tail(&mv_chan->common.device_node, &dma_dev->channels);
+
+ if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+ ret = mv_xor_memcpy_self_test(adev);
+ dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
+ if (ret)
+ goto err_free_dma;
+ }
+
+ if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+ ret = mv_xor_xor_self_test(adev);
+ dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
+ if (ret)
+ goto err_free_dma;
+ }
+
+ dev_printk(KERN_INFO, &pdev->dev, "Marvell XOR: "
+ "( %s%s%s%s)\n",
+ dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+ dma_has_cap(DMA_MEMSET, dma_dev->cap_mask) ? "fill " : "",
+ dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
+ dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
+
+ dma_async_device_register(dma_dev);
+ goto out;
+
+ err_free_dma:
+ dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
+ adev->dma_desc_pool_virt, adev->dma_desc_pool);
+ out:
+ return ret;
+}
+
+static void
+mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp,
+ struct mbus_dram_target_info *dram)
+{
+ void __iomem *base = msp->xor_base;
+ u32 win_enable = 0;
+ int i;
+
+ for (i = 0; i < 8; i++) {
+ writel(0, base + WINDOW_BASE(i));
+ writel(0, base + WINDOW_SIZE(i));
+ if (i < 4)
+ writel(0, base + WINDOW_REMAP_HIGH(i));
+ }
+
+ for (i = 0; i < dram->num_cs; i++) {
+ struct mbus_dram_window *cs = dram->cs + i;
+
+ writel((cs->base & 0xffff0000) |
+ (cs->mbus_attr << 8) |
+ dram->mbus_dram_target_id, base + WINDOW_BASE(i));
+ writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i));
+
+ win_enable |= (1 << i);
+ win_enable |= 3 << (16 + (2 * i));
+ }
+
+ writel(win_enable, base + WINDOW_BAR_ENABLE(0));
+ writel(win_enable, base + WINDOW_BAR_ENABLE(1));
+}
+
+static struct platform_driver mv_xor_driver = {
+ .probe = mv_xor_probe,
+ .remove = mv_xor_remove,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = MV_XOR_NAME,
+ },
+};
+
+static int mv_xor_shared_probe(struct platform_device *pdev)
+{
+ struct mv_xor_platform_shared_data *msd = pdev->dev.platform_data;
+ struct mv_xor_shared_private *msp;
+ struct resource *res;
+
+ dev_printk(KERN_NOTICE, &pdev->dev, "Marvell shared XOR driver\n");
+
+ msp = devm_kzalloc(&pdev->dev, sizeof(*msp), GFP_KERNEL);
+ if (!msp)
+ return -ENOMEM;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!res)
+ return -ENODEV;
+
+ msp->xor_base = devm_ioremap(&pdev->dev, res->start,
+ res->end - res->start + 1);
+ if (!msp->xor_base)
+ return -EBUSY;
+
+ res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+ if (!res)
+ return -ENODEV;
+
+ msp->xor_high_base = devm_ioremap(&pdev->dev, res->start,
+ res->end - res->start + 1);
+ if (!msp->xor_high_base)
+ return -EBUSY;
+
+ platform_set_drvdata(pdev, msp);
+
+ /*
+ * (Re-)program MBUS remapping windows if we are asked to.
+ */
+ if (msd != NULL && msd->dram != NULL)
+ mv_xor_conf_mbus_windows(msp, msd->dram);
+
+ return 0;
+}
+
+static int mv_xor_shared_remove(struct platform_device *pdev)
+{
+ return 0;
+}
+
+static struct platform_driver mv_xor_shared_driver = {
+ .probe = mv_xor_shared_probe,
+ .remove = mv_xor_shared_remove,
+ .driver = {
+ .owner = THIS_MODULE,
+ .name = MV_XOR_SHARED_NAME,
+ },
+};
+
+
+static int __init mv_xor_init(void)
+{
+ int rc;
+
+ rc = platform_driver_register(&mv_xor_shared_driver);
+ if (!rc) {
+ rc = platform_driver_register(&mv_xor_driver);
+ if (rc)
+ platform_driver_unregister(&mv_xor_shared_driver);
+ }
+ return rc;
+}
+module_init(mv_xor_init);
+
+/* it's currently unsafe to unload this module */
+#if 0
+static void __exit mv_xor_exit(void)
+{
+ platform_driver_unregister(&mv_xor_driver);
+ platform_driver_unregister(&mv_xor_shared_driver);
+ return;
+}
+
+module_exit(mv_xor_exit);
+#endif
+
+MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>");
+MODULE_DESCRIPTION("DMA engine driver for Marvell's XOR engine");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
new file mode 100644
index 0000000..06cafe1
--- /dev/null
+++ b/drivers/dma/mv_xor.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MV_XOR_H
+#define MV_XOR_H
+
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+
+#define USE_TIMER
+#define MV_XOR_SLOT_SIZE 64
+#define MV_XOR_THRESHOLD 1
+
+#define XOR_OPERATION_MODE_XOR 0
+#define XOR_OPERATION_MODE_MEMCPY 2
+#define XOR_OPERATION_MODE_MEMSET 4
+
+#define XOR_CURR_DESC(chan) (chan->mmr_base + 0x210 + (chan->idx * 4))
+#define XOR_NEXT_DESC(chan) (chan->mmr_base + 0x200 + (chan->idx * 4))
+#define XOR_BYTE_COUNT(chan) (chan->mmr_base + 0x220 + (chan->idx * 4))
+#define XOR_DEST_POINTER(chan) (chan->mmr_base + 0x2B0 + (chan->idx * 4))
+#define XOR_BLOCK_SIZE(chan) (chan->mmr_base + 0x2C0 + (chan->idx * 4))
+#define XOR_INIT_VALUE_LOW(chan) (chan->mmr_base + 0x2E0)
+#define XOR_INIT_VALUE_HIGH(chan) (chan->mmr_base + 0x2E4)
+
+#define XOR_CONFIG(chan) (chan->mmr_base + 0x10 + (chan->idx * 4))
+#define XOR_ACTIVATION(chan) (chan->mmr_base + 0x20 + (chan->idx * 4))
+#define XOR_INTR_CAUSE(chan) (chan->mmr_base + 0x30)
+#define XOR_INTR_MASK(chan) (chan->mmr_base + 0x40)
+#define XOR_ERROR_CAUSE(chan) (chan->mmr_base + 0x50)
+#define XOR_ERROR_ADDR(chan) (chan->mmr_base + 0x60)
+#define XOR_INTR_MASK_VALUE 0x3F5
+
+#define WINDOW_BASE(w) (0x250 + ((w) << 2))
+#define WINDOW_SIZE(w) (0x270 + ((w) << 2))
+#define WINDOW_REMAP_HIGH(w) (0x290 + ((w) << 2))
+#define WINDOW_BAR_ENABLE(chan) (0x240 + ((chan) << 2))
+
+struct mv_xor_shared_private {
+ void __iomem *xor_base;
+ void __iomem *xor_high_base;
+};
+
+
+/**
+ * struct mv_xor_device - internal representation of a XOR device
+ * @pdev: Platform device
+ * @id: HW XOR Device selector
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
+ * @common: embedded struct dma_device
+ */
+struct mv_xor_device {
+ struct platform_device *pdev;
+ int id;
+ dma_addr_t dma_desc_pool;
+ void *dma_desc_pool_virt;
+ struct dma_device common;
+ struct mv_xor_shared_private *shared;
+};
+
+/**
+ * struct mv_xor_chan - internal representation of a XOR channel
+ * @pending: allows batching of hardware operations
+ * @completed_cookie: identifier for the most recently completed operation
+ * @lock: serializes enqueue/dequeue operations to the descriptors pool
+ * @mmr_base: memory mapped register base
+ * @idx: the index of the xor channel
+ * @chain: device chain view of the descriptors
+ * @completed_slots: slots completed by HW but still need to be acked
+ * @device: parent device
+ * @common: common dmaengine channel object members
+ * @last_used: place holder for allocation to continue from where it left off
+ * @all_slots: complete domain of slots usable by the channel
+ * @slots_allocated: records the actual size of the descriptor slot pool
+ * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs
+ */
+struct mv_xor_chan {
+ int pending;
+ dma_cookie_t completed_cookie;
+ spinlock_t lock; /* protects the descriptor slot pool */
+ void __iomem *mmr_base;
+ unsigned int idx;
+ enum dma_transaction_type current_type;
+ struct list_head chain;
+ struct list_head completed_slots;
+ struct mv_xor_device *device;
+ struct dma_chan common;
+ struct mv_xor_desc_slot *last_used;
+ struct list_head all_slots;
+ int slots_allocated;
+ struct tasklet_struct irq_tasklet;
+#ifdef USE_TIMER
+ unsigned long cleanup_time;
+ u32 current_on_last_cleanup;
+ dma_cookie_t is_complete_cookie;
+#endif
+};
+
+/**
+ * struct mv_xor_desc_slot - software descriptor
+ * @slot_node: node on the mv_xor_chan.all_slots list
+ * @chain_node: node on the mv_xor_chan.chain list
+ * @completed_node: node on the mv_xor_chan.completed_slots list
+ * @hw_desc: virtual address of the hardware descriptor chain
+ * @phys: hardware address of the hardware descriptor chain
+ * @group_head: first operation in a transaction
+ * @slot_cnt: total slots used in an transaction (group of operations)
+ * @slots_per_op: number of slots per operation
+ * @idx: pool index
+ * @unmap_src_cnt: number of xor sources
+ * @unmap_len: transaction bytecount
+ * @async_tx: support for the async_tx api
+ * @group_list: list of slots that make up a multi-descriptor transaction
+ * for example transfer lengths larger than the supported hw max
+ * @xor_check_result: result of zero sum
+ * @crc32_result: result crc calculation
+ */
+struct mv_xor_desc_slot {
+ struct list_head slot_node;
+ struct list_head chain_node;
+ struct list_head completed_node;
+ enum dma_transaction_type type;
+ void *hw_desc;
+ struct mv_xor_desc_slot *group_head;
+ u16 slot_cnt;
+ u16 slots_per_op;
+ u16 idx;
+ u16 unmap_src_cnt;
+ u32 value;
+ size_t unmap_len;
+ struct dma_async_tx_descriptor async_tx;
+ union {
+ u32 *xor_check_result;
+ u32 *crc32_result;
+ };
+#ifdef USE_TIMER
+ unsigned long arrival_time;
+ struct timer_list timeout;
+#endif
+};
+
+/* This structure describes XOR descriptor size 64bytes */
+struct mv_xor_desc {
+ u32 status; /* descriptor execution status */
+ u32 crc32_result; /* result of CRC-32 calculation */
+ u32 desc_command; /* type of operation to be carried out */
+ u32 phy_next_desc; /* next descriptor address pointer */
+ u32 byte_count; /* size of src/dst blocks in bytes */
+ u32 phy_dest_addr; /* destination block address */
+ u32 phy_src_addr[8]; /* source block addresses */
+ u32 reserved0;
+ u32 reserved1;
+};
+
+#define to_mv_sw_desc(addr_hw_desc) \
+ container_of(addr_hw_desc, struct mv_xor_desc_slot, hw_desc)
+
+#define mv_hw_desc_slot_idx(hw_desc, idx) \
+ ((void *)(((unsigned long)hw_desc) + ((idx) << 5)))
+
+#define MV_XOR_MIN_BYTE_COUNT (128)
+#define XOR_MAX_BYTE_COUNT ((16 * 1024 * 1024) - 1)
+#define MV_XOR_MAX_BYTE_COUNT XOR_MAX_BYTE_COUNT
+
+
+#endif
OpenPOWER on IntegriCloud