18 files changed, 10114 insertions, 0 deletions
diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
new file mode 100644
index 0000000..904e575
--- /dev/null
+++ b/drivers/dma/Kconfig
@@ -0,0 +1,91 @@
+#
+# DMA engine configuration
+#
+
+menuconfig DMADEVICES
+	bool "DMA Engine support"
+	depends on !HIGHMEM64G && HAS_DMA
+	help
+	  DMA engines can do asynchronous data transfers without
+	  involving the host CPU.  Currently, this framework can be
+	  used to offload memory copies in the network stack and
+	  RAID operations in the MD driver.  This menu only presents
+	  DMA Device drivers supported by the configured arch, it may
+	  be empty in some cases.
+
+if DMADEVICES
+
+comment "DMA Devices"
+
+config INTEL_IOATDMA
+	tristate "Intel I/OAT DMA support"
+	depends on PCI && X86
+	select DMA_ENGINE
+	select DCA
+	help
+	  Enable support for the Intel(R) I/OAT DMA engine present
+	  in recent Intel Xeon chipsets.
+
+	  Say Y here if you have such a chipset.
+
+	  If unsure, say N.
+
+config INTEL_IOP_ADMA
+	tristate "Intel IOP ADMA support"
+	depends on ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX
+	select ASYNC_CORE
+	select DMA_ENGINE
+	help
+	  Enable support for the Intel(R) IOP Series RAID engines.
+
+config DW_DMAC
+	tristate "Synopsys DesignWare AHB DMA support"
+	depends on AVR32
+	select DMA_ENGINE
+	default y if CPU_AT32AP7000
+	help
+	  Support the Synopsys DesignWare AHB DMA controller.  This
+	  can be integrated in chips such as the Atmel AT32ap7000.
+
+config FSL_DMA
+	tristate "Freescale Elo and Elo Plus DMA support"
+	depends on FSL_SOC
+	select DMA_ENGINE
+	---help---
+	  Enable support for the Freescale Elo and Elo Plus DMA controllers.
+	  The Elo is the DMA controller on some 82xx and 83xx parts, and the
+	  Elo Plus is the DMA controller on 85xx and 86xx parts.
+
+config MV_XOR
+	bool "Marvell XOR engine support"
+	depends on PLAT_ORION
+	select ASYNC_CORE
+	select DMA_ENGINE
+	---help---
+	  Enable support for the Marvell XOR engine.
+
+config DMA_ENGINE
+	bool
+
+comment "DMA Clients"
+	depends on DMA_ENGINE
+
+config NET_DMA
+	bool "Network: TCP receive copy offload"
+	depends on DMA_ENGINE && NET
+	default (INTEL_IOATDMA || FSL_DMA)
+	help
+	  This enables the use of DMA engines in the network stack to
+	  offload receive copy-to-user operations, freeing CPU cycles.
+
+	  Say Y here if you enabled INTEL_IOATDMA or FSL_DMA, otherwise
+	  say N.
+
+config DMATEST
+	tristate "DMA Test client"
+	depends on DMA_ENGINE
+	help
+	  Simple DMA test client. Say N unless you're debugging a
+	  DMA Device driver.
+
+endif
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
new file mode 100644
index 0000000..14f5952
--- /dev/null
+++ b/drivers/dma/Makefile
@@ -0,0 +1,9 @@
+obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
+obj-$(CONFIG_NET_DMA) += iovlock.o
+obj-$(CONFIG_DMATEST) += dmatest.o
+obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
+ioatdma-objs := ioat.o ioat_dma.o ioat_dca.o
+obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
+obj-$(CONFIG_FSL_DMA) += fsldma.o
+obj-$(CONFIG_MV_XOR) += mv_xor.o
+obj-$(CONFIG_DW_DMAC) += dw_dmac.o
diff --git a/drivers/dma/dmaengine.c b/drivers/dma/dmaengine.c
new file mode 100644
index 0000000..6579965
--- /dev/null
+++ b/drivers/dma/dmaengine.c
@@ -0,0 +1,635 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This code implements the DMA subsystem. It provides a HW-neutral interface
+ * for other kernel code to use asynchronous memory copy capabilities,
+ * if present, and allows different HW DMA drivers to register as providing
+ * this capability.
+ *
+ * Due to the fact we are accelerating what is already a relatively fast
+ * operation, the code goes to great lengths to avoid additional overhead,
+ * such as locking.
+ *
+ * LOCKING:
+ *
+ * The subsystem keeps two global lists, dma_device_list and dma_client_list.
+ * Both of these are protected by a mutex, dma_list_mutex.
+ *
+ * Each device has a channels list, which runs unlocked but is never modified
+ * once the device is registered, it's just setup by the driver.
+ *
+ * Each client is responsible for keeping track of the channels it uses.  See
+ * the definition of dma_event_callback in dmaengine.h.
+ *
+ * Each device has a kref, which is initialized to 1 when the device is
+ * registered. A kref_get is done for each device registered.  When the
+ * device is released, the corresponding kref_put is done in the release
+ * method. Every time one of the device's channels is allocated to a client,
+ * a kref_get occurs.  When the channel is freed, the corresponding kref_put
+ * happens. The device's release function does a completion, so
+ * unregister_device does a remove event, device_unregister, a kref_put
+ * for the first reference, then waits on the completion for all other
+ * references to finish.
+ *
+ * Each channel has an open-coded implementation of Rusty Russell's "bigref,"
+ * with a kref and a per_cpu local_t.  A dma_chan_get is called when a client
+ * signals that it wants to use a channel, and dma_chan_put is called when
+ * a channel is removed or a client using it is unregistered.  A client can
+ * take extra references per outstanding transaction, as is the case with
+ * the NET DMA client.  The release function does a kref_put on the device.
+ *	-ChrisL, DanW
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/device.h>
+#include <linux/dmaengine.h>
+#include <linux/hardirq.h>
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/mutex.h>
+#include <linux/jiffies.h>
+
+static DEFINE_MUTEX(dma_list_mutex);
+static LIST_HEAD(dma_device_list);
+static LIST_HEAD(dma_client_list);
+
+/* --- sysfs implementation --- */
+
+static ssize_t show_memcpy_count(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct dma_chan *chan = to_dma_chan(dev);
+	unsigned long count = 0;
+	int i;
+
+	for_each_possible_cpu(i)
+		count += per_cpu_ptr(chan->local, i)->memcpy_count;
+
+	return sprintf(buf, "%lu\n", count);
+}
+
+static ssize_t show_bytes_transferred(struct device *dev, struct device_attribute *attr,
+				      char *buf)
+{
+	struct dma_chan *chan = to_dma_chan(dev);
+	unsigned long count = 0;
+	int i;
+
+	for_each_possible_cpu(i)
+		count += per_cpu_ptr(chan->local, i)->bytes_transferred;
+
+	return sprintf(buf, "%lu\n", count);
+}
+
+static ssize_t show_in_use(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct dma_chan *chan = to_dma_chan(dev);
+	int in_use = 0;
+
+	if (unlikely(chan->slow_ref) &&
+		atomic_read(&chan->refcount.refcount) > 1)
+		in_use = 1;
+	else {
+		if (local_read(&(per_cpu_ptr(chan->local,
+			get_cpu())->refcount)) > 0)
+			in_use = 1;
+		put_cpu();
+	}
+
+	return sprintf(buf, "%d\n", in_use);
+}
+
+static struct device_attribute dma_attrs[] = {
+	__ATTR(memcpy_count, S_IRUGO, show_memcpy_count, NULL),
+	__ATTR(bytes_transferred, S_IRUGO, show_bytes_transferred, NULL),
+	__ATTR(in_use, S_IRUGO, show_in_use, NULL),
+	__ATTR_NULL
+};
+
+static void dma_async_device_cleanup(struct kref *kref);
+
+static void dma_dev_release(struct device *dev)
+{
+	struct dma_chan *chan = to_dma_chan(dev);
+	kref_put(&chan->device->refcount, dma_async_device_cleanup);
+}
+
+static struct class dma_devclass = {
+	.name		= "dma",
+	.dev_attrs	= dma_attrs,
+	.dev_release	= dma_dev_release,
+};
+
+/* --- client and device registration --- */
+
+#define dma_chan_satisfies_mask(chan, mask) \
+	__dma_chan_satisfies_mask((chan), &(mask))
+static int
+__dma_chan_satisfies_mask(struct dma_chan *chan, dma_cap_mask_t *want)
+{
+	dma_cap_mask_t has;
+
+	bitmap_and(has.bits, want->bits, chan->device->cap_mask.bits,
+		DMA_TX_TYPE_END);
+	return bitmap_equal(want->bits, has.bits, DMA_TX_TYPE_END);
+}
+
+/**
+ * dma_client_chan_alloc - try to allocate channels to a client
+ * @client: &dma_client
+ *
+ * Called with dma_list_mutex held.
+ */
+static void dma_client_chan_alloc(struct dma_client *client)
+{
+	struct dma_device *device;
+	struct dma_chan *chan;
+	int desc;	/* allocated descriptor count */
+	enum dma_state_client ack;
+
+	/* Find a channel */
+	list_for_each_entry(device, &dma_device_list, global_node) {
+		/* Does the client require a specific DMA controller? */
+		if (client->slave && client->slave->dma_dev
+				&& client->slave->dma_dev != device->dev)
+			continue;
+
+		list_for_each_entry(chan, &device->channels, device_node) {
+			if (!dma_chan_satisfies_mask(chan, client->cap_mask))
+				continue;
+
+			desc = chan->device->device_alloc_chan_resources(
+					chan, client);
+			if (desc >= 0) {
+				ack = client->event_callback(client,
+						chan,
+						DMA_RESOURCE_AVAILABLE);
+
+				/* we are done once this client rejects
+				 * an available resource
+				 */
+				if (ack == DMA_ACK) {
+					dma_chan_get(chan);
+					chan->client_count++;
+				} else if (ack == DMA_NAK)
+					return;
+			}
+		}
+	}
+}
+
+enum dma_status dma_sync_wait(struct dma_chan *chan, dma_cookie_t cookie)
+{
+	enum dma_status status;
+	unsigned long dma_sync_wait_timeout = jiffies + msecs_to_jiffies(5000);
+
+	dma_async_issue_pending(chan);
+	do {
+		status = dma_async_is_tx_complete(chan, cookie, NULL, NULL);
+		if (time_after_eq(jiffies, dma_sync_wait_timeout)) {
+			printk(KERN_ERR "dma_sync_wait_timeout!\n");
+			return DMA_ERROR;
+		}
+	} while (status == DMA_IN_PROGRESS);
+
+	return status;
+}
+EXPORT_SYMBOL(dma_sync_wait);
+
+/**
+ * dma_chan_cleanup - release a DMA channel's resources
+ * @kref: kernel reference structure that contains the DMA channel device
+ */
+void dma_chan_cleanup(struct kref *kref)
+{
+	struct dma_chan *chan = container_of(kref, struct dma_chan, refcount);
+	chan->device->device_free_chan_resources(chan);
+	kref_put(&chan->device->refcount, dma_async_device_cleanup);
+}
+EXPORT_SYMBOL(dma_chan_cleanup);
+
+static void dma_chan_free_rcu(struct rcu_head *rcu)
+{
+	struct dma_chan *chan = container_of(rcu, struct dma_chan, rcu);
+	int bias = 0x7FFFFFFF;
+	int i;
+	for_each_possible_cpu(i)
+		bias -= local_read(&per_cpu_ptr(chan->local, i)->refcount);
+	atomic_sub(bias, &chan->refcount.refcount);
+	kref_put(&chan->refcount, dma_chan_cleanup);
+}
+
+static void dma_chan_release(struct dma_chan *chan)
+{
+	atomic_add(0x7FFFFFFF, &chan->refcount.refcount);
+	chan->slow_ref = 1;
+	call_rcu(&chan->rcu, dma_chan_free_rcu);
+}
+
+/**
+ * dma_chans_notify_available - broadcast available channels to the clients
+ */
+static void dma_clients_notify_available(void)
+{
+	struct dma_client *client;
+
+	mutex_lock(&dma_list_mutex);
+
+	list_for_each_entry(client, &dma_client_list, global_node)
+		dma_client_chan_alloc(client);
+
+	mutex_unlock(&dma_list_mutex);
+}
+
+/**
+ * dma_chans_notify_available - tell the clients that a channel is going away
+ * @chan: channel on its way out
+ */
+static void dma_clients_notify_removed(struct dma_chan *chan)
+{
+	struct dma_client *client;
+	enum dma_state_client ack;
+
+	mutex_lock(&dma_list_mutex);
+
+	list_for_each_entry(client, &dma_client_list, global_node) {
+		ack = client->event_callback(client, chan,
+				DMA_RESOURCE_REMOVED);
+
+		/* client was holding resources for this channel so
+		 * free it
+		 */
+		if (ack == DMA_ACK) {
+			dma_chan_put(chan);
+			chan->client_count--;
+		}
+	}
+
+	mutex_unlock(&dma_list_mutex);
+}
+
+/**
+ * dma_async_client_register - register a &dma_client
+ * @client: ptr to a client structure with valid 'event_callback' and 'cap_mask'
+ */
+void dma_async_client_register(struct dma_client *client)
+{
+	/* validate client data */
+	BUG_ON(dma_has_cap(DMA_SLAVE, client->cap_mask) &&
+		!client->slave);
+
+	mutex_lock(&dma_list_mutex);
+	list_add_tail(&client->global_node, &dma_client_list);
+	mutex_unlock(&dma_list_mutex);
+}
+EXPORT_SYMBOL(dma_async_client_register);
+
+/**
+ * dma_async_client_unregister - unregister a client and free the &dma_client
+ * @client: &dma_client to free
+ *
+ * Force frees any allocated DMA channels, frees the &dma_client memory
+ */
+void dma_async_client_unregister(struct dma_client *client)
+{
+	struct dma_device *device;
+	struct dma_chan *chan;
+	enum dma_state_client ack;
+
+	if (!client)
+		return;
+
+	mutex_lock(&dma_list_mutex);
+	/* free all channels the client is holding */
+	list_for_each_entry(device, &dma_device_list, global_node)
+		list_for_each_entry(chan, &device->channels, device_node) {
+			ack = client->event_callback(client, chan,
+				DMA_RESOURCE_REMOVED);
+
+			if (ack == DMA_ACK) {
+				dma_chan_put(chan);
+				chan->client_count--;
+			}
+		}
+
+	list_del(&client->global_node);
+	mutex_unlock(&dma_list_mutex);
+}
+EXPORT_SYMBOL(dma_async_client_unregister);
+
+/**
+ * dma_async_client_chan_request - send all available channels to the
+ * client that satisfy the capability mask
+ * @client - requester
+ */
+void dma_async_client_chan_request(struct dma_client *client)
+{
+	mutex_lock(&dma_list_mutex);
+	dma_client_chan_alloc(client);
+	mutex_unlock(&dma_list_mutex);
+}
+EXPORT_SYMBOL(dma_async_client_chan_request);
+
+/**
+ * dma_async_device_register - registers DMA devices found
+ * @device: &dma_device
+ */
+int dma_async_device_register(struct dma_device *device)
+{
+	static int id;
+	int chancnt = 0, rc;
+	struct dma_chan* chan;
+
+	if (!device)
+		return -ENODEV;
+
+	/* validate device routines */
+	BUG_ON(dma_has_cap(DMA_MEMCPY, device->cap_mask) &&
+		!device->device_prep_dma_memcpy);
+	BUG_ON(dma_has_cap(DMA_XOR, device->cap_mask) &&
+		!device->device_prep_dma_xor);
+	BUG_ON(dma_has_cap(DMA_ZERO_SUM, device->cap_mask) &&
+		!device->device_prep_dma_zero_sum);
+	BUG_ON(dma_has_cap(DMA_MEMSET, device->cap_mask) &&
+		!device->device_prep_dma_memset);
+	BUG_ON(dma_has_cap(DMA_INTERRUPT, device->cap_mask) &&
+		!device->device_prep_dma_interrupt);
+	BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
+		!device->device_prep_slave_sg);
+	BUG_ON(dma_has_cap(DMA_SLAVE, device->cap_mask) &&
+		!device->device_terminate_all);
+
+	BUG_ON(!device->device_alloc_chan_resources);
+	BUG_ON(!device->device_free_chan_resources);
+	BUG_ON(!device->device_is_tx_complete);
+	BUG_ON(!device->device_issue_pending);
+	BUG_ON(!device->dev);
+
+	init_completion(&device->done);
+	kref_init(&device->refcount);
+
+	mutex_lock(&dma_list_mutex);
+	device->dev_id = id++;
+	mutex_unlock(&dma_list_mutex);
+
+	/* represent channels in sysfs. Probably want devs too */
+	list_for_each_entry(chan, &device->channels, device_node) {
+		chan->local = alloc_percpu(typeof(*chan->local));
+		if (chan->local == NULL)
+			continue;
+
+		chan->chan_id = chancnt++;
+		chan->dev.class = &dma_devclass;
+		chan->dev.parent = device->dev;
+		dev_set_name(&chan->dev, "dma%dchan%d",
+			     device->dev_id, chan->chan_id);
+
+		rc = device_register(&chan->dev);
+		if (rc) {
+			chancnt--;
+			free_percpu(chan->local);
+			chan->local = NULL;
+			goto err_out;
+		}
+
+		/* One for the channel, one of the class device */
+		kref_get(&device->refcount);
+		kref_get(&device->refcount);
+		kref_init(&chan->refcount);
+		chan->client_count = 0;
+		chan->slow_ref = 0;
+		INIT_RCU_HEAD(&chan->rcu);
+	}
+
+	mutex_lock(&dma_list_mutex);
+	list_add_tail(&device->global_node, &dma_device_list);
+	mutex_unlock(&dma_list_mutex);
+
+	dma_clients_notify_available();
+
+	return 0;
+
+err_out:
+	list_for_each_entry(chan, &device->channels, device_node) {
+		if (chan->local == NULL)
+			continue;
+		kref_put(&device->refcount, dma_async_device_cleanup);
+		device_unregister(&chan->dev);
+		chancnt--;
+		free_percpu(chan->local);
+	}
+	return rc;
+}
+EXPORT_SYMBOL(dma_async_device_register);
+
+/**
+ * dma_async_device_cleanup - function called when all references are released
+ * @kref: kernel reference object
+ */
+static void dma_async_device_cleanup(struct kref *kref)
+{
+	struct dma_device *device;
+
+	device = container_of(kref, struct dma_device, refcount);
+	complete(&device->done);
+}
+
+/**
+ * dma_async_device_unregister - unregisters DMA devices
+ * @device: &dma_device
+ */
+void dma_async_device_unregister(struct dma_device *device)
+{
+	struct dma_chan *chan;
+
+	mutex_lock(&dma_list_mutex);
+	list_del(&device->global_node);
+	mutex_unlock(&dma_list_mutex);
+
+	list_for_each_entry(chan, &device->channels, device_node) {
+		dma_clients_notify_removed(chan);
+		device_unregister(&chan->dev);
+		dma_chan_release(chan);
+	}
+
+	kref_put(&device->refcount, dma_async_device_cleanup);
+	wait_for_completion(&device->done);
+}
+EXPORT_SYMBOL(dma_async_device_unregister);
+
+/**
+ * dma_async_memcpy_buf_to_buf - offloaded copy between virtual addresses
+ * @chan: DMA channel to offload copy to
+ * @dest: destination address (virtual)
+ * @src: source address (virtual)
+ * @len: length
+ *
+ * Both @dest and @src must be mappable to a bus address according to the
+ * DMA mapping API rules for streaming mappings.
+ * Both @dest and @src must stay memory resident (kernel memory or locked
+ * user space pages).
+ */
+dma_cookie_t
+dma_async_memcpy_buf_to_buf(struct dma_chan *chan, void *dest,
+			void *src, size_t len)
+{
+	struct dma_device *dev = chan->device;
+	struct dma_async_tx_descriptor *tx;
+	dma_addr_t dma_dest, dma_src;
+	dma_cookie_t cookie;
+	int cpu;
+
+	dma_src = dma_map_single(dev->dev, src, len, DMA_TO_DEVICE);
+	dma_dest = dma_map_single(dev->dev, dest, len, DMA_FROM_DEVICE);
+	tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len,
+					 DMA_CTRL_ACK);
+
+	if (!tx) {
+		dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
+		dma_unmap_single(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
+		return -ENOMEM;
+	}
+
+	tx->callback = NULL;
+	cookie = tx->tx_submit(tx);
+
+	cpu = get_cpu();
+	per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
+	per_cpu_ptr(chan->local, cpu)->memcpy_count++;
+	put_cpu();
+
+	return cookie;
+}
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_buf);
+
+/**
+ * dma_async_memcpy_buf_to_pg - offloaded copy from address to page
+ * @chan: DMA channel to offload copy to
+ * @page: destination page
+ * @offset: offset in page to copy to
+ * @kdata: source address (virtual)
+ * @len: length
+ *
+ * Both @page/@offset and @kdata must be mappable to a bus address according
+ * to the DMA mapping API rules for streaming mappings.
+ * Both @page/@offset and @kdata must stay memory resident (kernel memory or
+ * locked user space pages)
+ */
+dma_cookie_t
+dma_async_memcpy_buf_to_pg(struct dma_chan *chan, struct page *page,
+			unsigned int offset, void *kdata, size_t len)
+{
+	struct dma_device *dev = chan->device;
+	struct dma_async_tx_descriptor *tx;
+	dma_addr_t dma_dest, dma_src;
+	dma_cookie_t cookie;
+	int cpu;
+
+	dma_src = dma_map_single(dev->dev, kdata, len, DMA_TO_DEVICE);
+	dma_dest = dma_map_page(dev->dev, page, offset, len, DMA_FROM_DEVICE);
+	tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len,
+					 DMA_CTRL_ACK);
+
+	if (!tx) {
+		dma_unmap_single(dev->dev, dma_src, len, DMA_TO_DEVICE);
+		dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
+		return -ENOMEM;
+	}
+
+	tx->callback = NULL;
+	cookie = tx->tx_submit(tx);
+
+	cpu = get_cpu();
+	per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
+	per_cpu_ptr(chan->local, cpu)->memcpy_count++;
+	put_cpu();
+
+	return cookie;
+}
+EXPORT_SYMBOL(dma_async_memcpy_buf_to_pg);
+
+/**
+ * dma_async_memcpy_pg_to_pg - offloaded copy from page to page
+ * @chan: DMA channel to offload copy to
+ * @dest_pg: destination page
+ * @dest_off: offset in page to copy to
+ * @src_pg: source page
+ * @src_off: offset in page to copy from
+ * @len: length
+ *
+ * Both @dest_page/@dest_off and @src_page/@src_off must be mappable to a bus
+ * address according to the DMA mapping API rules for streaming mappings.
+ * Both @dest_page/@dest_off and @src_page/@src_off must stay memory resident
+ * (kernel memory or locked user space pages).
+ */
+dma_cookie_t
+dma_async_memcpy_pg_to_pg(struct dma_chan *chan, struct page *dest_pg,
+	unsigned int dest_off, struct page *src_pg, unsigned int src_off,
+	size_t len)
+{
+	struct dma_device *dev = chan->device;
+	struct dma_async_tx_descriptor *tx;
+	dma_addr_t dma_dest, dma_src;
+	dma_cookie_t cookie;
+	int cpu;
+
+	dma_src = dma_map_page(dev->dev, src_pg, src_off, len, DMA_TO_DEVICE);
+	dma_dest = dma_map_page(dev->dev, dest_pg, dest_off, len,
+				DMA_FROM_DEVICE);
+	tx = dev->device_prep_dma_memcpy(chan, dma_dest, dma_src, len,
+					 DMA_CTRL_ACK);
+
+	if (!tx) {
+		dma_unmap_page(dev->dev, dma_src, len, DMA_TO_DEVICE);
+		dma_unmap_page(dev->dev, dma_dest, len, DMA_FROM_DEVICE);
+		return -ENOMEM;
+	}
+
+	tx->callback = NULL;
+	cookie = tx->tx_submit(tx);
+
+	cpu = get_cpu();
+	per_cpu_ptr(chan->local, cpu)->bytes_transferred += len;
+	per_cpu_ptr(chan->local, cpu)->memcpy_count++;
+	put_cpu();
+
+	return cookie;
+}
+EXPORT_SYMBOL(dma_async_memcpy_pg_to_pg);
+
+void dma_async_tx_descriptor_init(struct dma_async_tx_descriptor *tx,
+	struct dma_chan *chan)
+{
+	tx->chan = chan;
+	spin_lock_init(&tx->lock);
+}
+EXPORT_SYMBOL(dma_async_tx_descriptor_init);
+
+static int __init dma_bus_init(void)
+{
+	mutex_init(&dma_list_mutex);
+	return class_register(&dma_devclass);
+}
+subsys_initcall(dma_bus_init);
+
diff --git a/drivers/dma/dmatest.c b/drivers/dma/dmatest.c
new file mode 100644
index 0000000..ed9636b
--- /dev/null
+++ b/drivers/dma/dmatest.c
@@ -0,0 +1,449 @@
+/*
+ * DMA Engine test module
+ *
+ * Copyright (C) 2007 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/init.h>
+#include <linux/kthread.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/random.h>
+#include <linux/wait.h>
+
+static unsigned int test_buf_size = 16384;
+module_param(test_buf_size, uint, S_IRUGO);
+MODULE_PARM_DESC(test_buf_size, "Size of the memcpy test buffer");
+
+static char test_channel[20];
+module_param_string(channel, test_channel, sizeof(test_channel), S_IRUGO);
+MODULE_PARM_DESC(channel, "Bus ID of the channel to test (default: any)");
+
+static char test_device[20];
+module_param_string(device, test_device, sizeof(test_device), S_IRUGO);
+MODULE_PARM_DESC(device, "Bus ID of the DMA Engine to test (default: any)");
+
+static unsigned int threads_per_chan = 1;
+module_param(threads_per_chan, uint, S_IRUGO);
+MODULE_PARM_DESC(threads_per_chan,
+		"Number of threads to start per channel (default: 1)");
+
+static unsigned int max_channels;
+module_param(max_channels, uint, S_IRUGO);
+MODULE_PARM_DESC(nr_channels,
+		"Maximum number of channels to use (default: all)");
+
+/*
+ * Initialization patterns. All bytes in the source buffer has bit 7
+ * set, all bytes in the destination buffer has bit 7 cleared.
+ *
+ * Bit 6 is set for all bytes which are to be copied by the DMA
+ * engine. Bit 5 is set for all bytes which are to be overwritten by
+ * the DMA engine.
+ *
+ * The remaining bits are the inverse of a counter which increments by
+ * one for each byte address.
+ */
+#define PATTERN_SRC		0x80
+#define PATTERN_DST		0x00
+#define PATTERN_COPY		0x40
+#define PATTERN_OVERWRITE	0x20
+#define PATTERN_COUNT_MASK	0x1f
+
+struct dmatest_thread {
+	struct list_head	node;
+	struct task_struct	*task;
+	struct dma_chan		*chan;
+	u8			*srcbuf;
+	u8			*dstbuf;
+};
+
+struct dmatest_chan {
+	struct list_head	node;
+	struct dma_chan		*chan;
+	struct list_head	threads;
+};
+
+/*
+ * These are protected by dma_list_mutex since they're only used by
+ * the DMA client event callback
+ */
+static LIST_HEAD(dmatest_channels);
+static unsigned int nr_channels;
+
+static bool dmatest_match_channel(struct dma_chan *chan)
+{
+	if (test_channel[0] == '\0')
+		return true;
+	return strcmp(dev_name(&chan->dev), test_channel) == 0;
+}
+
+static bool dmatest_match_device(struct dma_device *device)
+{
+	if (test_device[0] == '\0')
+		return true;
+	return strcmp(dev_name(device->dev), test_device) == 0;
+}
+
+static unsigned long dmatest_random(void)
+{
+	unsigned long buf;
+
+	get_random_bytes(&buf, sizeof(buf));
+	return buf;
+}
+
+static void dmatest_init_srcbuf(u8 *buf, unsigned int start, unsigned int len)
+{
+	unsigned int i;
+
+	for (i = 0; i < start; i++)
+		buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
+	for ( ; i < start + len; i++)
+		buf[i] = PATTERN_SRC | PATTERN_COPY
+			| (~i & PATTERN_COUNT_MASK);;
+	for ( ; i < test_buf_size; i++)
+		buf[i] = PATTERN_SRC | (~i & PATTERN_COUNT_MASK);
+}
+
+static void dmatest_init_dstbuf(u8 *buf, unsigned int start, unsigned int len)
+{
+	unsigned int i;
+
+	for (i = 0; i < start; i++)
+		buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
+	for ( ; i < start + len; i++)
+		buf[i] = PATTERN_DST | PATTERN_OVERWRITE
+			| (~i & PATTERN_COUNT_MASK);
+	for ( ; i < test_buf_size; i++)
+		buf[i] = PATTERN_DST | (~i & PATTERN_COUNT_MASK);
+}
+
+static void dmatest_mismatch(u8 actual, u8 pattern, unsigned int index,
+		unsigned int counter, bool is_srcbuf)
+{
+	u8		diff = actual ^ pattern;
+	u8		expected = pattern | (~counter & PATTERN_COUNT_MASK);
+	const char	*thread_name = current->comm;
+
+	if (is_srcbuf)
+		pr_warning("%s: srcbuf[0x%x] overwritten!"
+				" Expected %02x, got %02x\n",
+				thread_name, index, expected, actual);
+	else if ((pattern & PATTERN_COPY)
+			&& (diff & (PATTERN_COPY | PATTERN_OVERWRITE)))
+		pr_warning("%s: dstbuf[0x%x] not copied!"
+				" Expected %02x, got %02x\n",
+				thread_name, index, expected, actual);
+	else if (diff & PATTERN_SRC)
+		pr_warning("%s: dstbuf[0x%x] was copied!"
+				" Expected %02x, got %02x\n",
+				thread_name, index, expected, actual);
+	else
+		pr_warning("%s: dstbuf[0x%x] mismatch!"
+				" Expected %02x, got %02x\n",
+				thread_name, index, expected, actual);
+}
+
+static unsigned int dmatest_verify(u8 *buf, unsigned int start,
+		unsigned int end, unsigned int counter, u8 pattern,
+		bool is_srcbuf)
+{
+	unsigned int i;
+	unsigned int error_count = 0;
+	u8 actual;
+
+	for (i = start; i < end; i++) {
+		actual = buf[i];
+		if (actual != (pattern | (~counter & PATTERN_COUNT_MASK))) {
+			if (error_count < 32)
+				dmatest_mismatch(actual, pattern, i, counter,
+						is_srcbuf);
+			error_count++;
+		}
+		counter++;
+	}
+
+	if (error_count > 32)
+		pr_warning("%s: %u errors suppressed\n",
+			current->comm, error_count - 32);
+
+	return error_count;
+}
+
+/*
+ * This function repeatedly tests DMA transfers of various lengths and
+ * offsets until it is told to exit by kthread_stop(). There may be
+ * multiple threads running this function in parallel for a single
+ * channel, and there may be multiple channels being tested in
+ * parallel.
+ *
+ * Before each test, the source and destination buffer is initialized
+ * with a known pattern. This pattern is different depending on
+ * whether it's in an area which is supposed to be copied or
+ * overwritten, and different in the source and destination buffers.
+ * So if the DMA engine doesn't copy exactly what we tell it to copy,
+ * we'll notice.
+ */
+static int dmatest_func(void *data)
+{
+	struct dmatest_thread	*thread = data;
+	struct dma_chan		*chan;
+	const char		*thread_name;
+	unsigned int		src_off, dst_off, len;
+	unsigned int		error_count;
+	unsigned int		failed_tests = 0;
+	unsigned int		total_tests = 0;
+	dma_cookie_t		cookie;
+	enum dma_status		status;
+	int			ret;
+
+	thread_name = current->comm;
+
+	ret = -ENOMEM;
+	thread->srcbuf = kmalloc(test_buf_size, GFP_KERNEL);
+	if (!thread->srcbuf)
+		goto err_srcbuf;
+	thread->dstbuf = kmalloc(test_buf_size, GFP_KERNEL);
+	if (!thread->dstbuf)
+		goto err_dstbuf;
+
+	smp_rmb();
+	chan = thread->chan;
+	dma_chan_get(chan);
+
+	while (!kthread_should_stop()) {
+		total_tests++;
+
+		len = dmatest_random() % test_buf_size + 1;
+		src_off = dmatest_random() % (test_buf_size - len + 1);
+		dst_off = dmatest_random() % (test_buf_size - len + 1);
+
+		dmatest_init_srcbuf(thread->srcbuf, src_off, len);
+		dmatest_init_dstbuf(thread->dstbuf, dst_off, len);
+
+		cookie = dma_async_memcpy_buf_to_buf(chan,
+				thread->dstbuf + dst_off,
+				thread->srcbuf + src_off,
+				len);
+		if (dma_submit_error(cookie)) {
+			pr_warning("%s: #%u: submit error %d with src_off=0x%x "
+					"dst_off=0x%x len=0x%x\n",
+					thread_name, total_tests - 1, cookie,
+					src_off, dst_off, len);
+			msleep(100);
+			failed_tests++;
+			continue;
+		}
+		dma_async_memcpy_issue_pending(chan);
+
+		do {
+			msleep(1);
+			status = dma_async_memcpy_complete(
+					chan, cookie, NULL, NULL);
+		} while (status == DMA_IN_PROGRESS);
+
+		if (status == DMA_ERROR) {
+			pr_warning("%s: #%u: error during copy\n",
+					thread_name, total_tests - 1);
+			failed_tests++;
+			continue;
+		}
+
+		error_count = 0;
+
+		pr_debug("%s: verifying source buffer...\n", thread_name);
+		error_count += dmatest_verify(thread->srcbuf, 0, src_off,
+				0, PATTERN_SRC, true);
+		error_count += dmatest_verify(thread->srcbuf, src_off,
+				src_off + len, src_off,
+				PATTERN_SRC | PATTERN_COPY, true);
+		error_count += dmatest_verify(thread->srcbuf, src_off + len,
+				test_buf_size, src_off + len,
+				PATTERN_SRC, true);
+
+		pr_debug("%s: verifying dest buffer...\n",
+				thread->task->comm);
+		error_count += dmatest_verify(thread->dstbuf, 0, dst_off,
+				0, PATTERN_DST, false);
+		error_count += dmatest_verify(thread->dstbuf, dst_off,
+				dst_off + len, src_off,
+				PATTERN_SRC | PATTERN_COPY, false);
+		error_count += dmatest_verify(thread->dstbuf, dst_off + len,
+				test_buf_size, dst_off + len,
+				PATTERN_DST, false);
+
+		if (error_count) {
+			pr_warning("%s: #%u: %u errors with "
+				"src_off=0x%x dst_off=0x%x len=0x%x\n",
+				thread_name, total_tests - 1, error_count,
+				src_off, dst_off, len);
+			failed_tests++;
+		} else {
+			pr_debug("%s: #%u: No errors with "
+				"src_off=0x%x dst_off=0x%x len=0x%x\n",
+				thread_name, total_tests - 1,
+				src_off, dst_off, len);
+		}
+	}
+
+	ret = 0;
+	dma_chan_put(chan);
+	kfree(thread->dstbuf);
+err_dstbuf:
+	kfree(thread->srcbuf);
+err_srcbuf:
+	pr_notice("%s: terminating after %u tests, %u failures (status %d)\n",
+			thread_name, total_tests, failed_tests, ret);
+	return ret;
+}
+
+static void dmatest_cleanup_channel(struct dmatest_chan *dtc)
+{
+	struct dmatest_thread	*thread;
+	struct dmatest_thread	*_thread;
+	int			ret;
+
+	list_for_each_entry_safe(thread, _thread, &dtc->threads, node) {
+		ret = kthread_stop(thread->task);
+		pr_debug("dmatest: thread %s exited with status %d\n",
+				thread->task->comm, ret);
+		list_del(&thread->node);
+		kfree(thread);
+	}
+	kfree(dtc);
+}
+
+static enum dma_state_client dmatest_add_channel(struct dma_chan *chan)
+{
+	struct dmatest_chan	*dtc;
+	struct dmatest_thread	*thread;
+	unsigned int		i;
+
+	/* Have we already been told about this channel? */
+	list_for_each_entry(dtc, &dmatest_channels, node)
+		if (dtc->chan == chan)
+			return DMA_DUP;
+
+	dtc = kmalloc(sizeof(struct dmatest_chan), GFP_KERNEL);
+	if (!dtc) {
+		pr_warning("dmatest: No memory for %s\n", dev_name(&chan->dev));
+		return DMA_NAK;
+	}
+
+	dtc->chan = chan;
+	INIT_LIST_HEAD(&dtc->threads);
+
+	for (i = 0; i < threads_per_chan; i++) {
+		thread = kzalloc(sizeof(struct dmatest_thread), GFP_KERNEL);
+		if (!thread) {
+			pr_warning("dmatest: No memory for %s-test%u\n",
+				   dev_name(&chan->dev), i);
+			break;
+		}
+		thread->chan = dtc->chan;
+		smp_wmb();
+		thread->task = kthread_run(dmatest_func, thread, "%s-test%u",
+				dev_name(&chan->dev), i);
+		if (IS_ERR(thread->task)) {
+			pr_warning("dmatest: Failed to run thread %s-test%u\n",
+					dev_name(&chan->dev), i);
+			kfree(thread);
+			break;
+		}
+
+		/* srcbuf and dstbuf are allocated by the thread itself */
+
+		list_add_tail(&thread->node, &dtc->threads);
+	}
+
+	pr_info("dmatest: Started %u threads using %s\n", i, dev_name(&chan->dev));
+
+	list_add_tail(&dtc->node, &dmatest_channels);
+	nr_channels++;
+
+	return DMA_ACK;
+}
+
+static enum dma_state_client dmatest_remove_channel(struct dma_chan *chan)
+{
+	struct dmatest_chan	*dtc, *_dtc;
+
+	list_for_each_entry_safe(dtc, _dtc, &dmatest_channels, node) {
+		if (dtc->chan == chan) {
+			list_del(&dtc->node);
+			dmatest_cleanup_channel(dtc);
+			pr_debug("dmatest: lost channel %s\n",
+					dev_name(&chan->dev));
+			return DMA_ACK;
+		}
+	}
+
+	return DMA_DUP;
+}
+
+/*
+ * Start testing threads as new channels are assigned to us, and kill
+ * them when the channels go away.
+ *
+ * When we unregister the client, all channels are removed so this
+ * will also take care of cleaning things up when the module is
+ * unloaded.
+ */
+static enum dma_state_client
+dmatest_event(struct dma_client *client, struct dma_chan *chan,
+		enum dma_state state)
+{
+	enum dma_state_client	ack = DMA_NAK;
+
+	switch (state) {
+	case DMA_RESOURCE_AVAILABLE:
+		if (!dmatest_match_channel(chan)
+				|| !dmatest_match_device(chan->device))
+			ack = DMA_DUP;
+		else if (max_channels && nr_channels >= max_channels)
+			ack = DMA_NAK;
+		else
+			ack = dmatest_add_channel(chan);
+		break;
+
+	case DMA_RESOURCE_REMOVED:
+		ack = dmatest_remove_channel(chan);
+		break;
+
+	default:
+		pr_info("dmatest: Unhandled event %u (%s)\n",
+				state, dev_name(&chan->dev));
+		break;
+	}
+
+	return ack;
+}
+
+static struct dma_client dmatest_client = {
+	.event_callback	= dmatest_event,
+};
+
+static int __init dmatest_init(void)
+{
+	dma_cap_set(DMA_MEMCPY, dmatest_client.cap_mask);
+	dma_async_client_register(&dmatest_client);
+	dma_async_client_chan_request(&dmatest_client);
+
+	return 0;
+}
+module_init(dmatest_init);
+
+static void __exit dmatest_exit(void)
+{
+	dma_async_client_unregister(&dmatest_client);
+}
+module_exit(dmatest_exit);
+
+MODULE_AUTHOR("Haavard Skinnemoen <hskinnemoen@atmel.com>");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/dma/dw_dmac.c b/drivers/dma/dw_dmac.c
new file mode 100644
index 0000000..0778d99
--- /dev/null
+++ b/drivers/dma/dw_dmac.c
@@ -0,0 +1,1122 @@
+/*
+ * Driver for the Synopsys DesignWare DMA Controller (aka DMACA on
+ * AVR32 systems.)
+ *
+ * Copyright (C) 2007-2008 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+#include <linux/clk.h>
+#include <linux/delay.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "dw_dmac_regs.h"
+
+/*
+ * This supports the Synopsys "DesignWare AHB Central DMA Controller",
+ * (DW_ahb_dmac) which is used with various AMBA 2.0 systems (not all
+ * of which use ARM any more).  See the "Databook" from Synopsys for
+ * information beyond what licensees probably provide.
+ *
+ * The driver has currently been tested only with the Atmel AT32AP7000,
+ * which does not support descriptor writeback.
+ */
+
+/* NOTE:  DMS+SMS is system-specific. We should get this information
+ * from the platform code somehow.
+ */
+#define DWC_DEFAULT_CTLLO	(DWC_CTLL_DST_MSIZE(0)		\
+				| DWC_CTLL_SRC_MSIZE(0)		\
+				| DWC_CTLL_DMS(0)		\
+				| DWC_CTLL_SMS(1)		\
+				| DWC_CTLL_LLP_D_EN		\
+				| DWC_CTLL_LLP_S_EN)
+
+/*
+ * This is configuration-dependent and usually a funny size like 4095.
+ * Let's round it down to the nearest power of two.
+ *
+ * Note that this is a transfer count, i.e. if we transfer 32-bit
+ * words, we can do 8192 bytes per descriptor.
+ *
+ * This parameter is also system-specific.
+ */
+#define DWC_MAX_COUNT	2048U
+
+/*
+ * Number of descriptors to allocate for each channel. This should be
+ * made configurable somehow; preferably, the clients (at least the
+ * ones using slave transfers) should be able to give us a hint.
+ */
+#define NR_DESCS_PER_CHANNEL	64
+
+/*----------------------------------------------------------------------*/
+
+/*
+ * Because we're not relying on writeback from the controller (it may not
+ * even be configured into the core!) we don't need to use dma_pool.  These
+ * descriptors -- and associated data -- are cacheable.  We do need to make
+ * sure their dcache entries are written back before handing them off to
+ * the controller, though.
+ */
+
+static struct dw_desc *dwc_first_active(struct dw_dma_chan *dwc)
+{
+	return list_entry(dwc->active_list.next, struct dw_desc, desc_node);
+}
+
+static struct dw_desc *dwc_first_queued(struct dw_dma_chan *dwc)
+{
+	return list_entry(dwc->queue.next, struct dw_desc, desc_node);
+}
+
+static struct dw_desc *dwc_desc_get(struct dw_dma_chan *dwc)
+{
+	struct dw_desc *desc, *_desc;
+	struct dw_desc *ret = NULL;
+	unsigned int i = 0;
+
+	spin_lock_bh(&dwc->lock);
+	list_for_each_entry_safe(desc, _desc, &dwc->free_list, desc_node) {
+		if (async_tx_test_ack(&desc->txd)) {
+			list_del(&desc->desc_node);
+			ret = desc;
+			break;
+		}
+		dev_dbg(&dwc->chan.dev, "desc %p not ACKed\n", desc);
+		i++;
+	}
+	spin_unlock_bh(&dwc->lock);
+
+	dev_vdbg(&dwc->chan.dev, "scanned %u descriptors on freelist\n", i);
+
+	return ret;
+}
+
+static void dwc_sync_desc_for_cpu(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+	struct dw_desc	*child;
+
+	list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+		dma_sync_single_for_cpu(dwc->chan.dev.parent,
+				child->txd.phys, sizeof(child->lli),
+				DMA_TO_DEVICE);
+	dma_sync_single_for_cpu(dwc->chan.dev.parent,
+			desc->txd.phys, sizeof(desc->lli),
+			DMA_TO_DEVICE);
+}
+
+/*
+ * Move a descriptor, including any children, to the free list.
+ * `desc' must not be on any lists.
+ */
+static void dwc_desc_put(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+	if (desc) {
+		struct dw_desc *child;
+
+		dwc_sync_desc_for_cpu(dwc, desc);
+
+		spin_lock_bh(&dwc->lock);
+		list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+			dev_vdbg(&dwc->chan.dev,
+					"moving child desc %p to freelist\n",
+					child);
+		list_splice_init(&desc->txd.tx_list, &dwc->free_list);
+		dev_vdbg(&dwc->chan.dev, "moving desc %p to freelist\n", desc);
+		list_add(&desc->desc_node, &dwc->free_list);
+		spin_unlock_bh(&dwc->lock);
+	}
+}
+
+/* Called with dwc->lock held and bh disabled */
+static dma_cookie_t
+dwc_assign_cookie(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+	dma_cookie_t cookie = dwc->chan.cookie;
+
+	if (++cookie < 0)
+		cookie = 1;
+
+	dwc->chan.cookie = cookie;
+	desc->txd.cookie = cookie;
+
+	return cookie;
+}
+
+/*----------------------------------------------------------------------*/
+
+/* Called with dwc->lock held and bh disabled */
+static void dwc_dostart(struct dw_dma_chan *dwc, struct dw_desc *first)
+{
+	struct dw_dma	*dw = to_dw_dma(dwc->chan.device);
+
+	/* ASSERT:  channel is idle */
+	if (dma_readl(dw, CH_EN) & dwc->mask) {
+		dev_err(&dwc->chan.dev,
+			"BUG: Attempted to start non-idle channel\n");
+		dev_err(&dwc->chan.dev,
+			"  SAR: 0x%x DAR: 0x%x LLP: 0x%x CTL: 0x%x:%08x\n",
+			channel_readl(dwc, SAR),
+			channel_readl(dwc, DAR),
+			channel_readl(dwc, LLP),
+			channel_readl(dwc, CTL_HI),
+			channel_readl(dwc, CTL_LO));
+
+		/* The tasklet will hopefully advance the queue... */
+		return;
+	}
+
+	channel_writel(dwc, LLP, first->txd.phys);
+	channel_writel(dwc, CTL_LO,
+			DWC_CTLL_LLP_D_EN | DWC_CTLL_LLP_S_EN);
+	channel_writel(dwc, CTL_HI, 0);
+	channel_set_bit(dw, CH_EN, dwc->mask);
+}
+
+/*----------------------------------------------------------------------*/
+
+static void
+dwc_descriptor_complete(struct dw_dma_chan *dwc, struct dw_desc *desc)
+{
+	dma_async_tx_callback		callback;
+	void				*param;
+	struct dma_async_tx_descriptor	*txd = &desc->txd;
+
+	dev_vdbg(&dwc->chan.dev, "descriptor %u complete\n", txd->cookie);
+
+	dwc->completed = txd->cookie;
+	callback = txd->callback;
+	param = txd->callback_param;
+
+	dwc_sync_desc_for_cpu(dwc, desc);
+	list_splice_init(&txd->tx_list, &dwc->free_list);
+	list_move(&desc->desc_node, &dwc->free_list);
+
+	/*
+	 * We use dma_unmap_page() regardless of how the buffers were
+	 * mapped before they were submitted...
+	 */
+	if (!(txd->flags & DMA_COMPL_SKIP_DEST_UNMAP))
+		dma_unmap_page(dwc->chan.dev.parent, desc->lli.dar, desc->len,
+				DMA_FROM_DEVICE);
+	if (!(txd->flags & DMA_COMPL_SKIP_SRC_UNMAP))
+		dma_unmap_page(dwc->chan.dev.parent, desc->lli.sar, desc->len,
+				DMA_TO_DEVICE);
+
+	/*
+	 * The API requires that no submissions are done from a
+	 * callback, so we don't need to drop the lock here
+	 */
+	if (callback)
+		callback(param);
+}
+
+static void dwc_complete_all(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+	struct dw_desc *desc, *_desc;
+	LIST_HEAD(list);
+
+	if (dma_readl(dw, CH_EN) & dwc->mask) {
+		dev_err(&dwc->chan.dev,
+			"BUG: XFER bit set, but channel not idle!\n");
+
+		/* Try to continue after resetting the channel... */
+		channel_clear_bit(dw, CH_EN, dwc->mask);
+		while (dma_readl(dw, CH_EN) & dwc->mask)
+			cpu_relax();
+	}
+
+	/*
+	 * Submit queued descriptors ASAP, i.e. before we go through
+	 * the completed ones.
+	 */
+	if (!list_empty(&dwc->queue))
+		dwc_dostart(dwc, dwc_first_queued(dwc));
+	list_splice_init(&dwc->active_list, &list);
+	list_splice_init(&dwc->queue, &dwc->active_list);
+
+	list_for_each_entry_safe(desc, _desc, &list, desc_node)
+		dwc_descriptor_complete(dwc, desc);
+}
+
+static void dwc_scan_descriptors(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+	dma_addr_t llp;
+	struct dw_desc *desc, *_desc;
+	struct dw_desc *child;
+	u32 status_xfer;
+
+	/*
+	 * Clear block interrupt flag before scanning so that we don't
+	 * miss any, and read LLP before RAW_XFER to ensure it is
+	 * valid if we decide to scan the list.
+	 */
+	dma_writel(dw, CLEAR.BLOCK, dwc->mask);
+	llp = channel_readl(dwc, LLP);
+	status_xfer = dma_readl(dw, RAW.XFER);
+
+	if (status_xfer & dwc->mask) {
+		/* Everything we've submitted is done */
+		dma_writel(dw, CLEAR.XFER, dwc->mask);
+		dwc_complete_all(dw, dwc);
+		return;
+	}
+
+	dev_vdbg(&dwc->chan.dev, "scan_descriptors: llp=0x%x\n", llp);
+
+	list_for_each_entry_safe(desc, _desc, &dwc->active_list, desc_node) {
+		if (desc->lli.llp == llp)
+			/* This one is currently in progress */
+			return;
+
+		list_for_each_entry(child, &desc->txd.tx_list, desc_node)
+			if (child->lli.llp == llp)
+				/* Currently in progress */
+				return;
+
+		/*
+		 * No descriptors so far seem to be in progress, i.e.
+		 * this one must be done.
+		 */
+		dwc_descriptor_complete(dwc, desc);
+	}
+
+	dev_err(&dwc->chan.dev,
+		"BUG: All descriptors done, but channel not idle!\n");
+
+	/* Try to continue after resetting the channel... */
+	channel_clear_bit(dw, CH_EN, dwc->mask);
+	while (dma_readl(dw, CH_EN) & dwc->mask)
+		cpu_relax();
+
+	if (!list_empty(&dwc->queue)) {
+		dwc_dostart(dwc, dwc_first_queued(dwc));
+		list_splice_init(&dwc->queue, &dwc->active_list);
+	}
+}
+
+static void dwc_dump_lli(struct dw_dma_chan *dwc, struct dw_lli *lli)
+{
+	dev_printk(KERN_CRIT, &dwc->chan.dev,
+			"  desc: s0x%x d0x%x l0x%x c0x%x:%x\n",
+			lli->sar, lli->dar, lli->llp,
+			lli->ctlhi, lli->ctllo);
+}
+
+static void dwc_handle_error(struct dw_dma *dw, struct dw_dma_chan *dwc)
+{
+	struct dw_desc *bad_desc;
+	struct dw_desc *child;
+
+	dwc_scan_descriptors(dw, dwc);
+
+	/*
+	 * The descriptor currently at the head of the active list is
+	 * borked. Since we don't have any way to report errors, we'll
+	 * just have to scream loudly and try to carry on.
+	 */
+	bad_desc = dwc_first_active(dwc);
+	list_del_init(&bad_desc->desc_node);
+	list_splice_init(&dwc->queue, dwc->active_list.prev);
+
+	/* Clear the error flag and try to restart the controller */
+	dma_writel(dw, CLEAR.ERROR, dwc->mask);
+	if (!list_empty(&dwc->active_list))
+		dwc_dostart(dwc, dwc_first_active(dwc));
+
+	/*
+	 * KERN_CRITICAL may seem harsh, but since this only happens
+	 * when someone submits a bad physical address in a
+	 * descriptor, we should consider ourselves lucky that the
+	 * controller flagged an error instead of scribbling over
+	 * random memory locations.
+	 */
+	dev_printk(KERN_CRIT, &dwc->chan.dev,
+			"Bad descriptor submitted for DMA!\n");
+	dev_printk(KERN_CRIT, &dwc->chan.dev,
+			"  cookie: %d\n", bad_desc->txd.cookie);
+	dwc_dump_lli(dwc, &bad_desc->lli);
+	list_for_each_entry(child, &bad_desc->txd.tx_list, desc_node)
+		dwc_dump_lli(dwc, &child->lli);
+
+	/* Pretend the descriptor completed successfully */
+	dwc_descriptor_complete(dwc, bad_desc);
+}
+
+static void dw_dma_tasklet(unsigned long data)
+{
+	struct dw_dma *dw = (struct dw_dma *)data;
+	struct dw_dma_chan *dwc;
+	u32 status_block;
+	u32 status_xfer;
+	u32 status_err;
+	int i;
+
+	status_block = dma_readl(dw, RAW.BLOCK);
+	status_xfer = dma_readl(dw, RAW.XFER);
+	status_err = dma_readl(dw, RAW.ERROR);
+
+	dev_vdbg(dw->dma.dev, "tasklet: status_block=%x status_err=%x\n",
+			status_block, status_err);
+
+	for (i = 0; i < dw->dma.chancnt; i++) {
+		dwc = &dw->chan[i];
+		spin_lock(&dwc->lock);
+		if (status_err & (1 << i))
+			dwc_handle_error(dw, dwc);
+		else if ((status_block | status_xfer) & (1 << i))
+			dwc_scan_descriptors(dw, dwc);
+		spin_unlock(&dwc->lock);
+	}
+
+	/*
+	 * Re-enable interrupts. Block Complete interrupts are only
+	 * enabled if the INT_EN bit in the descriptor is set. This
+	 * will trigger a scan before the whole list is done.
+	 */
+	channel_set_bit(dw, MASK.XFER, dw->all_chan_mask);
+	channel_set_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+	channel_set_bit(dw, MASK.ERROR, dw->all_chan_mask);
+}
+
+static irqreturn_t dw_dma_interrupt(int irq, void *dev_id)
+{
+	struct dw_dma *dw = dev_id;
+	u32 status;
+
+	dev_vdbg(dw->dma.dev, "interrupt: status=0x%x\n",
+			dma_readl(dw, STATUS_INT));
+
+	/*
+	 * Just disable the interrupts. We'll turn them back on in the
+	 * softirq handler.
+	 */
+	channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
+
+	status = dma_readl(dw, STATUS_INT);
+	if (status) {
+		dev_err(dw->dma.dev,
+			"BUG: Unexpected interrupts pending: 0x%x\n",
+			status);
+
+		/* Try to recover */
+		channel_clear_bit(dw, MASK.XFER, (1 << 8) - 1);
+		channel_clear_bit(dw, MASK.BLOCK, (1 << 8) - 1);
+		channel_clear_bit(dw, MASK.SRC_TRAN, (1 << 8) - 1);
+		channel_clear_bit(dw, MASK.DST_TRAN, (1 << 8) - 1);
+		channel_clear_bit(dw, MASK.ERROR, (1 << 8) - 1);
+	}
+
+	tasklet_schedule(&dw->tasklet);
+
+	return IRQ_HANDLED;
+}
+
+/*----------------------------------------------------------------------*/
+
+static dma_cookie_t dwc_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct dw_desc		*desc = txd_to_dw_desc(tx);
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(tx->chan);
+	dma_cookie_t		cookie;
+
+	spin_lock_bh(&dwc->lock);
+	cookie = dwc_assign_cookie(dwc, desc);
+
+	/*
+	 * REVISIT: We should attempt to chain as many descriptors as
+	 * possible, perhaps even appending to those already submitted
+	 * for DMA. But this is hard to do in a race-free manner.
+	 */
+	if (list_empty(&dwc->active_list)) {
+		dev_vdbg(&tx->chan->dev, "tx_submit: started %u\n",
+				desc->txd.cookie);
+		dwc_dostart(dwc, desc);
+		list_add_tail(&desc->desc_node, &dwc->active_list);
+	} else {
+		dev_vdbg(&tx->chan->dev, "tx_submit: queued %u\n",
+				desc->txd.cookie);
+
+		list_add_tail(&desc->desc_node, &dwc->queue);
+	}
+
+	spin_unlock_bh(&dwc->lock);
+
+	return cookie;
+}
+
+static struct dma_async_tx_descriptor *
+dwc_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_desc		*desc;
+	struct dw_desc		*first;
+	struct dw_desc		*prev;
+	size_t			xfer_count;
+	size_t			offset;
+	unsigned int		src_width;
+	unsigned int		dst_width;
+	u32			ctllo;
+
+	dev_vdbg(&chan->dev, "prep_dma_memcpy d0x%x s0x%x l0x%zx f0x%lx\n",
+			dest, src, len, flags);
+
+	if (unlikely(!len)) {
+		dev_dbg(&chan->dev, "prep_dma_memcpy: length is zero!\n");
+		return NULL;
+	}
+
+	/*
+	 * We can be a lot more clever here, but this should take care
+	 * of the most common optimization.
+	 */
+	if (!((src | dest  | len) & 3))
+		src_width = dst_width = 2;
+	else if (!((src | dest | len) & 1))
+		src_width = dst_width = 1;
+	else
+		src_width = dst_width = 0;
+
+	ctllo = DWC_DEFAULT_CTLLO
+			| DWC_CTLL_DST_WIDTH(dst_width)
+			| DWC_CTLL_SRC_WIDTH(src_width)
+			| DWC_CTLL_DST_INC
+			| DWC_CTLL_SRC_INC
+			| DWC_CTLL_FC_M2M;
+	prev = first = NULL;
+
+	for (offset = 0; offset < len; offset += xfer_count << src_width) {
+		xfer_count = min_t(size_t, (len - offset) >> src_width,
+				DWC_MAX_COUNT);
+
+		desc = dwc_desc_get(dwc);
+		if (!desc)
+			goto err_desc_get;
+
+		desc->lli.sar = src + offset;
+		desc->lli.dar = dest + offset;
+		desc->lli.ctllo = ctllo;
+		desc->lli.ctlhi = xfer_count;
+
+		if (!first) {
+			first = desc;
+		} else {
+			prev->lli.llp = desc->txd.phys;
+			dma_sync_single_for_device(chan->dev.parent,
+					prev->txd.phys, sizeof(prev->lli),
+					DMA_TO_DEVICE);
+			list_add_tail(&desc->desc_node,
+					&first->txd.tx_list);
+		}
+		prev = desc;
+	}
+
+
+	if (flags & DMA_PREP_INTERRUPT)
+		/* Trigger interrupt after last block */
+		prev->lli.ctllo |= DWC_CTLL_INT_EN;
+
+	prev->lli.llp = 0;
+	dma_sync_single_for_device(chan->dev.parent,
+			prev->txd.phys, sizeof(prev->lli),
+			DMA_TO_DEVICE);
+
+	first->txd.flags = flags;
+	first->len = len;
+
+	return &first->txd;
+
+err_desc_get:
+	dwc_desc_put(dwc, first);
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *
+dwc_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+		unsigned int sg_len, enum dma_data_direction direction,
+		unsigned long flags)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma_slave	*dws = dwc->dws;
+	struct dw_desc		*prev;
+	struct dw_desc		*first;
+	u32			ctllo;
+	dma_addr_t		reg;
+	unsigned int		reg_width;
+	unsigned int		mem_width;
+	unsigned int		i;
+	struct scatterlist	*sg;
+	size_t			total_len = 0;
+
+	dev_vdbg(&chan->dev, "prep_dma_slave\n");
+
+	if (unlikely(!dws || !sg_len))
+		return NULL;
+
+	reg_width = dws->slave.reg_width;
+	prev = first = NULL;
+
+	sg_len = dma_map_sg(chan->dev.parent, sgl, sg_len, direction);
+
+	switch (direction) {
+	case DMA_TO_DEVICE:
+		ctllo = (DWC_DEFAULT_CTLLO
+				| DWC_CTLL_DST_WIDTH(reg_width)
+				| DWC_CTLL_DST_FIX
+				| DWC_CTLL_SRC_INC
+				| DWC_CTLL_FC_M2P);
+		reg = dws->slave.tx_reg;
+		for_each_sg(sgl, sg, sg_len, i) {
+			struct dw_desc	*desc;
+			u32		len;
+			u32		mem;
+
+			desc = dwc_desc_get(dwc);
+			if (!desc) {
+				dev_err(&chan->dev,
+					"not enough descriptors available\n");
+				goto err_desc_get;
+			}
+
+			mem = sg_phys(sg);
+			len = sg_dma_len(sg);
+			mem_width = 2;
+			if (unlikely(mem & 3 || len & 3))
+				mem_width = 0;
+
+			desc->lli.sar = mem;
+			desc->lli.dar = reg;
+			desc->lli.ctllo = ctllo | DWC_CTLL_SRC_WIDTH(mem_width);
+			desc->lli.ctlhi = len >> mem_width;
+
+			if (!first) {
+				first = desc;
+			} else {
+				prev->lli.llp = desc->txd.phys;
+				dma_sync_single_for_device(chan->dev.parent,
+						prev->txd.phys,
+						sizeof(prev->lli),
+						DMA_TO_DEVICE);
+				list_add_tail(&desc->desc_node,
+						&first->txd.tx_list);
+			}
+			prev = desc;
+			total_len += len;
+		}
+		break;
+	case DMA_FROM_DEVICE:
+		ctllo = (DWC_DEFAULT_CTLLO
+				| DWC_CTLL_SRC_WIDTH(reg_width)
+				| DWC_CTLL_DST_INC
+				| DWC_CTLL_SRC_FIX
+				| DWC_CTLL_FC_P2M);
+
+		reg = dws->slave.rx_reg;
+		for_each_sg(sgl, sg, sg_len, i) {
+			struct dw_desc	*desc;
+			u32		len;
+			u32		mem;
+
+			desc = dwc_desc_get(dwc);
+			if (!desc) {
+				dev_err(&chan->dev,
+					"not enough descriptors available\n");
+				goto err_desc_get;
+			}
+
+			mem = sg_phys(sg);
+			len = sg_dma_len(sg);
+			mem_width = 2;
+			if (unlikely(mem & 3 || len & 3))
+				mem_width = 0;
+
+			desc->lli.sar = reg;
+			desc->lli.dar = mem;
+			desc->lli.ctllo = ctllo | DWC_CTLL_DST_WIDTH(mem_width);
+			desc->lli.ctlhi = len >> reg_width;
+
+			if (!first) {
+				first = desc;
+			} else {
+				prev->lli.llp = desc->txd.phys;
+				dma_sync_single_for_device(chan->dev.parent,
+						prev->txd.phys,
+						sizeof(prev->lli),
+						DMA_TO_DEVICE);
+				list_add_tail(&desc->desc_node,
+						&first->txd.tx_list);
+			}
+			prev = desc;
+			total_len += len;
+		}
+		break;
+	default:
+		return NULL;
+	}
+
+	if (flags & DMA_PREP_INTERRUPT)
+		/* Trigger interrupt after last block */
+		prev->lli.ctllo |= DWC_CTLL_INT_EN;
+
+	prev->lli.llp = 0;
+	dma_sync_single_for_device(chan->dev.parent,
+			prev->txd.phys, sizeof(prev->lli),
+			DMA_TO_DEVICE);
+
+	first->len = total_len;
+
+	return &first->txd;
+
+err_desc_get:
+	dwc_desc_put(dwc, first);
+	return NULL;
+}
+
+static void dwc_terminate_all(struct dma_chan *chan)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma		*dw = to_dw_dma(chan->device);
+	struct dw_desc		*desc, *_desc;
+	LIST_HEAD(list);
+
+	/*
+	 * This is only called when something went wrong elsewhere, so
+	 * we don't really care about the data. Just disable the
+	 * channel. We still have to poll the channel enable bit due
+	 * to AHB/HSB limitations.
+	 */
+	spin_lock_bh(&dwc->lock);
+
+	channel_clear_bit(dw, CH_EN, dwc->mask);
+
+	while (dma_readl(dw, CH_EN) & dwc->mask)
+		cpu_relax();
+
+	/* active_list entries will end up before queued entries */
+	list_splice_init(&dwc->queue, &list);
+	list_splice_init(&dwc->active_list, &list);
+
+	spin_unlock_bh(&dwc->lock);
+
+	/* Flush all pending and queued descriptors */
+	list_for_each_entry_safe(desc, _desc, &list, desc_node)
+		dwc_descriptor_complete(dwc, desc);
+}
+
+static enum dma_status
+dwc_is_tx_complete(struct dma_chan *chan,
+		dma_cookie_t cookie,
+		dma_cookie_t *done, dma_cookie_t *used)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	dma_cookie_t		last_used;
+	dma_cookie_t		last_complete;
+	int			ret;
+
+	last_complete = dwc->completed;
+	last_used = chan->cookie;
+
+	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	if (ret != DMA_SUCCESS) {
+		dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
+
+		last_complete = dwc->completed;
+		last_used = chan->cookie;
+
+		ret = dma_async_is_complete(cookie, last_complete, last_used);
+	}
+
+	if (done)
+		*done = last_complete;
+	if (used)
+		*used = last_used;
+
+	return ret;
+}
+
+static void dwc_issue_pending(struct dma_chan *chan)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+
+	spin_lock_bh(&dwc->lock);
+	if (!list_empty(&dwc->queue))
+		dwc_scan_descriptors(to_dw_dma(chan->device), dwc);
+	spin_unlock_bh(&dwc->lock);
+}
+
+static int dwc_alloc_chan_resources(struct dma_chan *chan,
+		struct dma_client *client)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma		*dw = to_dw_dma(chan->device);
+	struct dw_desc		*desc;
+	struct dma_slave	*slave;
+	struct dw_dma_slave	*dws;
+	int			i;
+	u32			cfghi;
+	u32			cfglo;
+
+	dev_vdbg(&chan->dev, "alloc_chan_resources\n");
+
+	/* Channels doing slave DMA can only handle one client. */
+	if (dwc->dws || client->slave) {
+		if (chan->client_count)
+			return -EBUSY;
+	}
+
+	/* ASSERT:  channel is idle */
+	if (dma_readl(dw, CH_EN) & dwc->mask) {
+		dev_dbg(&chan->dev, "DMA channel not idle?\n");
+		return -EIO;
+	}
+
+	dwc->completed = chan->cookie = 1;
+
+	cfghi = DWC_CFGH_FIFO_MODE;
+	cfglo = 0;
+
+	slave = client->slave;
+	if (slave) {
+		/*
+		 * We need controller-specific data to set up slave
+		 * transfers.
+		 */
+		BUG_ON(!slave->dma_dev || slave->dma_dev != dw->dma.dev);
+
+		dws = container_of(slave, struct dw_dma_slave, slave);
+
+		dwc->dws = dws;
+		cfghi = dws->cfg_hi;
+		cfglo = dws->cfg_lo;
+	} else {
+		dwc->dws = NULL;
+	}
+
+	channel_writel(dwc, CFG_LO, cfglo);
+	channel_writel(dwc, CFG_HI, cfghi);
+
+	/*
+	 * NOTE: some controllers may have additional features that we
+	 * need to initialize here, like "scatter-gather" (which
+	 * doesn't mean what you think it means), and status writeback.
+	 */
+
+	spin_lock_bh(&dwc->lock);
+	i = dwc->descs_allocated;
+	while (dwc->descs_allocated < NR_DESCS_PER_CHANNEL) {
+		spin_unlock_bh(&dwc->lock);
+
+		desc = kzalloc(sizeof(struct dw_desc), GFP_KERNEL);
+		if (!desc) {
+			dev_info(&chan->dev,
+				"only allocated %d descriptors\n", i);
+			spin_lock_bh(&dwc->lock);
+			break;
+		}
+
+		dma_async_tx_descriptor_init(&desc->txd, chan);
+		desc->txd.tx_submit = dwc_tx_submit;
+		desc->txd.flags = DMA_CTRL_ACK;
+		INIT_LIST_HEAD(&desc->txd.tx_list);
+		desc->txd.phys = dma_map_single(chan->dev.parent, &desc->lli,
+				sizeof(desc->lli), DMA_TO_DEVICE);
+		dwc_desc_put(dwc, desc);
+
+		spin_lock_bh(&dwc->lock);
+		i = ++dwc->descs_allocated;
+	}
+
+	/* Enable interrupts */
+	channel_set_bit(dw, MASK.XFER, dwc->mask);
+	channel_set_bit(dw, MASK.BLOCK, dwc->mask);
+	channel_set_bit(dw, MASK.ERROR, dwc->mask);
+
+	spin_unlock_bh(&dwc->lock);
+
+	dev_dbg(&chan->dev,
+		"alloc_chan_resources allocated %d descriptors\n", i);
+
+	return i;
+}
+
+static void dwc_free_chan_resources(struct dma_chan *chan)
+{
+	struct dw_dma_chan	*dwc = to_dw_dma_chan(chan);
+	struct dw_dma		*dw = to_dw_dma(chan->device);
+	struct dw_desc		*desc, *_desc;
+	LIST_HEAD(list);
+
+	dev_dbg(&chan->dev, "free_chan_resources (descs allocated=%u)\n",
+			dwc->descs_allocated);
+
+	/* ASSERT:  channel is idle */
+	BUG_ON(!list_empty(&dwc->active_list));
+	BUG_ON(!list_empty(&dwc->queue));
+	BUG_ON(dma_readl(to_dw_dma(chan->device), CH_EN) & dwc->mask);
+
+	spin_lock_bh(&dwc->lock);
+	list_splice_init(&dwc->free_list, &list);
+	dwc->descs_allocated = 0;
+	dwc->dws = NULL;
+
+	/* Disable interrupts */
+	channel_clear_bit(dw, MASK.XFER, dwc->mask);
+	channel_clear_bit(dw, MASK.BLOCK, dwc->mask);
+	channel_clear_bit(dw, MASK.ERROR, dwc->mask);
+
+	spin_unlock_bh(&dwc->lock);
+
+	list_for_each_entry_safe(desc, _desc, &list, desc_node) {
+		dev_vdbg(&chan->dev, "  freeing descriptor %p\n", desc);
+		dma_unmap_single(chan->dev.parent, desc->txd.phys,
+				sizeof(desc->lli), DMA_TO_DEVICE);
+		kfree(desc);
+	}
+
+	dev_vdbg(&chan->dev, "free_chan_resources done\n");
+}
+
+/*----------------------------------------------------------------------*/
+
+static void dw_dma_off(struct dw_dma *dw)
+{
+	dma_writel(dw, CFG, 0);
+
+	channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
+
+	while (dma_readl(dw, CFG) & DW_CFG_DMA_EN)
+		cpu_relax();
+}
+
+static int __init dw_probe(struct platform_device *pdev)
+{
+	struct dw_dma_platform_data *pdata;
+	struct resource		*io;
+	struct dw_dma		*dw;
+	size_t			size;
+	int			irq;
+	int			err;
+	int			i;
+
+	pdata = pdev->dev.platform_data;
+	if (!pdata || pdata->nr_channels > DW_DMA_MAX_NR_CHANNELS)
+		return -EINVAL;
+
+	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!io)
+		return -EINVAL;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	size = sizeof(struct dw_dma);
+	size += pdata->nr_channels * sizeof(struct dw_dma_chan);
+	dw = kzalloc(size, GFP_KERNEL);
+	if (!dw)
+		return -ENOMEM;
+
+	if (!request_mem_region(io->start, DW_REGLEN, pdev->dev.driver->name)) {
+		err = -EBUSY;
+		goto err_kfree;
+	}
+
+	memset(dw, 0, sizeof *dw);
+
+	dw->regs = ioremap(io->start, DW_REGLEN);
+	if (!dw->regs) {
+		err = -ENOMEM;
+		goto err_release_r;
+	}
+
+	dw->clk = clk_get(&pdev->dev, "hclk");
+	if (IS_ERR(dw->clk)) {
+		err = PTR_ERR(dw->clk);
+		goto err_clk;
+	}
+	clk_enable(dw->clk);
+
+	/* force dma off, just in case */
+	dw_dma_off(dw);
+
+	err = request_irq(irq, dw_dma_interrupt, 0, "dw_dmac", dw);
+	if (err)
+		goto err_irq;
+
+	platform_set_drvdata(pdev, dw);
+
+	tasklet_init(&dw->tasklet, dw_dma_tasklet, (unsigned long)dw);
+
+	dw->all_chan_mask = (1 << pdata->nr_channels) - 1;
+
+	INIT_LIST_HEAD(&dw->dma.channels);
+	for (i = 0; i < pdata->nr_channels; i++, dw->dma.chancnt++) {
+		struct dw_dma_chan	*dwc = &dw->chan[i];
+
+		dwc->chan.device = &dw->dma;
+		dwc->chan.cookie = dwc->completed = 1;
+		dwc->chan.chan_id = i;
+		list_add_tail(&dwc->chan.device_node, &dw->dma.channels);
+
+		dwc->ch_regs = &__dw_regs(dw)->CHAN[i];
+		spin_lock_init(&dwc->lock);
+		dwc->mask = 1 << i;
+
+		INIT_LIST_HEAD(&dwc->active_list);
+		INIT_LIST_HEAD(&dwc->queue);
+		INIT_LIST_HEAD(&dwc->free_list);
+
+		channel_clear_bit(dw, CH_EN, dwc->mask);
+	}
+
+	/* Clear/disable all interrupts on all channels. */
+	dma_writel(dw, CLEAR.XFER, dw->all_chan_mask);
+	dma_writel(dw, CLEAR.BLOCK, dw->all_chan_mask);
+	dma_writel(dw, CLEAR.SRC_TRAN, dw->all_chan_mask);
+	dma_writel(dw, CLEAR.DST_TRAN, dw->all_chan_mask);
+	dma_writel(dw, CLEAR.ERROR, dw->all_chan_mask);
+
+	channel_clear_bit(dw, MASK.XFER, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.BLOCK, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.SRC_TRAN, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.DST_TRAN, dw->all_chan_mask);
+	channel_clear_bit(dw, MASK.ERROR, dw->all_chan_mask);
+
+	dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
+	dma_cap_set(DMA_SLAVE, dw->dma.cap_mask);
+	dw->dma.dev = &pdev->dev;
+	dw->dma.device_alloc_chan_resources = dwc_alloc_chan_resources;
+	dw->dma.device_free_chan_resources = dwc_free_chan_resources;
+
+	dw->dma.device_prep_dma_memcpy = dwc_prep_dma_memcpy;
+
+	dw->dma.device_prep_slave_sg = dwc_prep_slave_sg;
+	dw->dma.device_terminate_all = dwc_terminate_all;
+
+	dw->dma.device_is_tx_complete = dwc_is_tx_complete;
+	dw->dma.device_issue_pending = dwc_issue_pending;
+
+	dma_writel(dw, CFG, DW_CFG_DMA_EN);
+
+	printk(KERN_INFO "%s: DesignWare DMA Controller, %d channels\n",
+			pdev->dev.bus_id, dw->dma.chancnt);
+
+	dma_async_device_register(&dw->dma);
+
+	return 0;
+
+err_irq:
+	clk_disable(dw->clk);
+	clk_put(dw->clk);
+err_clk:
+	iounmap(dw->regs);
+	dw->regs = NULL;
+err_release_r:
+	release_resource(io);
+err_kfree:
+	kfree(dw);
+	return err;
+}
+
+static int __exit dw_remove(struct platform_device *pdev)
+{
+	struct dw_dma		*dw = platform_get_drvdata(pdev);
+	struct dw_dma_chan	*dwc, *_dwc;
+	struct resource		*io;
+
+	dw_dma_off(dw);
+	dma_async_device_unregister(&dw->dma);
+
+	free_irq(platform_get_irq(pdev, 0), dw);
+	tasklet_kill(&dw->tasklet);
+
+	list_for_each_entry_safe(dwc, _dwc, &dw->dma.channels,
+			chan.device_node) {
+		list_del(&dwc->chan.device_node);
+		channel_clear_bit(dw, CH_EN, dwc->mask);
+	}
+
+	clk_disable(dw->clk);
+	clk_put(dw->clk);
+
+	iounmap(dw->regs);
+	dw->regs = NULL;
+
+	io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	release_mem_region(io->start, DW_REGLEN);
+
+	kfree(dw);
+
+	return 0;
+}
+
+static void dw_shutdown(struct platform_device *pdev)
+{
+	struct dw_dma	*dw = platform_get_drvdata(pdev);
+
+	dw_dma_off(platform_get_drvdata(pdev));
+	clk_disable(dw->clk);
+}
+
+static int dw_suspend_late(struct platform_device *pdev, pm_message_t mesg)
+{
+	struct dw_dma	*dw = platform_get_drvdata(pdev);
+
+	dw_dma_off(platform_get_drvdata(pdev));
+	clk_disable(dw->clk);
+	return 0;
+}
+
+static int dw_resume_early(struct platform_device *pdev)
+{
+	struct dw_dma	*dw = platform_get_drvdata(pdev);
+
+	clk_enable(dw->clk);
+	dma_writel(dw, CFG, DW_CFG_DMA_EN);
+	return 0;
+
+}
+
+static struct platform_driver dw_driver = {
+	.remove		= __exit_p(dw_remove),
+	.shutdown	= dw_shutdown,
+	.suspend_late	= dw_suspend_late,
+	.resume_early	= dw_resume_early,
+	.driver = {
+		.name	= "dw_dmac",
+	},
+};
+
+static int __init dw_init(void)
+{
+	return platform_driver_probe(&dw_driver, dw_probe);
+}
+module_init(dw_init);
+
+static void __exit dw_exit(void)
+{
+	platform_driver_unregister(&dw_driver);
+}
+module_exit(dw_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("Synopsys DesignWare DMA Controller driver");
+MODULE_AUTHOR("Haavard Skinnemoen <haavard.skinnemoen@atmel.com>");
diff --git a/drivers/dma/dw_dmac_regs.h b/drivers/dma/dw_dmac_regs.h
new file mode 100644
index 0000000..00fdd18
--- /dev/null
+++ b/drivers/dma/dw_dmac_regs.h
@@ -0,0 +1,225 @@
+/*
+ * Driver for the Synopsys DesignWare AHB DMA Controller
+ *
+ * Copyright (C) 2005-2007 Atmel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/dw_dmac.h>
+
+#define DW_DMA_MAX_NR_CHANNELS	8
+
+/*
+ * Redefine this macro to handle differences between 32- and 64-bit
+ * addressing, big vs. little endian, etc.
+ */
+#define DW_REG(name)		u32 name; u32 __pad_##name
+
+/* Hardware register definitions. */
+struct dw_dma_chan_regs {
+	DW_REG(SAR);		/* Source Address Register */
+	DW_REG(DAR);		/* Destination Address Register */
+	DW_REG(LLP);		/* Linked List Pointer */
+	u32	CTL_LO;		/* Control Register Low */
+	u32	CTL_HI;		/* Control Register High */
+	DW_REG(SSTAT);
+	DW_REG(DSTAT);
+	DW_REG(SSTATAR);
+	DW_REG(DSTATAR);
+	u32	CFG_LO;		/* Configuration Register Low */
+	u32	CFG_HI;		/* Configuration Register High */
+	DW_REG(SGR);
+	DW_REG(DSR);
+};
+
+struct dw_dma_irq_regs {
+	DW_REG(XFER);
+	DW_REG(BLOCK);
+	DW_REG(SRC_TRAN);
+	DW_REG(DST_TRAN);
+	DW_REG(ERROR);
+};
+
+struct dw_dma_regs {
+	/* per-channel registers */
+	struct dw_dma_chan_regs	CHAN[DW_DMA_MAX_NR_CHANNELS];
+
+	/* irq handling */
+	struct dw_dma_irq_regs	RAW;		/* r */
+	struct dw_dma_irq_regs	STATUS;		/* r (raw & mask) */
+	struct dw_dma_irq_regs	MASK;		/* rw (set = irq enabled) */
+	struct dw_dma_irq_regs	CLEAR;		/* w (ack, affects "raw") */
+
+	DW_REG(STATUS_INT);			/* r */
+
+	/* software handshaking */
+	DW_REG(REQ_SRC);
+	DW_REG(REQ_DST);
+	DW_REG(SGL_REQ_SRC);
+	DW_REG(SGL_REQ_DST);
+	DW_REG(LAST_SRC);
+	DW_REG(LAST_DST);
+
+	/* miscellaneous */
+	DW_REG(CFG);
+	DW_REG(CH_EN);
+	DW_REG(ID);
+	DW_REG(TEST);
+
+	/* optional encoded params, 0x3c8..0x3 */
+};
+
+/* Bitfields in CTL_LO */
+#define DWC_CTLL_INT_EN		(1 << 0)	/* irqs enabled? */
+#define DWC_CTLL_DST_WIDTH(n)	((n)<<1)	/* bytes per element */
+#define DWC_CTLL_SRC_WIDTH(n)	((n)<<4)
+#define DWC_CTLL_DST_INC	(0<<7)		/* DAR update/not */
+#define DWC_CTLL_DST_DEC	(1<<7)
+#define DWC_CTLL_DST_FIX	(2<<7)
+#define DWC_CTLL_SRC_INC	(0<<7)		/* SAR update/not */
+#define DWC_CTLL_SRC_DEC	(1<<9)
+#define DWC_CTLL_SRC_FIX	(2<<9)
+#define DWC_CTLL_DST_MSIZE(n)	((n)<<11)	/* burst, #elements */
+#define DWC_CTLL_SRC_MSIZE(n)	((n)<<14)
+#define DWC_CTLL_S_GATH_EN	(1 << 17)	/* src gather, !FIX */
+#define DWC_CTLL_D_SCAT_EN	(1 << 18)	/* dst scatter, !FIX */
+#define DWC_CTLL_FC_M2M		(0 << 20)	/* mem-to-mem */
+#define DWC_CTLL_FC_M2P		(1 << 20)	/* mem-to-periph */
+#define DWC_CTLL_FC_P2M		(2 << 20)	/* periph-to-mem */
+#define DWC_CTLL_FC_P2P		(3 << 20)	/* periph-to-periph */
+/* plus 4 transfer types for peripheral-as-flow-controller */
+#define DWC_CTLL_DMS(n)		((n)<<23)	/* dst master select */
+#define DWC_CTLL_SMS(n)		((n)<<25)	/* src master select */
+#define DWC_CTLL_LLP_D_EN	(1 << 27)	/* dest block chain */
+#define DWC_CTLL_LLP_S_EN	(1 << 28)	/* src block chain */
+
+/* Bitfields in CTL_HI */
+#define DWC_CTLH_DONE		0x00001000
+#define DWC_CTLH_BLOCK_TS_MASK	0x00000fff
+
+/* Bitfields in CFG_LO. Platform-configurable bits are in <linux/dw_dmac.h> */
+#define DWC_CFGL_CH_SUSP	(1 << 8)	/* pause xfer */
+#define DWC_CFGL_FIFO_EMPTY	(1 << 9)	/* pause xfer */
+#define DWC_CFGL_HS_DST		(1 << 10)	/* handshake w/dst */
+#define DWC_CFGL_HS_SRC		(1 << 11)	/* handshake w/src */
+#define DWC_CFGL_MAX_BURST(x)	((x) << 20)
+#define DWC_CFGL_RELOAD_SAR	(1 << 30)
+#define DWC_CFGL_RELOAD_DAR	(1 << 31)
+
+/* Bitfields in CFG_HI. Platform-configurable bits are in <linux/dw_dmac.h> */
+#define DWC_CFGH_DS_UPD_EN	(1 << 5)
+#define DWC_CFGH_SS_UPD_EN	(1 << 6)
+
+/* Bitfields in SGR */
+#define DWC_SGR_SGI(x)		((x) << 0)
+#define DWC_SGR_SGC(x)		((x) << 20)
+
+/* Bitfields in DSR */
+#define DWC_DSR_DSI(x)		((x) << 0)
+#define DWC_DSR_DSC(x)		((x) << 20)
+
+/* Bitfields in CFG */
+#define DW_CFG_DMA_EN		(1 << 0)
+
+#define DW_REGLEN		0x400
+
+struct dw_dma_chan {
+	struct dma_chan		chan;
+	void __iomem		*ch_regs;
+	u8			mask;
+
+	spinlock_t		lock;
+
+	/* these other elements are all protected by lock */
+	dma_cookie_t		completed;
+	struct list_head	active_list;
+	struct list_head	queue;
+	struct list_head	free_list;
+
+	struct dw_dma_slave	*dws;
+
+	unsigned int		descs_allocated;
+};
+
+static inline struct dw_dma_chan_regs __iomem *
+__dwc_regs(struct dw_dma_chan *dwc)
+{
+	return dwc->ch_regs;
+}
+
+#define channel_readl(dwc, name) \
+	__raw_readl(&(__dwc_regs(dwc)->name))
+#define channel_writel(dwc, name, val) \
+	__raw_writel((val), &(__dwc_regs(dwc)->name))
+
+static inline struct dw_dma_chan *to_dw_dma_chan(struct dma_chan *chan)
+{
+	return container_of(chan, struct dw_dma_chan, chan);
+}
+
+
+struct dw_dma {
+	struct dma_device	dma;
+	void __iomem		*regs;
+	struct tasklet_struct	tasklet;
+	struct clk		*clk;
+
+	u8			all_chan_mask;
+
+	struct dw_dma_chan	chan[0];
+};
+
+static inline struct dw_dma_regs __iomem *__dw_regs(struct dw_dma *dw)
+{
+	return dw->regs;
+}
+
+#define dma_readl(dw, name) \
+	__raw_readl(&(__dw_regs(dw)->name))
+#define dma_writel(dw, name, val) \
+	__raw_writel((val), &(__dw_regs(dw)->name))
+
+#define channel_set_bit(dw, reg, mask) \
+	dma_writel(dw, reg, ((mask) << 8) | (mask))
+#define channel_clear_bit(dw, reg, mask) \
+	dma_writel(dw, reg, ((mask) << 8) | 0)
+
+static inline struct dw_dma *to_dw_dma(struct dma_device *ddev)
+{
+	return container_of(ddev, struct dw_dma, dma);
+}
+
+/* LLI == Linked List Item; a.k.a. DMA block descriptor */
+struct dw_lli {
+	/* values that are not changed by hardware */
+	dma_addr_t	sar;
+	dma_addr_t	dar;
+	dma_addr_t	llp;		/* chain to next lli */
+	u32		ctllo;
+	/* values that may get written back: */
+	u32		ctlhi;
+	/* sstat and dstat can snapshot peripheral register state.
+	 * silicon config may discard either or both...
+	 */
+	u32		sstat;
+	u32		dstat;
+};
+
+struct dw_desc {
+	/* FIRST values the hardware uses */
+	struct dw_lli			lli;
+
+	/* THEN values for driver housekeeping */
+	struct list_head		desc_node;
+	struct dma_async_tx_descriptor	txd;
+	size_t				len;
+};
+
+static inline struct dw_desc *
+txd_to_dw_desc(struct dma_async_tx_descriptor *txd)
+{
+	return container_of(txd, struct dw_desc, txd);
+}
diff --git a/drivers/dma/fsldma.c b/drivers/dma/fsldma.c
new file mode 100644
index 0000000..0b95dcc
--- /dev/null
+++ b/drivers/dma/fsldma.c
@@ -0,0 +1,1036 @@
+/*
+ * Freescale MPC85xx, MPC83xx DMA Engine support
+ *
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author:
+ *   Zhang Wei <wei.zhang@freescale.com>, Jul 2007
+ *   Ebony Zhu <ebony.zhu@freescale.com>, May 2007
+ *
+ * Description:
+ *   DMA engine driver for Freescale MPC8540 DMA controller, which is
+ *   also fit for MPC8560, MPC8555, MPC8548, MPC8641, and etc.
+ *   The support for MPC8349 DMA contorller is also added.
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/of_platform.h>
+
+#include "fsldma.h"
+
+static void dma_init(struct fsl_dma_chan *fsl_chan)
+{
+	/* Reset the channel */
+	DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, 0, 32);
+
+	switch (fsl_chan->feature & FSL_DMA_IP_MASK) {
+	case FSL_DMA_IP_85XX:
+		/* Set the channel to below modes:
+		 * EIE - Error interrupt enable
+		 * EOSIE - End of segments interrupt enable (basic mode)
+		 * EOLNIE - End of links interrupt enable
+		 */
+		DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, FSL_DMA_MR_EIE
+				| FSL_DMA_MR_EOLNIE | FSL_DMA_MR_EOSIE, 32);
+		break;
+	case FSL_DMA_IP_83XX:
+		/* Set the channel to below modes:
+		 * EOTIE - End-of-transfer interrupt enable
+		 */
+		DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr, FSL_DMA_MR_EOTIE,
+				32);
+		break;
+	}
+
+}
+
+static void set_sr(struct fsl_dma_chan *fsl_chan, u32 val)
+{
+	DMA_OUT(fsl_chan, &fsl_chan->reg_base->sr, val, 32);
+}
+
+static u32 get_sr(struct fsl_dma_chan *fsl_chan)
+{
+	return DMA_IN(fsl_chan, &fsl_chan->reg_base->sr, 32);
+}
+
+static void set_desc_cnt(struct fsl_dma_chan *fsl_chan,
+				struct fsl_dma_ld_hw *hw, u32 count)
+{
+	hw->count = CPU_TO_DMA(fsl_chan, count, 32);
+}
+
+static void set_desc_src(struct fsl_dma_chan *fsl_chan,
+				struct fsl_dma_ld_hw *hw, dma_addr_t src)
+{
+	u64 snoop_bits;
+
+	snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
+		? ((u64)FSL_DMA_SATR_SREADTYPE_SNOOP_READ << 32) : 0;
+	hw->src_addr = CPU_TO_DMA(fsl_chan, snoop_bits | src, 64);
+}
+
+static void set_desc_dest(struct fsl_dma_chan *fsl_chan,
+				struct fsl_dma_ld_hw *hw, dma_addr_t dest)
+{
+	u64 snoop_bits;
+
+	snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_85XX)
+		? ((u64)FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE << 32) : 0;
+	hw->dst_addr = CPU_TO_DMA(fsl_chan, snoop_bits | dest, 64);
+}
+
+static void set_desc_next(struct fsl_dma_chan *fsl_chan,
+				struct fsl_dma_ld_hw *hw, dma_addr_t next)
+{
+	u64 snoop_bits;
+
+	snoop_bits = ((fsl_chan->feature & FSL_DMA_IP_MASK) == FSL_DMA_IP_83XX)
+		? FSL_DMA_SNEN : 0;
+	hw->next_ln_addr = CPU_TO_DMA(fsl_chan, snoop_bits | next, 64);
+}
+
+static void set_cdar(struct fsl_dma_chan *fsl_chan, dma_addr_t addr)
+{
+	DMA_OUT(fsl_chan, &fsl_chan->reg_base->cdar, addr | FSL_DMA_SNEN, 64);
+}
+
+static dma_addr_t get_cdar(struct fsl_dma_chan *fsl_chan)
+{
+	return DMA_IN(fsl_chan, &fsl_chan->reg_base->cdar, 64) & ~FSL_DMA_SNEN;
+}
+
+static void set_ndar(struct fsl_dma_chan *fsl_chan, dma_addr_t addr)
+{
+	DMA_OUT(fsl_chan, &fsl_chan->reg_base->ndar, addr, 64);
+}
+
+static dma_addr_t get_ndar(struct fsl_dma_chan *fsl_chan)
+{
+	return DMA_IN(fsl_chan, &fsl_chan->reg_base->ndar, 64);
+}
+
+static u32 get_bcr(struct fsl_dma_chan *fsl_chan)
+{
+	return DMA_IN(fsl_chan, &fsl_chan->reg_base->bcr, 32);
+}
+
+static int dma_is_idle(struct fsl_dma_chan *fsl_chan)
+{
+	u32 sr = get_sr(fsl_chan);
+	return (!(sr & FSL_DMA_SR_CB)) || (sr & FSL_DMA_SR_CH);
+}
+
+static void dma_start(struct fsl_dma_chan *fsl_chan)
+{
+	u32 mr_set = 0;;
+
+	if (fsl_chan->feature & FSL_DMA_CHAN_PAUSE_EXT) {
+		DMA_OUT(fsl_chan, &fsl_chan->reg_base->bcr, 0, 32);
+		mr_set |= FSL_DMA_MR_EMP_EN;
+	} else
+		DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+			DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
+				& ~FSL_DMA_MR_EMP_EN, 32);
+
+	if (fsl_chan->feature & FSL_DMA_CHAN_START_EXT)
+		mr_set |= FSL_DMA_MR_EMS_EN;
+	else
+		mr_set |= FSL_DMA_MR_CS;
+
+	DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+			DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
+			| mr_set, 32);
+}
+
+static void dma_halt(struct fsl_dma_chan *fsl_chan)
+{
+	int i = 0;
+	DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+		DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) | FSL_DMA_MR_CA,
+		32);
+	DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+		DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) & ~(FSL_DMA_MR_CS
+		| FSL_DMA_MR_EMS_EN | FSL_DMA_MR_CA), 32);
+
+	while (!dma_is_idle(fsl_chan) && (i++ < 100))
+		udelay(10);
+	if (i >= 100 && !dma_is_idle(fsl_chan))
+		dev_err(fsl_chan->dev, "DMA halt timeout!\n");
+}
+
+static void set_ld_eol(struct fsl_dma_chan *fsl_chan,
+			struct fsl_desc_sw *desc)
+{
+	desc->hw.next_ln_addr = CPU_TO_DMA(fsl_chan,
+		DMA_TO_CPU(fsl_chan, desc->hw.next_ln_addr, 64)	| FSL_DMA_EOL,
+		64);
+}
+
+static void append_ld_queue(struct fsl_dma_chan *fsl_chan,
+		struct fsl_desc_sw *new_desc)
+{
+	struct fsl_desc_sw *queue_tail = to_fsl_desc(fsl_chan->ld_queue.prev);
+
+	if (list_empty(&fsl_chan->ld_queue))
+		return;
+
+	/* Link to the new descriptor physical address and
+	 * Enable End-of-segment interrupt for
+	 * the last link descriptor.
+	 * (the previous node's next link descriptor)
+	 *
+	 * For FSL_DMA_IP_83xx, the snoop enable bit need be set.
+	 */
+	queue_tail->hw.next_ln_addr = CPU_TO_DMA(fsl_chan,
+			new_desc->async_tx.phys | FSL_DMA_EOSIE |
+			(((fsl_chan->feature & FSL_DMA_IP_MASK)
+				== FSL_DMA_IP_83XX) ? FSL_DMA_SNEN : 0), 64);
+}
+
+/**
+ * fsl_chan_set_src_loop_size - Set source address hold transfer size
+ * @fsl_chan : Freescale DMA channel
+ * @size     : Address loop size, 0 for disable loop
+ *
+ * The set source address hold transfer size. The source
+ * address hold or loop transfer size is when the DMA transfer
+ * data from source address (SA), if the loop size is 4, the DMA will
+ * read data from SA, SA + 1, SA + 2, SA + 3, then loop back to SA,
+ * SA + 1 ... and so on.
+ */
+static void fsl_chan_set_src_loop_size(struct fsl_dma_chan *fsl_chan, int size)
+{
+	switch (size) {
+	case 0:
+		DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+			DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) &
+			(~FSL_DMA_MR_SAHE), 32);
+		break;
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+			DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) |
+			FSL_DMA_MR_SAHE | (__ilog2(size) << 14),
+			32);
+		break;
+	}
+}
+
+/**
+ * fsl_chan_set_dest_loop_size - Set destination address hold transfer size
+ * @fsl_chan : Freescale DMA channel
+ * @size     : Address loop size, 0 for disable loop
+ *
+ * The set destination address hold transfer size. The destination
+ * address hold or loop transfer size is when the DMA transfer
+ * data to destination address (TA), if the loop size is 4, the DMA will
+ * write data to TA, TA + 1, TA + 2, TA + 3, then loop back to TA,
+ * TA + 1 ... and so on.
+ */
+static void fsl_chan_set_dest_loop_size(struct fsl_dma_chan *fsl_chan, int size)
+{
+	switch (size) {
+	case 0:
+		DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+			DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) &
+			(~FSL_DMA_MR_DAHE), 32);
+		break;
+	case 1:
+	case 2:
+	case 4:
+	case 8:
+		DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+			DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32) |
+			FSL_DMA_MR_DAHE | (__ilog2(size) << 16),
+			32);
+		break;
+	}
+}
+
+/**
+ * fsl_chan_toggle_ext_pause - Toggle channel external pause status
+ * @fsl_chan : Freescale DMA channel
+ * @size     : Pause control size, 0 for disable external pause control.
+ *             The maximum is 1024.
+ *
+ * The Freescale DMA channel can be controlled by the external
+ * signal DREQ#. The pause control size is how many bytes are allowed
+ * to transfer before pausing the channel, after which a new assertion
+ * of DREQ# resumes channel operation.
+ */
+static void fsl_chan_toggle_ext_pause(struct fsl_dma_chan *fsl_chan, int size)
+{
+	if (size > 1024)
+		return;
+
+	if (size) {
+		DMA_OUT(fsl_chan, &fsl_chan->reg_base->mr,
+			DMA_IN(fsl_chan, &fsl_chan->reg_base->mr, 32)
+				| ((__ilog2(size) << 24) & 0x0f000000),
+			32);
+		fsl_chan->feature |= FSL_DMA_CHAN_PAUSE_EXT;
+	} else
+		fsl_chan->feature &= ~FSL_DMA_CHAN_PAUSE_EXT;
+}
+
+/**
+ * fsl_chan_toggle_ext_start - Toggle channel external start status
+ * @fsl_chan : Freescale DMA channel
+ * @enable   : 0 is disabled, 1 is enabled.
+ *
+ * If enable the external start, the channel can be started by an
+ * external DMA start pin. So the dma_start() does not start the
+ * transfer immediately. The DMA channel will wait for the
+ * control pin asserted.
+ */
+static void fsl_chan_toggle_ext_start(struct fsl_dma_chan *fsl_chan, int enable)
+{
+	if (enable)
+		fsl_chan->feature |= FSL_DMA_CHAN_START_EXT;
+	else
+		fsl_chan->feature &= ~FSL_DMA_CHAN_START_EXT;
+}
+
+static dma_cookie_t fsl_dma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct fsl_desc_sw *desc = tx_to_fsl_desc(tx);
+	struct fsl_dma_chan *fsl_chan = to_fsl_chan(tx->chan);
+	unsigned long flags;
+	dma_cookie_t cookie;
+
+	/* cookie increment and adding to ld_queue must be atomic */
+	spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+
+	cookie = fsl_chan->common.cookie;
+	cookie++;
+	if (cookie < 0)
+		cookie = 1;
+	desc->async_tx.cookie = cookie;
+	fsl_chan->common.cookie = desc->async_tx.cookie;
+
+	append_ld_queue(fsl_chan, desc);
+	list_splice_init(&desc->async_tx.tx_list, fsl_chan->ld_queue.prev);
+
+	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+
+	return cookie;
+}
+
+/**
+ * fsl_dma_alloc_descriptor - Allocate descriptor from channel's DMA pool.
+ * @fsl_chan : Freescale DMA channel
+ *
+ * Return - The descriptor allocated. NULL for failed.
+ */
+static struct fsl_desc_sw *fsl_dma_alloc_descriptor(
+					struct fsl_dma_chan *fsl_chan)
+{
+	dma_addr_t pdesc;
+	struct fsl_desc_sw *desc_sw;
+
+	desc_sw = dma_pool_alloc(fsl_chan->desc_pool, GFP_ATOMIC, &pdesc);
+	if (desc_sw) {
+		memset(desc_sw, 0, sizeof(struct fsl_desc_sw));
+		dma_async_tx_descriptor_init(&desc_sw->async_tx,
+						&fsl_chan->common);
+		desc_sw->async_tx.tx_submit = fsl_dma_tx_submit;
+		INIT_LIST_HEAD(&desc_sw->async_tx.tx_list);
+		desc_sw->async_tx.phys = pdesc;
+	}
+
+	return desc_sw;
+}
+
+
+/**
+ * fsl_dma_alloc_chan_resources - Allocate resources for DMA channel.
+ * @fsl_chan : Freescale DMA channel
+ *
+ * This function will create a dma pool for descriptor allocation.
+ *
+ * Return - The number of descriptors allocated.
+ */
+static int fsl_dma_alloc_chan_resources(struct dma_chan *chan,
+					struct dma_client *client)
+{
+	struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
+
+	/* Has this channel already been allocated? */
+	if (fsl_chan->desc_pool)
+		return 1;
+
+	/* We need the descriptor to be aligned to 32bytes
+	 * for meeting FSL DMA specification requirement.
+	 */
+	fsl_chan->desc_pool = dma_pool_create("fsl_dma_engine_desc_pool",
+			fsl_chan->dev, sizeof(struct fsl_desc_sw),
+			32, 0);
+	if (!fsl_chan->desc_pool) {
+		dev_err(fsl_chan->dev, "No memory for channel %d "
+			"descriptor dma pool.\n", fsl_chan->id);
+		return 0;
+	}
+
+	return 1;
+}
+
+/**
+ * fsl_dma_free_chan_resources - Free all resources of the channel.
+ * @fsl_chan : Freescale DMA channel
+ */
+static void fsl_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
+	struct fsl_desc_sw *desc, *_desc;
+	unsigned long flags;
+
+	dev_dbg(fsl_chan->dev, "Free all channel resources.\n");
+	spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+	list_for_each_entry_safe(desc, _desc, &fsl_chan->ld_queue, node) {
+#ifdef FSL_DMA_LD_DEBUG
+		dev_dbg(fsl_chan->dev,
+				"LD %p will be released.\n", desc);
+#endif
+		list_del(&desc->node);
+		/* free link descriptor */
+		dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys);
+	}
+	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+	dma_pool_destroy(fsl_chan->desc_pool);
+
+	fsl_chan->desc_pool = NULL;
+}
+
+static struct dma_async_tx_descriptor *
+fsl_dma_prep_interrupt(struct dma_chan *chan, unsigned long flags)
+{
+	struct fsl_dma_chan *fsl_chan;
+	struct fsl_desc_sw *new;
+
+	if (!chan)
+		return NULL;
+
+	fsl_chan = to_fsl_chan(chan);
+
+	new = fsl_dma_alloc_descriptor(fsl_chan);
+	if (!new) {
+		dev_err(fsl_chan->dev, "No free memory for link descriptor\n");
+		return NULL;
+	}
+
+	new->async_tx.cookie = -EBUSY;
+	new->async_tx.flags = flags;
+
+	/* Insert the link descriptor to the LD ring */
+	list_add_tail(&new->node, &new->async_tx.tx_list);
+
+	/* Set End-of-link to the last link descriptor of new list*/
+	set_ld_eol(fsl_chan, new);
+
+	return &new->async_tx;
+}
+
+static struct dma_async_tx_descriptor *fsl_dma_prep_memcpy(
+	struct dma_chan *chan, dma_addr_t dma_dest, dma_addr_t dma_src,
+	size_t len, unsigned long flags)
+{
+	struct fsl_dma_chan *fsl_chan;
+	struct fsl_desc_sw *first = NULL, *prev = NULL, *new;
+	size_t copy;
+	LIST_HEAD(link_chain);
+
+	if (!chan)
+		return NULL;
+
+	if (!len)
+		return NULL;
+
+	fsl_chan = to_fsl_chan(chan);
+
+	do {
+
+		/* Allocate the link descriptor from DMA pool */
+		new = fsl_dma_alloc_descriptor(fsl_chan);
+		if (!new) {
+			dev_err(fsl_chan->dev,
+					"No free memory for link descriptor\n");
+			return NULL;
+		}
+#ifdef FSL_DMA_LD_DEBUG
+		dev_dbg(fsl_chan->dev, "new link desc alloc %p\n", new);
+#endif
+
+		copy = min(len, (size_t)FSL_DMA_BCR_MAX_CNT);
+
+		set_desc_cnt(fsl_chan, &new->hw, copy);
+		set_desc_src(fsl_chan, &new->hw, dma_src);
+		set_desc_dest(fsl_chan, &new->hw, dma_dest);
+
+		if (!first)
+			first = new;
+		else
+			set_desc_next(fsl_chan, &prev->hw, new->async_tx.phys);
+
+		new->async_tx.cookie = 0;
+		async_tx_ack(&new->async_tx);
+
+		prev = new;
+		len -= copy;
+		dma_src += copy;
+		dma_dest += copy;
+
+		/* Insert the link descriptor to the LD ring */
+		list_add_tail(&new->node, &first->async_tx.tx_list);
+	} while (len);
+
+	new->async_tx.flags = flags; /* client is in control of this ack */
+	new->async_tx.cookie = -EBUSY;
+
+	/* Set End-of-link to the last link descriptor of new list*/
+	set_ld_eol(fsl_chan, new);
+
+	return first ? &first->async_tx : NULL;
+}
+
+/**
+ * fsl_dma_update_completed_cookie - Update the completed cookie.
+ * @fsl_chan : Freescale DMA channel
+ */
+static void fsl_dma_update_completed_cookie(struct fsl_dma_chan *fsl_chan)
+{
+	struct fsl_desc_sw *cur_desc, *desc;
+	dma_addr_t ld_phy;
+
+	ld_phy = get_cdar(fsl_chan) & FSL_DMA_NLDA_MASK;
+
+	if (ld_phy) {
+		cur_desc = NULL;
+		list_for_each_entry(desc, &fsl_chan->ld_queue, node)
+			if (desc->async_tx.phys == ld_phy) {
+				cur_desc = desc;
+				break;
+			}
+
+		if (cur_desc && cur_desc->async_tx.cookie) {
+			if (dma_is_idle(fsl_chan))
+				fsl_chan->completed_cookie =
+					cur_desc->async_tx.cookie;
+			else
+				fsl_chan->completed_cookie =
+					cur_desc->async_tx.cookie - 1;
+		}
+	}
+}
+
+/**
+ * fsl_chan_ld_cleanup - Clean up link descriptors
+ * @fsl_chan : Freescale DMA channel
+ *
+ * This function clean up the ld_queue of DMA channel.
+ * If 'in_intr' is set, the function will move the link descriptor to
+ * the recycle list. Otherwise, free it directly.
+ */
+static void fsl_chan_ld_cleanup(struct fsl_dma_chan *fsl_chan)
+{
+	struct fsl_desc_sw *desc, *_desc;
+	unsigned long flags;
+
+	spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+
+	dev_dbg(fsl_chan->dev, "chan completed_cookie = %d\n",
+			fsl_chan->completed_cookie);
+	list_for_each_entry_safe(desc, _desc, &fsl_chan->ld_queue, node) {
+		dma_async_tx_callback callback;
+		void *callback_param;
+
+		if (dma_async_is_complete(desc->async_tx.cookie,
+			    fsl_chan->completed_cookie, fsl_chan->common.cookie)
+				== DMA_IN_PROGRESS)
+			break;
+
+		callback = desc->async_tx.callback;
+		callback_param = desc->async_tx.callback_param;
+
+		/* Remove from ld_queue list */
+		list_del(&desc->node);
+
+		dev_dbg(fsl_chan->dev, "link descriptor %p will be recycle.\n",
+				desc);
+		dma_pool_free(fsl_chan->desc_pool, desc, desc->async_tx.phys);
+
+		/* Run the link descriptor callback function */
+		if (callback) {
+			spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+			dev_dbg(fsl_chan->dev, "link descriptor %p callback\n",
+					desc);
+			callback(callback_param);
+			spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+		}
+	}
+	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+}
+
+/**
+ * fsl_chan_xfer_ld_queue - Transfer link descriptors in channel ld_queue.
+ * @fsl_chan : Freescale DMA channel
+ */
+static void fsl_chan_xfer_ld_queue(struct fsl_dma_chan *fsl_chan)
+{
+	struct list_head *ld_node;
+	dma_addr_t next_dest_addr;
+	unsigned long flags;
+
+	if (!dma_is_idle(fsl_chan))
+		return;
+
+	dma_halt(fsl_chan);
+
+	/* If there are some link descriptors
+	 * not transfered in queue. We need to start it.
+	 */
+	spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+
+	/* Find the first un-transfer desciptor */
+	for (ld_node = fsl_chan->ld_queue.next;
+		(ld_node != &fsl_chan->ld_queue)
+			&& (dma_async_is_complete(
+				to_fsl_desc(ld_node)->async_tx.cookie,
+				fsl_chan->completed_cookie,
+				fsl_chan->common.cookie) == DMA_SUCCESS);
+		ld_node = ld_node->next);
+
+	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+
+	if (ld_node != &fsl_chan->ld_queue) {
+		/* Get the ld start address from ld_queue */
+		next_dest_addr = to_fsl_desc(ld_node)->async_tx.phys;
+		dev_dbg(fsl_chan->dev, "xfer LDs staring from %p\n",
+				(void *)next_dest_addr);
+		set_cdar(fsl_chan, next_dest_addr);
+		dma_start(fsl_chan);
+	} else {
+		set_cdar(fsl_chan, 0);
+		set_ndar(fsl_chan, 0);
+	}
+}
+
+/**
+ * fsl_dma_memcpy_issue_pending - Issue the DMA start command
+ * @fsl_chan : Freescale DMA channel
+ */
+static void fsl_dma_memcpy_issue_pending(struct dma_chan *chan)
+{
+	struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
+
+#ifdef FSL_DMA_LD_DEBUG
+	struct fsl_desc_sw *ld;
+	unsigned long flags;
+
+	spin_lock_irqsave(&fsl_chan->desc_lock, flags);
+	if (list_empty(&fsl_chan->ld_queue)) {
+		spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+		return;
+	}
+
+	dev_dbg(fsl_chan->dev, "--memcpy issue--\n");
+	list_for_each_entry(ld, &fsl_chan->ld_queue, node) {
+		int i;
+		dev_dbg(fsl_chan->dev, "Ch %d, LD %08x\n",
+				fsl_chan->id, ld->async_tx.phys);
+		for (i = 0; i < 8; i++)
+			dev_dbg(fsl_chan->dev, "LD offset %d: %08x\n",
+					i, *(((u32 *)&ld->hw) + i));
+	}
+	dev_dbg(fsl_chan->dev, "----------------\n");
+	spin_unlock_irqrestore(&fsl_chan->desc_lock, flags);
+#endif
+
+	fsl_chan_xfer_ld_queue(fsl_chan);
+}
+
+/**
+ * fsl_dma_is_complete - Determine the DMA status
+ * @fsl_chan : Freescale DMA channel
+ */
+static enum dma_status fsl_dma_is_complete(struct dma_chan *chan,
+					dma_cookie_t cookie,
+					dma_cookie_t *done,
+					dma_cookie_t *used)
+{
+	struct fsl_dma_chan *fsl_chan = to_fsl_chan(chan);
+	dma_cookie_t last_used;
+	dma_cookie_t last_complete;
+
+	fsl_chan_ld_cleanup(fsl_chan);
+
+	last_used = chan->cookie;
+	last_complete = fsl_chan->completed_cookie;
+
+	if (done)
+		*done = last_complete;
+
+	if (used)
+		*used = last_used;
+
+	return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static irqreturn_t fsl_dma_chan_do_interrupt(int irq, void *data)
+{
+	struct fsl_dma_chan *fsl_chan = (struct fsl_dma_chan *)data;
+	u32 stat;
+	int update_cookie = 0;
+	int xfer_ld_q = 0;
+
+	stat = get_sr(fsl_chan);
+	dev_dbg(fsl_chan->dev, "event: channel %d, stat = 0x%x\n",
+						fsl_chan->id, stat);
+	set_sr(fsl_chan, stat);		/* Clear the event register */
+
+	stat &= ~(FSL_DMA_SR_CB | FSL_DMA_SR_CH);
+	if (!stat)
+		return IRQ_NONE;
+
+	if (stat & FSL_DMA_SR_TE)
+		dev_err(fsl_chan->dev, "Transfer Error!\n");
+
+	/* Programming Error
+	 * The DMA_INTERRUPT async_tx is a NULL transfer, which will
+	 * triger a PE interrupt.
+	 */
+	if (stat & FSL_DMA_SR_PE) {
+		dev_dbg(fsl_chan->dev, "event: Programming Error INT\n");
+		if (get_bcr(fsl_chan) == 0) {
+			/* BCR register is 0, this is a DMA_INTERRUPT async_tx.
+			 * Now, update the completed cookie, and continue the
+			 * next uncompleted transfer.
+			 */
+			update_cookie = 1;
+			xfer_ld_q = 1;
+		}
+		stat &= ~FSL_DMA_SR_PE;
+	}
+
+	/* If the link descriptor segment transfer finishes,
+	 * we will recycle the used descriptor.
+	 */
+	if (stat & FSL_DMA_SR_EOSI) {
+		dev_dbg(fsl_chan->dev, "event: End-of-segments INT\n");
+		dev_dbg(fsl_chan->dev, "event: clndar %p, nlndar %p\n",
+			(void *)get_cdar(fsl_chan), (void *)get_ndar(fsl_chan));
+		stat &= ~FSL_DMA_SR_EOSI;
+		update_cookie = 1;
+	}
+
+	/* For MPC8349, EOCDI event need to update cookie
+	 * and start the next transfer if it exist.
+	 */
+	if (stat & FSL_DMA_SR_EOCDI) {
+		dev_dbg(fsl_chan->dev, "event: End-of-Chain link INT\n");
+		stat &= ~FSL_DMA_SR_EOCDI;
+		update_cookie = 1;
+		xfer_ld_q = 1;
+	}
+
+	/* If it current transfer is the end-of-transfer,
+	 * we should clear the Channel Start bit for
+	 * prepare next transfer.
+	 */
+	if (stat & FSL_DMA_SR_EOLNI) {
+		dev_dbg(fsl_chan->dev, "event: End-of-link INT\n");
+		stat &= ~FSL_DMA_SR_EOLNI;
+		xfer_ld_q = 1;
+	}
+
+	if (update_cookie)
+		fsl_dma_update_completed_cookie(fsl_chan);
+	if (xfer_ld_q)
+		fsl_chan_xfer_ld_queue(fsl_chan);
+	if (stat)
+		dev_dbg(fsl_chan->dev, "event: unhandled sr 0x%02x\n",
+					stat);
+
+	dev_dbg(fsl_chan->dev, "event: Exit\n");
+	tasklet_schedule(&fsl_chan->tasklet);
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t fsl_dma_do_interrupt(int irq, void *data)
+{
+	struct fsl_dma_device *fdev = (struct fsl_dma_device *)data;
+	u32 gsr;
+	int ch_nr;
+
+	gsr = (fdev->feature & FSL_DMA_BIG_ENDIAN) ? in_be32(fdev->reg_base)
+			: in_le32(fdev->reg_base);
+	ch_nr = (32 - ffs(gsr)) / 8;
+
+	return fdev->chan[ch_nr] ? fsl_dma_chan_do_interrupt(irq,
+			fdev->chan[ch_nr]) : IRQ_NONE;
+}
+
+static void dma_do_tasklet(unsigned long data)
+{
+	struct fsl_dma_chan *fsl_chan = (struct fsl_dma_chan *)data;
+	fsl_chan_ld_cleanup(fsl_chan);
+}
+
+static int __devinit fsl_dma_chan_probe(struct fsl_dma_device *fdev,
+	struct device_node *node, u32 feature, const char *compatible)
+{
+	struct fsl_dma_chan *new_fsl_chan;
+	int err;
+
+	/* alloc channel */
+	new_fsl_chan = kzalloc(sizeof(struct fsl_dma_chan), GFP_KERNEL);
+	if (!new_fsl_chan) {
+		dev_err(fdev->dev, "No free memory for allocating "
+				"dma channels!\n");
+		return -ENOMEM;
+	}
+
+	/* get dma channel register base */
+	err = of_address_to_resource(node, 0, &new_fsl_chan->reg);
+	if (err) {
+		dev_err(fdev->dev, "Can't get %s property 'reg'\n",
+				node->full_name);
+		goto err_no_reg;
+	}
+
+	new_fsl_chan->feature = feature;
+
+	if (!fdev->feature)
+		fdev->feature = new_fsl_chan->feature;
+
+	/* If the DMA device's feature is different than its channels',
+	 * report the bug.
+	 */
+	WARN_ON(fdev->feature != new_fsl_chan->feature);
+
+	new_fsl_chan->dev = &new_fsl_chan->common.dev;
+	new_fsl_chan->reg_base = ioremap(new_fsl_chan->reg.start,
+			new_fsl_chan->reg.end - new_fsl_chan->reg.start + 1);
+
+	new_fsl_chan->id = ((new_fsl_chan->reg.start - 0x100) & 0xfff) >> 7;
+	if (new_fsl_chan->id > FSL_DMA_MAX_CHANS_PER_DEVICE) {
+		dev_err(fdev->dev, "There is no %d channel!\n",
+				new_fsl_chan->id);
+		err = -EINVAL;
+		goto err_no_chan;
+	}
+	fdev->chan[new_fsl_chan->id] = new_fsl_chan;
+	tasklet_init(&new_fsl_chan->tasklet, dma_do_tasklet,
+			(unsigned long)new_fsl_chan);
+
+	/* Init the channel */
+	dma_init(new_fsl_chan);
+
+	/* Clear cdar registers */
+	set_cdar(new_fsl_chan, 0);
+
+	switch (new_fsl_chan->feature & FSL_DMA_IP_MASK) {
+	case FSL_DMA_IP_85XX:
+		new_fsl_chan->toggle_ext_start = fsl_chan_toggle_ext_start;
+		new_fsl_chan->toggle_ext_pause = fsl_chan_toggle_ext_pause;
+	case FSL_DMA_IP_83XX:
+		new_fsl_chan->set_src_loop_size = fsl_chan_set_src_loop_size;
+		new_fsl_chan->set_dest_loop_size = fsl_chan_set_dest_loop_size;
+	}
+
+	spin_lock_init(&new_fsl_chan->desc_lock);
+	INIT_LIST_HEAD(&new_fsl_chan->ld_queue);
+
+	new_fsl_chan->common.device = &fdev->common;
+
+	/* Add the channel to DMA device channel list */
+	list_add_tail(&new_fsl_chan->common.device_node,
+			&fdev->common.channels);
+	fdev->common.chancnt++;
+
+	new_fsl_chan->irq = irq_of_parse_and_map(node, 0);
+	if (new_fsl_chan->irq != NO_IRQ) {
+		err = request_irq(new_fsl_chan->irq,
+					&fsl_dma_chan_do_interrupt, IRQF_SHARED,
+					"fsldma-channel", new_fsl_chan);
+		if (err) {
+			dev_err(fdev->dev, "DMA channel %s request_irq error "
+				"with return %d\n", node->full_name, err);
+			goto err_no_irq;
+		}
+	}
+
+	dev_info(fdev->dev, "#%d (%s), irq %d\n", new_fsl_chan->id,
+				compatible, new_fsl_chan->irq);
+
+	return 0;
+
+err_no_irq:
+	list_del(&new_fsl_chan->common.device_node);
+err_no_chan:
+	iounmap(new_fsl_chan->reg_base);
+err_no_reg:
+	kfree(new_fsl_chan);
+	return err;
+}
+
+static void fsl_dma_chan_remove(struct fsl_dma_chan *fchan)
+{
+	free_irq(fchan->irq, fchan);
+	list_del(&fchan->common.device_node);
+	iounmap(fchan->reg_base);
+	kfree(fchan);
+}
+
+static int __devinit of_fsl_dma_probe(struct of_device *dev,
+			const struct of_device_id *match)
+{
+	int err;
+	struct fsl_dma_device *fdev;
+	struct device_node *child;
+
+	fdev = kzalloc(sizeof(struct fsl_dma_device), GFP_KERNEL);
+	if (!fdev) {
+		dev_err(&dev->dev, "No enough memory for 'priv'\n");
+		return -ENOMEM;
+	}
+	fdev->dev = &dev->dev;
+	INIT_LIST_HEAD(&fdev->common.channels);
+
+	/* get DMA controller register base */
+	err = of_address_to_resource(dev->node, 0, &fdev->reg);
+	if (err) {
+		dev_err(&dev->dev, "Can't get %s property 'reg'\n",
+				dev->node->full_name);
+		goto err_no_reg;
+	}
+
+	dev_info(&dev->dev, "Probe the Freescale DMA driver for %s "
+			"controller at %p...\n",
+			match->compatible, (void *)fdev->reg.start);
+	fdev->reg_base = ioremap(fdev->reg.start, fdev->reg.end
+						- fdev->reg.start + 1);
+
+	dma_cap_set(DMA_MEMCPY, fdev->common.cap_mask);
+	dma_cap_set(DMA_INTERRUPT, fdev->common.cap_mask);
+	fdev->common.device_alloc_chan_resources = fsl_dma_alloc_chan_resources;
+	fdev->common.device_free_chan_resources = fsl_dma_free_chan_resources;
+	fdev->common.device_prep_dma_interrupt = fsl_dma_prep_interrupt;
+	fdev->common.device_prep_dma_memcpy = fsl_dma_prep_memcpy;
+	fdev->common.device_is_tx_complete = fsl_dma_is_complete;
+	fdev->common.device_issue_pending = fsl_dma_memcpy_issue_pending;
+	fdev->common.dev = &dev->dev;
+
+	fdev->irq = irq_of_parse_and_map(dev->node, 0);
+	if (fdev->irq != NO_IRQ) {
+		err = request_irq(fdev->irq, &fsl_dma_do_interrupt, IRQF_SHARED,
+					"fsldma-device", fdev);
+		if (err) {
+			dev_err(&dev->dev, "DMA device request_irq error "
+				"with return %d\n", err);
+			goto err;
+		}
+	}
+
+	dev_set_drvdata(&(dev->dev), fdev);
+
+	/* We cannot use of_platform_bus_probe() because there is no
+	 * of_platform_bus_remove.  Instead, we manually instantiate every DMA
+	 * channel object.
+	 */
+	for_each_child_of_node(dev->node, child) {
+		if (of_device_is_compatible(child, "fsl,eloplus-dma-channel"))
+			fsl_dma_chan_probe(fdev, child,
+				FSL_DMA_IP_85XX | FSL_DMA_BIG_ENDIAN,
+				"fsl,eloplus-dma-channel");
+		if (of_device_is_compatible(child, "fsl,elo-dma-channel"))
+			fsl_dma_chan_probe(fdev, child,
+				FSL_DMA_IP_83XX | FSL_DMA_LITTLE_ENDIAN,
+				"fsl,elo-dma-channel");
+	}
+
+	dma_async_device_register(&fdev->common);
+	return 0;
+
+err:
+	iounmap(fdev->reg_base);
+err_no_reg:
+	kfree(fdev);
+	return err;
+}
+
+static int of_fsl_dma_remove(struct of_device *of_dev)
+{
+	struct fsl_dma_device *fdev;
+	unsigned int i;
+
+	fdev = dev_get_drvdata(&of_dev->dev);
+
+	dma_async_device_unregister(&fdev->common);
+
+	for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++)
+		if (fdev->chan[i])
+			fsl_dma_chan_remove(fdev->chan[i]);
+
+	if (fdev->irq != NO_IRQ)
+		free_irq(fdev->irq, fdev);
+
+	iounmap(fdev->reg_base);
+
+	kfree(fdev);
+	dev_set_drvdata(&of_dev->dev, NULL);
+
+	return 0;
+}
+
+static struct of_device_id of_fsl_dma_ids[] = {
+	{ .compatible = "fsl,eloplus-dma", },
+	{ .compatible = "fsl,elo-dma", },
+	{}
+};
+
+static struct of_platform_driver of_fsl_dma_driver = {
+	.name = "fsl-elo-dma",
+	.match_table = of_fsl_dma_ids,
+	.probe = of_fsl_dma_probe,
+	.remove = of_fsl_dma_remove,
+};
+
+static __init int of_fsl_dma_init(void)
+{
+	int ret;
+
+	pr_info("Freescale Elo / Elo Plus DMA driver\n");
+
+	ret = of_register_platform_driver(&of_fsl_dma_driver);
+	if (ret)
+		pr_err("fsldma: failed to register platform driver\n");
+
+	return ret;
+}
+
+static void __exit of_fsl_dma_exit(void)
+{
+	of_unregister_platform_driver(&of_fsl_dma_driver);
+}
+
+subsys_initcall(of_fsl_dma_init);
+module_exit(of_fsl_dma_exit);
+
+MODULE_DESCRIPTION("Freescale Elo / Elo Plus DMA driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/fsldma.h b/drivers/dma/fsldma.h
new file mode 100644
index 0000000..4f21a51
--- /dev/null
+++ b/drivers/dma/fsldma.h
@@ -0,0 +1,198 @@
+/*
+ * Copyright (C) 2007 Freescale Semiconductor, Inc. All rights reserved.
+ *
+ * Author:
+ *   Zhang Wei <wei.zhang@freescale.com>, Jul 2007
+ *   Ebony Zhu <ebony.zhu@freescale.com>, May 2007
+ *
+ * This is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ */
+#ifndef __DMA_FSLDMA_H
+#define __DMA_FSLDMA_H
+
+#include <linux/device.h>
+#include <linux/dmapool.h>
+#include <linux/dmaengine.h>
+
+/* Define data structures needed by Freescale
+ * MPC8540 and MPC8349 DMA controller.
+ */
+#define FSL_DMA_MR_CS		0x00000001
+#define FSL_DMA_MR_CC		0x00000002
+#define FSL_DMA_MR_CA		0x00000008
+#define FSL_DMA_MR_EIE		0x00000040
+#define FSL_DMA_MR_XFE		0x00000020
+#define FSL_DMA_MR_EOLNIE	0x00000100
+#define FSL_DMA_MR_EOLSIE	0x00000080
+#define FSL_DMA_MR_EOSIE	0x00000200
+#define FSL_DMA_MR_CDSM		0x00000010
+#define FSL_DMA_MR_CTM		0x00000004
+#define FSL_DMA_MR_EMP_EN	0x00200000
+#define FSL_DMA_MR_EMS_EN	0x00040000
+#define FSL_DMA_MR_DAHE		0x00002000
+#define FSL_DMA_MR_SAHE		0x00001000
+
+/* Special MR definition for MPC8349 */
+#define FSL_DMA_MR_EOTIE	0x00000080
+
+#define FSL_DMA_SR_CH		0x00000020
+#define FSL_DMA_SR_PE		0x00000010
+#define FSL_DMA_SR_CB		0x00000004
+#define FSL_DMA_SR_TE		0x00000080
+#define FSL_DMA_SR_EOSI		0x00000002
+#define FSL_DMA_SR_EOLSI	0x00000001
+#define FSL_DMA_SR_EOCDI	0x00000001
+#define FSL_DMA_SR_EOLNI	0x00000008
+
+#define FSL_DMA_SATR_SBPATMU			0x20000000
+#define FSL_DMA_SATR_STRANSINT_RIO		0x00c00000
+#define FSL_DMA_SATR_SREADTYPE_SNOOP_READ	0x00050000
+#define FSL_DMA_SATR_SREADTYPE_BP_IORH		0x00020000
+#define FSL_DMA_SATR_SREADTYPE_BP_NREAD		0x00040000
+#define FSL_DMA_SATR_SREADTYPE_BP_MREAD		0x00070000
+
+#define FSL_DMA_DATR_DBPATMU			0x20000000
+#define FSL_DMA_DATR_DTRANSINT_RIO		0x00c00000
+#define FSL_DMA_DATR_DWRITETYPE_SNOOP_WRITE	0x00050000
+#define FSL_DMA_DATR_DWRITETYPE_BP_FLUSH	0x00010000
+
+#define FSL_DMA_EOL		((u64)0x1)
+#define FSL_DMA_SNEN		((u64)0x10)
+#define FSL_DMA_EOSIE		0x8
+#define FSL_DMA_NLDA_MASK	(~(u64)0x1f)
+
+#define FSL_DMA_BCR_MAX_CNT	0x03ffffffu
+
+#define FSL_DMA_DGSR_TE		0x80
+#define FSL_DMA_DGSR_CH		0x20
+#define FSL_DMA_DGSR_PE		0x10
+#define FSL_DMA_DGSR_EOLNI	0x08
+#define FSL_DMA_DGSR_CB		0x04
+#define FSL_DMA_DGSR_EOSI	0x02
+#define FSL_DMA_DGSR_EOLSI	0x01
+
+typedef u64 __bitwise v64;
+typedef u32 __bitwise v32;
+
+struct fsl_dma_ld_hw {
+	v64 src_addr;
+	v64 dst_addr;
+	v64 next_ln_addr;
+	v32 count;
+	v32 reserve;
+} __attribute__((aligned(32)));
+
+struct fsl_desc_sw {
+	struct fsl_dma_ld_hw hw;
+	struct list_head node;
+	struct dma_async_tx_descriptor async_tx;
+	struct list_head *ld;
+	void *priv;
+} __attribute__((aligned(32)));
+
+struct fsl_dma_chan_regs {
+	u32 mr;	/* 0x00 - Mode Register */
+	u32 sr;	/* 0x04 - Status Register */
+	u64 cdar;	/* 0x08 - Current descriptor address register */
+	u64 sar;	/* 0x10 - Source Address Register */
+	u64 dar;	/* 0x18 - Destination Address Register */
+	u32 bcr;	/* 0x20 - Byte Count Register */
+	u64 ndar;	/* 0x24 - Next Descriptor Address Register */
+};
+
+struct fsl_dma_chan;
+#define FSL_DMA_MAX_CHANS_PER_DEVICE 4
+
+struct fsl_dma_device {
+	void __iomem *reg_base;	/* DGSR register base */
+	struct resource reg;	/* Resource for register */
+	struct device *dev;
+	struct dma_device common;
+	struct fsl_dma_chan *chan[FSL_DMA_MAX_CHANS_PER_DEVICE];
+	u32 feature;		/* The same as DMA channels */
+	int irq;		/* Channel IRQ */
+};
+
+/* Define macros for fsl_dma_chan->feature property */
+#define FSL_DMA_LITTLE_ENDIAN	0x00000000
+#define FSL_DMA_BIG_ENDIAN	0x00000001
+
+#define FSL_DMA_IP_MASK		0x00000ff0
+#define FSL_DMA_IP_85XX		0x00000010
+#define FSL_DMA_IP_83XX		0x00000020
+
+#define FSL_DMA_CHAN_PAUSE_EXT	0x00001000
+#define FSL_DMA_CHAN_START_EXT	0x00002000
+
+struct fsl_dma_chan {
+	struct fsl_dma_chan_regs __iomem *reg_base;
+	dma_cookie_t completed_cookie;	/* The maximum cookie completed */
+	spinlock_t desc_lock;		/* Descriptor operation lock */
+	struct list_head ld_queue;	/* Link descriptors queue */
+	struct dma_chan common;		/* DMA common channel */
+	struct dma_pool *desc_pool;	/* Descriptors pool */
+	struct device *dev;		/* Channel device */
+	struct resource reg;		/* Resource for register */
+	int irq;			/* Channel IRQ */
+	int id;				/* Raw id of this channel */
+	struct tasklet_struct tasklet;
+	u32 feature;
+
+	void (*toggle_ext_pause)(struct fsl_dma_chan *fsl_chan, int size);
+	void (*toggle_ext_start)(struct fsl_dma_chan *fsl_chan, int enable);
+	void (*set_src_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
+	void (*set_dest_loop_size)(struct fsl_dma_chan *fsl_chan, int size);
+};
+
+#define to_fsl_chan(chan) container_of(chan, struct fsl_dma_chan, common)
+#define to_fsl_desc(lh) container_of(lh, struct fsl_desc_sw, node)
+#define tx_to_fsl_desc(tx) container_of(tx, struct fsl_desc_sw, async_tx)
+
+#ifndef __powerpc64__
+static u64 in_be64(const u64 __iomem *addr)
+{
+	return ((u64)in_be32((u32 __iomem *)addr) << 32) |
+		(in_be32((u32 __iomem *)addr + 1));
+}
+
+static void out_be64(u64 __iomem *addr, u64 val)
+{
+	out_be32((u32 __iomem *)addr, val >> 32);
+	out_be32((u32 __iomem *)addr + 1, (u32)val);
+}
+
+/* There is no asm instructions for 64 bits reverse loads and stores */
+static u64 in_le64(const u64 __iomem *addr)
+{
+	return ((u64)in_le32((u32 __iomem *)addr + 1) << 32) |
+		(in_le32((u32 __iomem *)addr));
+}
+
+static void out_le64(u64 __iomem *addr, u64 val)
+{
+	out_le32((u32 __iomem *)addr + 1, val >> 32);
+	out_le32((u32 __iomem *)addr, (u32)val);
+}
+#endif
+
+#define DMA_IN(fsl_chan, addr, width)					\
+		(((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ?		\
+			in_be##width(addr) : in_le##width(addr))
+#define DMA_OUT(fsl_chan, addr, val, width)				\
+		(((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ?		\
+			out_be##width(addr, val) : out_le##width(addr, val))
+
+#define DMA_TO_CPU(fsl_chan, d, width)					\
+		(((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ?		\
+			be##width##_to_cpu((__force __be##width)(v##width)d) : \
+			le##width##_to_cpu((__force __le##width)(v##width)d))
+#define CPU_TO_DMA(fsl_chan, c, width)					\
+		(((fsl_chan)->feature & FSL_DMA_BIG_ENDIAN) ?		\
+			(__force v##width)cpu_to_be##width(c) :		\
+			(__force v##width)cpu_to_le##width(c))
+
+#endif	/* __DMA_FSLDMA_H */
diff --git a/drivers/dma/ioat.c b/drivers/dma/ioat.c
new file mode 100644
index 0000000..9b16a3a
--- /dev/null
+++ b/drivers/dma/ioat.c
@@ -0,0 +1,226 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2007 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dca.h>
+#include "ioatdma.h"
+#include "ioatdma_registers.h"
+#include "ioatdma_hw.h"
+
+MODULE_VERSION(IOAT_DMA_VERSION);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Intel Corporation");
+
+static struct pci_device_id ioat_pci_tbl[] = {
+	/* I/OAT v1 platforms */
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_CNB)  },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SCNB) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_UNISYS, PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR) },
+
+	/* I/OAT v2 platforms */
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB) },
+
+	/* I/OAT v3 platforms */
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG0) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG1) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG2) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG3) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG4) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG5) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG6) },
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_TBG7) },
+	{ 0, }
+};
+
+struct ioat_device {
+	struct pci_dev		*pdev;
+	void __iomem		*iobase;
+	struct ioatdma_device	*dma;
+	struct dca_provider	*dca;
+};
+
+static int __devinit ioat_probe(struct pci_dev *pdev,
+				const struct pci_device_id *id);
+static void __devexit ioat_remove(struct pci_dev *pdev);
+
+static int ioat_dca_enabled = 1;
+module_param(ioat_dca_enabled, int, 0644);
+MODULE_PARM_DESC(ioat_dca_enabled, "control support of dca service (default: 1)");
+
+static int ioat_setup_functionality(struct pci_dev *pdev, void __iomem *iobase)
+{
+	struct ioat_device *device = pci_get_drvdata(pdev);
+	u8 version;
+	int err = 0;
+
+	version = readb(iobase + IOAT_VER_OFFSET);
+	switch (version) {
+	case IOAT_VER_1_2:
+		device->dma = ioat_dma_probe(pdev, iobase);
+		if (device->dma && ioat_dca_enabled)
+			device->dca = ioat_dca_init(pdev, iobase);
+		break;
+	case IOAT_VER_2_0:
+		device->dma = ioat_dma_probe(pdev, iobase);
+		if (device->dma && ioat_dca_enabled)
+			device->dca = ioat2_dca_init(pdev, iobase);
+		break;
+	case IOAT_VER_3_0:
+		device->dma = ioat_dma_probe(pdev, iobase);
+		if (device->dma && ioat_dca_enabled)
+			device->dca = ioat3_dca_init(pdev, iobase);
+		break;
+	default:
+		err = -ENODEV;
+		break;
+	}
+	if (!device->dma)
+		err = -ENODEV;
+	return err;
+}
+
+static void ioat_shutdown_functionality(struct pci_dev *pdev)
+{
+	struct ioat_device *device = pci_get_drvdata(pdev);
+
+	dev_err(&pdev->dev, "Removing dma and dca services\n");
+	if (device->dca) {
+		unregister_dca_provider(device->dca);
+		free_dca_provider(device->dca);
+		device->dca = NULL;
+	}
+
+	if (device->dma) {
+		ioat_dma_remove(device->dma);
+		device->dma = NULL;
+	}
+}
+
+static struct pci_driver ioat_pci_driver = {
+	.name		= "ioatdma",
+	.id_table	= ioat_pci_tbl,
+	.probe		= ioat_probe,
+	.shutdown	= ioat_shutdown_functionality,
+	.remove		= __devexit_p(ioat_remove),
+};
+
+static int __devinit ioat_probe(struct pci_dev *pdev,
+				const struct pci_device_id *id)
+{
+	void __iomem *iobase;
+	struct ioat_device *device;
+	unsigned long mmio_start, mmio_len;
+	int err;
+
+	err = pci_enable_device(pdev);
+	if (err)
+		goto err_enable_device;
+
+	err = pci_request_regions(pdev, ioat_pci_driver.name);
+	if (err)
+		goto err_request_regions;
+
+	err = pci_set_dma_mask(pdev, DMA_64BIT_MASK);
+	if (err)
+		err = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+	if (err)
+		goto err_set_dma_mask;
+
+	err = pci_set_consistent_dma_mask(pdev, DMA_64BIT_MASK);
+	if (err)
+		err = pci_set_consistent_dma_mask(pdev, DMA_32BIT_MASK);
+	if (err)
+		goto err_set_dma_mask;
+
+	mmio_start = pci_resource_start(pdev, 0);
+	mmio_len = pci_resource_len(pdev, 0);
+	iobase = ioremap(mmio_start, mmio_len);
+	if (!iobase) {
+		err = -ENOMEM;
+		goto err_ioremap;
+	}
+
+	device = kzalloc(sizeof(*device), GFP_KERNEL);
+	if (!device) {
+		err = -ENOMEM;
+		goto err_kzalloc;
+	}
+	device->pdev = pdev;
+	pci_set_drvdata(pdev, device);
+	device->iobase = iobase;
+
+	pci_set_master(pdev);
+
+	err = ioat_setup_functionality(pdev, iobase);
+	if (err)
+		goto err_version;
+
+	return 0;
+
+err_version:
+	kfree(device);
+err_kzalloc:
+	iounmap(iobase);
+err_ioremap:
+err_set_dma_mask:
+	pci_release_regions(pdev);
+	pci_disable_device(pdev);
+err_request_regions:
+err_enable_device:
+	return err;
+}
+
+/*
+ * It is unsafe to remove this module: if removed while a requested
+ * dma is outstanding, esp. from tcp, it is possible to hang while
+ * waiting for something that will never finish.  However, if you're
+ * feeling lucky, this usually works just fine.
+ */
+static void __devexit ioat_remove(struct pci_dev *pdev)
+{
+	struct ioat_device *device = pci_get_drvdata(pdev);
+
+	ioat_shutdown_functionality(pdev);
+
+	kfree(device);
+}
+
+static int __init ioat_init_module(void)
+{
+	return pci_register_driver(&ioat_pci_driver);
+}
+module_init(ioat_init_module);
+
+static void __exit ioat_exit_module(void)
+{
+	pci_unregister_driver(&ioat_pci_driver);
+}
+module_exit(ioat_exit_module);
diff --git a/drivers/dma/ioat_dca.c b/drivers/dma/ioat_dca.c
new file mode 100644
index 0000000..6cf622d
--- /dev/null
+++ b/drivers/dma/ioat_dca.c
@@ -0,0 +1,657 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2007 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/smp.h>
+#include <linux/interrupt.h>
+#include <linux/dca.h>
+
+/* either a kernel change is needed, or we need something like this in kernel */
+#ifndef CONFIG_SMP
+#include <asm/smp.h>
+#undef cpu_physical_id
+#define cpu_physical_id(cpu) (cpuid_ebx(1) >> 24)
+#endif
+
+#include "ioatdma.h"
+#include "ioatdma_registers.h"
+
+/*
+ * Bit 7 of a tag map entry is the "valid" bit, if it is set then bits 0:6
+ * contain the bit number of the APIC ID to map into the DCA tag.  If the valid
+ * bit is not set, then the value must be 0 or 1 and defines the bit in the tag.
+ */
+#define DCA_TAG_MAP_VALID 0x80
+
+#define DCA3_TAG_MAP_BIT_TO_INV 0x80
+#define DCA3_TAG_MAP_BIT_TO_SEL 0x40
+#define DCA3_TAG_MAP_LITERAL_VAL 0x1
+
+#define DCA_TAG_MAP_MASK 0xDF
+
+/*
+ * "Legacy" DCA systems do not implement the DCA register set in the
+ * I/OAT device.  Software needs direct support for their tag mappings.
+ */
+
+#define APICID_BIT(x)		(DCA_TAG_MAP_VALID | (x))
+#define IOAT_TAG_MAP_LEN	8
+
+static u8 ioat_tag_map_BNB[IOAT_TAG_MAP_LEN] = {
+	1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
+static u8 ioat_tag_map_SCNB[IOAT_TAG_MAP_LEN] = {
+	1, APICID_BIT(1), APICID_BIT(2), APICID_BIT(2), };
+static u8 ioat_tag_map_CNB[IOAT_TAG_MAP_LEN] = {
+	1, APICID_BIT(1), APICID_BIT(3), APICID_BIT(4), APICID_BIT(2), };
+static u8 ioat_tag_map_UNISYS[IOAT_TAG_MAP_LEN] = { 0 };
+
+/* pack PCI B/D/F into a u16 */
+static inline u16 dcaid_from_pcidev(struct pci_dev *pci)
+{
+	return (pci->bus->number << 8) | pci->devfn;
+}
+
+static int dca_enabled_in_bios(struct pci_dev *pdev)
+{
+	/* CPUID level 9 returns DCA configuration */
+	/* Bit 0 indicates DCA enabled by the BIOS */
+	unsigned long cpuid_level_9;
+	int res;
+
+	cpuid_level_9 = cpuid_eax(9);
+	res = test_bit(0, &cpuid_level_9);
+	if (!res)
+		dev_err(&pdev->dev, "DCA is disabled in BIOS\n");
+
+	return res;
+}
+
+static int system_has_dca_enabled(struct pci_dev *pdev)
+{
+	if (boot_cpu_has(X86_FEATURE_DCA))
+		return dca_enabled_in_bios(pdev);
+
+	dev_err(&pdev->dev, "boot cpu doesn't have X86_FEATURE_DCA\n");
+	return 0;
+}
+
+struct ioat_dca_slot {
+	struct pci_dev *pdev;	/* requester device */
+	u16 rid;		/* requester id, as used by IOAT */
+};
+
+#define IOAT_DCA_MAX_REQ 6
+#define IOAT3_DCA_MAX_REQ 2
+
+struct ioat_dca_priv {
+	void __iomem		*iobase;
+	void __iomem		*dca_base;
+	int			 max_requesters;
+	int			 requester_count;
+	u8			 tag_map[IOAT_TAG_MAP_LEN];
+	struct ioat_dca_slot 	 req_slots[0];
+};
+
+/* 5000 series chipset DCA Port Requester ID Table Entry Format
+ * [15:8]	PCI-Express Bus Number
+ * [7:3]	PCI-Express Device Number
+ * [2:0]	PCI-Express Function Number
+ *
+ * 5000 series chipset DCA control register format
+ * [7:1]	Reserved (0)
+ * [0]		Ignore Function Number
+ */
+
+static int ioat_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+	u16 id;
+
+	/* This implementation only supports PCI-Express */
+	if (dev->bus != &pci_bus_type)
+		return -ENODEV;
+	pdev = to_pci_dev(dev);
+	id = dcaid_from_pcidev(pdev);
+
+	if (ioatdca->requester_count == ioatdca->max_requesters)
+		return -ENODEV;
+
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == NULL) {
+			/* found an empty slot */
+			ioatdca->requester_count++;
+			ioatdca->req_slots[i].pdev = pdev;
+			ioatdca->req_slots[i].rid = id;
+			writew(id, ioatdca->dca_base + (i * 4));
+			/* make sure the ignore function bit is off */
+			writeb(0, ioatdca->dca_base + (i * 4) + 2);
+			return i;
+		}
+	}
+	/* Error, ioatdma->requester_count is out of whack */
+	return -EFAULT;
+}
+
+static int ioat_dca_remove_requester(struct dca_provider *dca,
+				     struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+
+	/* This implementation only supports PCI-Express */
+	if (dev->bus != &pci_bus_type)
+		return -ENODEV;
+	pdev = to_pci_dev(dev);
+
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == pdev) {
+			writew(0, ioatdca->dca_base + (i * 4));
+			ioatdca->req_slots[i].pdev = NULL;
+			ioatdca->req_slots[i].rid = 0;
+			ioatdca->requester_count--;
+			return i;
+		}
+	}
+	return -ENODEV;
+}
+
+static u8 ioat_dca_get_tag(struct dca_provider *dca,
+			   struct device *dev,
+			   int cpu)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	int i, apic_id, bit, value;
+	u8 entry, tag;
+
+	tag = 0;
+	apic_id = cpu_physical_id(cpu);
+
+	for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
+		entry = ioatdca->tag_map[i];
+		if (entry & DCA_TAG_MAP_VALID) {
+			bit = entry & ~DCA_TAG_MAP_VALID;
+			value = (apic_id & (1 << bit)) ? 1 : 0;
+		} else {
+			value = entry ? 1 : 0;
+		}
+		tag |= (value << i);
+	}
+	return tag;
+}
+
+static int ioat_dca_dev_managed(struct dca_provider *dca,
+				struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+
+	pdev = to_pci_dev(dev);
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == pdev)
+			return 1;
+	}
+	return 0;
+}
+
+static struct dca_ops ioat_dca_ops = {
+	.add_requester		= ioat_dca_add_requester,
+	.remove_requester	= ioat_dca_remove_requester,
+	.get_tag		= ioat_dca_get_tag,
+	.dev_managed		= ioat_dca_dev_managed,
+};
+
+
+struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+	struct dca_provider *dca;
+	struct ioat_dca_priv *ioatdca;
+	u8 *tag_map = NULL;
+	int i;
+	int err;
+	u8 version;
+	u8 max_requesters;
+
+	if (!system_has_dca_enabled(pdev))
+		return NULL;
+
+	/* I/OAT v1 systems must have a known tag_map to support DCA */
+	switch (pdev->vendor) {
+	case PCI_VENDOR_ID_INTEL:
+		switch (pdev->device) {
+		case PCI_DEVICE_ID_INTEL_IOAT:
+			tag_map = ioat_tag_map_BNB;
+			break;
+		case PCI_DEVICE_ID_INTEL_IOAT_CNB:
+			tag_map = ioat_tag_map_CNB;
+			break;
+		case PCI_DEVICE_ID_INTEL_IOAT_SCNB:
+			tag_map = ioat_tag_map_SCNB;
+			break;
+		}
+		break;
+	case PCI_VENDOR_ID_UNISYS:
+		switch (pdev->device) {
+		case PCI_DEVICE_ID_UNISYS_DMA_DIRECTOR:
+			tag_map = ioat_tag_map_UNISYS;
+			break;
+		}
+		break;
+	}
+	if (tag_map == NULL)
+		return NULL;
+
+	version = readb(iobase + IOAT_VER_OFFSET);
+	if (version == IOAT_VER_3_0)
+		max_requesters = IOAT3_DCA_MAX_REQ;
+	else
+		max_requesters = IOAT_DCA_MAX_REQ;
+
+	dca = alloc_dca_provider(&ioat_dca_ops,
+			sizeof(*ioatdca) +
+			(sizeof(struct ioat_dca_slot) * max_requesters));
+	if (!dca)
+		return NULL;
+
+	ioatdca = dca_priv(dca);
+	ioatdca->max_requesters = max_requesters;
+	ioatdca->dca_base = iobase + 0x54;
+
+	/* copy over the APIC ID to DCA tag mapping */
+	for (i = 0; i < IOAT_TAG_MAP_LEN; i++)
+		ioatdca->tag_map[i] = tag_map[i];
+
+	err = register_dca_provider(dca, &pdev->dev);
+	if (err) {
+		free_dca_provider(dca);
+		return NULL;
+	}
+
+	return dca;
+}
+
+
+static int ioat2_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+	u16 id;
+	u16 global_req_table;
+
+	/* This implementation only supports PCI-Express */
+	if (dev->bus != &pci_bus_type)
+		return -ENODEV;
+	pdev = to_pci_dev(dev);
+	id = dcaid_from_pcidev(pdev);
+
+	if (ioatdca->requester_count == ioatdca->max_requesters)
+		return -ENODEV;
+
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == NULL) {
+			/* found an empty slot */
+			ioatdca->requester_count++;
+			ioatdca->req_slots[i].pdev = pdev;
+			ioatdca->req_slots[i].rid = id;
+			global_req_table =
+			      readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
+			writel(id | IOAT_DCA_GREQID_VALID,
+			       ioatdca->iobase + global_req_table + (i * 4));
+			return i;
+		}
+	}
+	/* Error, ioatdma->requester_count is out of whack */
+	return -EFAULT;
+}
+
+static int ioat2_dca_remove_requester(struct dca_provider *dca,
+				      struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+	u16 global_req_table;
+
+	/* This implementation only supports PCI-Express */
+	if (dev->bus != &pci_bus_type)
+		return -ENODEV;
+	pdev = to_pci_dev(dev);
+
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == pdev) {
+			global_req_table =
+			      readw(ioatdca->dca_base + IOAT_DCA_GREQID_OFFSET);
+			writel(0, ioatdca->iobase + global_req_table + (i * 4));
+			ioatdca->req_slots[i].pdev = NULL;
+			ioatdca->req_slots[i].rid = 0;
+			ioatdca->requester_count--;
+			return i;
+		}
+	}
+	return -ENODEV;
+}
+
+static u8 ioat2_dca_get_tag(struct dca_provider *dca,
+			    struct device *dev,
+			    int cpu)
+{
+	u8 tag;
+
+	tag = ioat_dca_get_tag(dca, dev, cpu);
+	tag = (~tag) & 0x1F;
+	return tag;
+}
+
+static struct dca_ops ioat2_dca_ops = {
+	.add_requester		= ioat2_dca_add_requester,
+	.remove_requester	= ioat2_dca_remove_requester,
+	.get_tag		= ioat2_dca_get_tag,
+	.dev_managed		= ioat_dca_dev_managed,
+};
+
+static int ioat2_dca_count_dca_slots(void __iomem *iobase, u16 dca_offset)
+{
+	int slots = 0;
+	u32 req;
+	u16 global_req_table;
+
+	global_req_table = readw(iobase + dca_offset + IOAT_DCA_GREQID_OFFSET);
+	if (global_req_table == 0)
+		return 0;
+	do {
+		req = readl(iobase + global_req_table + (slots * sizeof(u32)));
+		slots++;
+	} while ((req & IOAT_DCA_GREQID_LASTID) == 0);
+
+	return slots;
+}
+
+struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+	struct dca_provider *dca;
+	struct ioat_dca_priv *ioatdca;
+	int slots;
+	int i;
+	int err;
+	u32 tag_map;
+	u16 dca_offset;
+	u16 csi_fsb_control;
+	u16 pcie_control;
+	u8 bit;
+
+	if (!system_has_dca_enabled(pdev))
+		return NULL;
+
+	dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
+	if (dca_offset == 0)
+		return NULL;
+
+	slots = ioat2_dca_count_dca_slots(iobase, dca_offset);
+	if (slots == 0)
+		return NULL;
+
+	dca = alloc_dca_provider(&ioat2_dca_ops,
+				 sizeof(*ioatdca)
+				      + (sizeof(struct ioat_dca_slot) * slots));
+	if (!dca)
+		return NULL;
+
+	ioatdca = dca_priv(dca);
+	ioatdca->iobase = iobase;
+	ioatdca->dca_base = iobase + dca_offset;
+	ioatdca->max_requesters = slots;
+
+	/* some bios might not know to turn these on */
+	csi_fsb_control = readw(ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
+	if ((csi_fsb_control & IOAT_FSB_CAP_ENABLE_PREFETCH) == 0) {
+		csi_fsb_control |= IOAT_FSB_CAP_ENABLE_PREFETCH;
+		writew(csi_fsb_control,
+		       ioatdca->dca_base + IOAT_FSB_CAP_ENABLE_OFFSET);
+	}
+	pcie_control = readw(ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
+	if ((pcie_control & IOAT_PCI_CAP_ENABLE_MEMWR) == 0) {
+		pcie_control |= IOAT_PCI_CAP_ENABLE_MEMWR;
+		writew(pcie_control,
+		       ioatdca->dca_base + IOAT_PCI_CAP_ENABLE_OFFSET);
+	}
+
+
+	/* TODO version, compatibility and configuration checks */
+
+	/* copy out the APIC to DCA tag map */
+	tag_map = readl(ioatdca->dca_base + IOAT_APICID_TAG_MAP_OFFSET);
+	for (i = 0; i < 5; i++) {
+		bit = (tag_map >> (4 * i)) & 0x0f;
+		if (bit < 8)
+			ioatdca->tag_map[i] = bit | DCA_TAG_MAP_VALID;
+		else
+			ioatdca->tag_map[i] = 0;
+	}
+
+	err = register_dca_provider(dca, &pdev->dev);
+	if (err) {
+		free_dca_provider(dca);
+		return NULL;
+	}
+
+	return dca;
+}
+
+static int ioat3_dca_add_requester(struct dca_provider *dca, struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+	u16 id;
+	u16 global_req_table;
+
+	/* This implementation only supports PCI-Express */
+	if (dev->bus != &pci_bus_type)
+		return -ENODEV;
+	pdev = to_pci_dev(dev);
+	id = dcaid_from_pcidev(pdev);
+
+	if (ioatdca->requester_count == ioatdca->max_requesters)
+		return -ENODEV;
+
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == NULL) {
+			/* found an empty slot */
+			ioatdca->requester_count++;
+			ioatdca->req_slots[i].pdev = pdev;
+			ioatdca->req_slots[i].rid = id;
+			global_req_table =
+			      readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+			writel(id | IOAT_DCA_GREQID_VALID,
+			       ioatdca->iobase + global_req_table + (i * 4));
+			return i;
+		}
+	}
+	/* Error, ioatdma->requester_count is out of whack */
+	return -EFAULT;
+}
+
+static int ioat3_dca_remove_requester(struct dca_provider *dca,
+				      struct device *dev)
+{
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	struct pci_dev *pdev;
+	int i;
+	u16 global_req_table;
+
+	/* This implementation only supports PCI-Express */
+	if (dev->bus != &pci_bus_type)
+		return -ENODEV;
+	pdev = to_pci_dev(dev);
+
+	for (i = 0; i < ioatdca->max_requesters; i++) {
+		if (ioatdca->req_slots[i].pdev == pdev) {
+			global_req_table =
+			      readw(ioatdca->dca_base + IOAT3_DCA_GREQID_OFFSET);
+			writel(0, ioatdca->iobase + global_req_table + (i * 4));
+			ioatdca->req_slots[i].pdev = NULL;
+			ioatdca->req_slots[i].rid = 0;
+			ioatdca->requester_count--;
+			return i;
+		}
+	}
+	return -ENODEV;
+}
+
+static u8 ioat3_dca_get_tag(struct dca_provider *dca,
+			    struct device *dev,
+			    int cpu)
+{
+	u8 tag;
+
+	struct ioat_dca_priv *ioatdca = dca_priv(dca);
+	int i, apic_id, bit, value;
+	u8 entry;
+
+	tag = 0;
+	apic_id = cpu_physical_id(cpu);
+
+	for (i = 0; i < IOAT_TAG_MAP_LEN; i++) {
+		entry = ioatdca->tag_map[i];
+		if (entry & DCA3_TAG_MAP_BIT_TO_SEL) {
+			bit = entry &
+				~(DCA3_TAG_MAP_BIT_TO_SEL | DCA3_TAG_MAP_BIT_TO_INV);
+			value = (apic_id & (1 << bit)) ? 1 : 0;
+		} else if (entry & DCA3_TAG_MAP_BIT_TO_INV) {
+			bit = entry & ~DCA3_TAG_MAP_BIT_TO_INV;
+			value = (apic_id & (1 << bit)) ? 0 : 1;
+		} else {
+			value = (entry & DCA3_TAG_MAP_LITERAL_VAL) ? 1 : 0;
+		}
+		tag |= (value << i);
+	}
+
+	return tag;
+}
+
+static struct dca_ops ioat3_dca_ops = {
+	.add_requester		= ioat3_dca_add_requester,
+	.remove_requester	= ioat3_dca_remove_requester,
+	.get_tag		= ioat3_dca_get_tag,
+	.dev_managed		= ioat_dca_dev_managed,
+};
+
+static int ioat3_dca_count_dca_slots(void *iobase, u16 dca_offset)
+{
+	int slots = 0;
+	u32 req;
+	u16 global_req_table;
+
+	global_req_table = readw(iobase + dca_offset + IOAT3_DCA_GREQID_OFFSET);
+	if (global_req_table == 0)
+		return 0;
+
+	do {
+		req = readl(iobase + global_req_table + (slots * sizeof(u32)));
+		slots++;
+	} while ((req & IOAT_DCA_GREQID_LASTID) == 0);
+
+	return slots;
+}
+
+struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase)
+{
+	struct dca_provider *dca;
+	struct ioat_dca_priv *ioatdca;
+	int slots;
+	int i;
+	int err;
+	u16 dca_offset;
+	u16 csi_fsb_control;
+	u16 pcie_control;
+	u8 bit;
+
+	union {
+		u64 full;
+		struct {
+			u32 low;
+			u32 high;
+		};
+	} tag_map;
+
+	if (!system_has_dca_enabled(pdev))
+		return NULL;
+
+	dca_offset = readw(iobase + IOAT_DCAOFFSET_OFFSET);
+	if (dca_offset == 0)
+		return NULL;
+
+	slots = ioat3_dca_count_dca_slots(iobase, dca_offset);
+	if (slots == 0)
+		return NULL;
+
+	dca = alloc_dca_provider(&ioat3_dca_ops,
+				 sizeof(*ioatdca)
+				      + (sizeof(struct ioat_dca_slot) * slots));
+	if (!dca)
+		return NULL;
+
+	ioatdca = dca_priv(dca);
+	ioatdca->iobase = iobase;
+	ioatdca->dca_base = iobase + dca_offset;
+	ioatdca->max_requesters = slots;
+
+	/* some bios might not know to turn these on */
+	csi_fsb_control = readw(ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+	if ((csi_fsb_control & IOAT3_CSI_CONTROL_PREFETCH) == 0) {
+		csi_fsb_control |= IOAT3_CSI_CONTROL_PREFETCH;
+		writew(csi_fsb_control,
+		       ioatdca->dca_base + IOAT3_CSI_CONTROL_OFFSET);
+	}
+	pcie_control = readw(ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+	if ((pcie_control & IOAT3_PCI_CONTROL_MEMWR) == 0) {
+		pcie_control |= IOAT3_PCI_CONTROL_MEMWR;
+		writew(pcie_control,
+		       ioatdca->dca_base + IOAT3_PCI_CONTROL_OFFSET);
+	}
+
+
+	/* TODO version, compatibility and configuration checks */
+
+	/* copy out the APIC to DCA tag map */
+	tag_map.low =
+		readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_LOW);
+	tag_map.high =
+		readl(ioatdca->dca_base + IOAT3_APICID_TAG_MAP_OFFSET_HIGH);
+	for (i = 0; i < 8; i++) {
+		bit = tag_map.full >> (8 * i);
+		ioatdca->tag_map[i] = bit & DCA_TAG_MAP_MASK;
+	}
+
+	err = register_dca_provider(dca, &pdev->dev);
+	if (err) {
+		free_dca_provider(dca);
+		return NULL;
+	}
+
+	return dca;
+}
diff --git a/drivers/dma/ioat_dma.c b/drivers/dma/ioat_dma.c
new file mode 100644
index 0000000..f2ed033
--- /dev/null
+++ b/drivers/dma/ioat_dma.c
@@ -0,0 +1,1723 @@
+/*
+ * Intel I/OAT DMA Linux driver
+ * Copyright(c) 2004 - 2007 Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * The full GNU General Public License is included in this distribution in
+ * the file called "COPYING".
+ *
+ */
+
+/*
+ * This driver supports an Intel I/OAT DMA engine, which does asynchronous
+ * copy operations.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/interrupt.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/workqueue.h>
+#include <linux/i7300_idle.h>
+#include "ioatdma.h"
+#include "ioatdma_registers.h"
+#include "ioatdma_hw.h"
+
+#define to_ioat_chan(chan) container_of(chan, struct ioat_dma_chan, common)
+#define to_ioatdma_device(dev) container_of(dev, struct ioatdma_device, common)
+#define to_ioat_desc(lh) container_of(lh, struct ioat_desc_sw, node)
+#define tx_to_ioat_desc(tx) container_of(tx, struct ioat_desc_sw, async_tx)
+
+#define chan_num(ch) ((int)((ch)->reg_base - (ch)->device->reg_base) / 0x80)
+static int ioat_pending_level = 4;
+module_param(ioat_pending_level, int, 0644);
+MODULE_PARM_DESC(ioat_pending_level,
+		 "high-water mark for pushing ioat descriptors (default: 4)");
+
+#define RESET_DELAY  msecs_to_jiffies(100)
+#define WATCHDOG_DELAY  round_jiffies(msecs_to_jiffies(2000))
+static void ioat_dma_chan_reset_part2(struct work_struct *work);
+static void ioat_dma_chan_watchdog(struct work_struct *work);
+
+/*
+ * workaround for IOAT ver.3.0 null descriptor issue
+ * (channel returns error when size is 0)
+ */
+#define NULL_DESC_BUFFER_SIZE 1
+
+/* internal functions */
+static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan);
+static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan);
+
+static struct ioat_desc_sw *
+ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
+static struct ioat_desc_sw *
+ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan);
+
+static inline struct ioat_dma_chan *ioat_lookup_chan_by_index(
+						struct ioatdma_device *device,
+						int index)
+{
+	return device->idx[index];
+}
+
+/**
+ * ioat_dma_do_interrupt - handler used for single vector interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+static irqreturn_t ioat_dma_do_interrupt(int irq, void *data)
+{
+	struct ioatdma_device *instance = data;
+	struct ioat_dma_chan *ioat_chan;
+	unsigned long attnstatus;
+	int bit;
+	u8 intrctrl;
+
+	intrctrl = readb(instance->reg_base + IOAT_INTRCTRL_OFFSET);
+
+	if (!(intrctrl & IOAT_INTRCTRL_MASTER_INT_EN))
+		return IRQ_NONE;
+
+	if (!(intrctrl & IOAT_INTRCTRL_INT_STATUS)) {
+		writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+		return IRQ_NONE;
+	}
+
+	attnstatus = readl(instance->reg_base + IOAT_ATTNSTATUS_OFFSET);
+	for_each_bit(bit, &attnstatus, BITS_PER_LONG) {
+		ioat_chan = ioat_lookup_chan_by_index(instance, bit);
+		tasklet_schedule(&ioat_chan->cleanup_task);
+	}
+
+	writeb(intrctrl, instance->reg_base + IOAT_INTRCTRL_OFFSET);
+	return IRQ_HANDLED;
+}
+
+/**
+ * ioat_dma_do_interrupt_msix - handler used for vector-per-channel interrupt mode
+ * @irq: interrupt id
+ * @data: interrupt data
+ */
+static irqreturn_t ioat_dma_do_interrupt_msix(int irq, void *data)
+{
+	struct ioat_dma_chan *ioat_chan = data;
+
+	tasklet_schedule(&ioat_chan->cleanup_task);
+
+	return IRQ_HANDLED;
+}
+
+static void ioat_dma_cleanup_tasklet(unsigned long data);
+
+/**
+ * ioat_dma_enumerate_channels - find and initialize the device's channels
+ * @device: the device to be enumerated
+ */
+static int ioat_dma_enumerate_channels(struct ioatdma_device *device)
+{
+	u8 xfercap_scale;
+	u32 xfercap;
+	int i;
+	struct ioat_dma_chan *ioat_chan;
+
+	/*
+	 * IOAT ver.3 workarounds
+	 */
+	if (device->version == IOAT_VER_3_0) {
+		u32 chan_err_mask;
+		u16 dev_id;
+		u32 dmauncerrsts;
+
+		/*
+		 * Write CHANERRMSK_INT with 3E07h to mask out the errors
+		 * that can cause stability issues for IOAT ver.3
+		 */
+		chan_err_mask = 0x3E07;
+		pci_write_config_dword(device->pdev,
+			IOAT_PCI_CHANERRMASK_INT_OFFSET,
+			chan_err_mask);
+
+		/*
+		 * Clear DMAUNCERRSTS Cfg-Reg Parity Error status bit
+		 * (workaround for spurious config parity error after restart)
+		 */
+		pci_read_config_word(device->pdev,
+			IOAT_PCI_DEVICE_ID_OFFSET,
+			&dev_id);
+		if (dev_id == PCI_DEVICE_ID_INTEL_IOAT_TBG0) {
+			dmauncerrsts = 0x10;
+			pci_write_config_dword(device->pdev,
+				IOAT_PCI_DMAUNCERRSTS_OFFSET,
+				dmauncerrsts);
+		}
+	}
+
+	device->common.chancnt = readb(device->reg_base + IOAT_CHANCNT_OFFSET);
+	xfercap_scale = readb(device->reg_base + IOAT_XFERCAP_OFFSET);
+	xfercap = (xfercap_scale == 0 ? -1 : (1UL << xfercap_scale));
+
+#ifdef  CONFIG_I7300_IDLE_IOAT_CHANNEL
+	if (i7300_idle_platform_probe(NULL, NULL) == 0) {
+		device->common.chancnt--;
+	}
+#endif
+	for (i = 0; i < device->common.chancnt; i++) {
+		ioat_chan = kzalloc(sizeof(*ioat_chan), GFP_KERNEL);
+		if (!ioat_chan) {
+			device->common.chancnt = i;
+			break;
+		}
+
+		ioat_chan->device = device;
+		ioat_chan->reg_base = device->reg_base + (0x80 * (i + 1));
+		ioat_chan->xfercap = xfercap;
+		ioat_chan->desccount = 0;
+		INIT_DELAYED_WORK(&ioat_chan->work, ioat_dma_chan_reset_part2);
+		if (ioat_chan->device->version != IOAT_VER_1_2) {
+			writel(IOAT_DCACTRL_CMPL_WRITE_ENABLE
+					| IOAT_DMA_DCA_ANY_CPU,
+				ioat_chan->reg_base + IOAT_DCACTRL_OFFSET);
+		}
+		spin_lock_init(&ioat_chan->cleanup_lock);
+		spin_lock_init(&ioat_chan->desc_lock);
+		INIT_LIST_HEAD(&ioat_chan->free_desc);
+		INIT_LIST_HEAD(&ioat_chan->used_desc);
+		/* This should be made common somewhere in dmaengine.c */
+		ioat_chan->common.device = &device->common;
+		list_add_tail(&ioat_chan->common.device_node,
+			      &device->common.channels);
+		device->idx[i] = ioat_chan;
+		tasklet_init(&ioat_chan->cleanup_task,
+			     ioat_dma_cleanup_tasklet,
+			     (unsigned long) ioat_chan);
+		tasklet_disable(&ioat_chan->cleanup_task);
+	}
+	return device->common.chancnt;
+}
+
+/**
+ * ioat_dma_memcpy_issue_pending - push potentially unrecognized appended
+ *                                 descriptors to hw
+ * @chan: DMA channel handle
+ */
+static inline void __ioat1_dma_memcpy_issue_pending(
+						struct ioat_dma_chan *ioat_chan)
+{
+	ioat_chan->pending = 0;
+	writeb(IOAT_CHANCMD_APPEND, ioat_chan->reg_base + IOAT1_CHANCMD_OFFSET);
+}
+
+static void ioat1_dma_memcpy_issue_pending(struct dma_chan *chan)
+{
+	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+
+	if (ioat_chan->pending > 0) {
+		spin_lock_bh(&ioat_chan->desc_lock);
+		__ioat1_dma_memcpy_issue_pending(ioat_chan);
+		spin_unlock_bh(&ioat_chan->desc_lock);
+	}
+}
+
+static inline void __ioat2_dma_memcpy_issue_pending(
+						struct ioat_dma_chan *ioat_chan)
+{
+	ioat_chan->pending = 0;
+	writew(ioat_chan->dmacount,
+	       ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
+}
+
+static void ioat2_dma_memcpy_issue_pending(struct dma_chan *chan)
+{
+	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+
+	if (ioat_chan->pending > 0) {
+		spin_lock_bh(&ioat_chan->desc_lock);
+		__ioat2_dma_memcpy_issue_pending(ioat_chan);
+		spin_unlock_bh(&ioat_chan->desc_lock);
+	}
+}
+
+
+/**
+ * ioat_dma_chan_reset_part2 - reinit the channel after a reset
+ */
+static void ioat_dma_chan_reset_part2(struct work_struct *work)
+{
+	struct ioat_dma_chan *ioat_chan =
+		container_of(work, struct ioat_dma_chan, work.work);
+	struct ioat_desc_sw *desc;
+
+	spin_lock_bh(&ioat_chan->cleanup_lock);
+	spin_lock_bh(&ioat_chan->desc_lock);
+
+	ioat_chan->completion_virt->low = 0;
+	ioat_chan->completion_virt->high = 0;
+	ioat_chan->pending = 0;
+
+	/*
+	 * count the descriptors waiting, and be sure to do it
+	 * right for both the CB1 line and the CB2 ring
+	 */
+	ioat_chan->dmacount = 0;
+	if (ioat_chan->used_desc.prev) {
+		desc = to_ioat_desc(ioat_chan->used_desc.prev);
+		do {
+			ioat_chan->dmacount++;
+			desc = to_ioat_desc(desc->node.next);
+		} while (&desc->node != ioat_chan->used_desc.next);
+	}
+
+	/*
+	 * write the new starting descriptor address
+	 * this puts channel engine into ARMED state
+	 */
+	desc = to_ioat_desc(ioat_chan->used_desc.prev);
+	switch (ioat_chan->device->version) {
+	case IOAT_VER_1_2:
+		writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
+		       ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
+		writel(((u64) desc->async_tx.phys) >> 32,
+		       ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
+
+		writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
+			+ IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
+		break;
+	case IOAT_VER_2_0:
+		writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
+		       ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
+		writel(((u64) desc->async_tx.phys) >> 32,
+		       ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
+
+		/* tell the engine to go with what's left to be done */
+		writew(ioat_chan->dmacount,
+		       ioat_chan->reg_base + IOAT_CHAN_DMACOUNT_OFFSET);
+
+		break;
+	}
+	dev_err(&ioat_chan->device->pdev->dev,
+		"chan%d reset - %d descs waiting, %d total desc\n",
+		chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
+
+	spin_unlock_bh(&ioat_chan->desc_lock);
+	spin_unlock_bh(&ioat_chan->cleanup_lock);
+}
+
+/**
+ * ioat_dma_reset_channel - restart a channel
+ * @ioat_chan: IOAT DMA channel handle
+ */
+static void ioat_dma_reset_channel(struct ioat_dma_chan *ioat_chan)
+{
+	u32 chansts, chanerr;
+
+	if (!ioat_chan->used_desc.prev)
+		return;
+
+	chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+	chansts = (ioat_chan->completion_virt->low
+					& IOAT_CHANSTS_DMA_TRANSFER_STATUS);
+	if (chanerr) {
+		dev_err(&ioat_chan->device->pdev->dev,
+			"chan%d, CHANSTS = 0x%08x CHANERR = 0x%04x, clearing\n",
+			chan_num(ioat_chan), chansts, chanerr);
+		writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+	}
+
+	/*
+	 * whack it upside the head with a reset
+	 * and wait for things to settle out.
+	 * force the pending count to a really big negative
+	 * to make sure no one forces an issue_pending
+	 * while we're waiting.
+	 */
+
+	spin_lock_bh(&ioat_chan->desc_lock);
+	ioat_chan->pending = INT_MIN;
+	writeb(IOAT_CHANCMD_RESET,
+	       ioat_chan->reg_base
+	       + IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
+	spin_unlock_bh(&ioat_chan->desc_lock);
+
+	/* schedule the 2nd half instead of sleeping a long time */
+	schedule_delayed_work(&ioat_chan->work, RESET_DELAY);
+}
+
+/**
+ * ioat_dma_chan_watchdog - watch for stuck channels
+ */
+static void ioat_dma_chan_watchdog(struct work_struct *work)
+{
+	struct ioatdma_device *device =
+		container_of(work, struct ioatdma_device, work.work);
+	struct ioat_dma_chan *ioat_chan;
+	int i;
+
+	union {
+		u64 full;
+		struct {
+			u32 low;
+			u32 high;
+		};
+	} completion_hw;
+	unsigned long compl_desc_addr_hw;
+
+	for (i = 0; i < device->common.chancnt; i++) {
+		ioat_chan = ioat_lookup_chan_by_index(device, i);
+
+		if (ioat_chan->device->version == IOAT_VER_1_2
+			/* have we started processing anything yet */
+		    && ioat_chan->last_completion
+			/* have we completed any since last watchdog cycle? */
+		    && (ioat_chan->last_completion ==
+				ioat_chan->watchdog_completion)
+			/* has TCP stuck on one cookie since last watchdog? */
+		    && (ioat_chan->watchdog_tcp_cookie ==
+				ioat_chan->watchdog_last_tcp_cookie)
+		    && (ioat_chan->watchdog_tcp_cookie !=
+				ioat_chan->completed_cookie)
+			/* is there something in the chain to be processed? */
+			/* CB1 chain always has at least the last one processed */
+		    && (ioat_chan->used_desc.prev != ioat_chan->used_desc.next)
+		    && ioat_chan->pending == 0) {
+
+			/*
+			 * check CHANSTS register for completed
+			 * descriptor address.
+			 * if it is different than completion writeback,
+			 * it is not zero
+			 * and it has changed since the last watchdog
+			 *     we can assume that channel
+			 *     is still working correctly
+			 *     and the problem is in completion writeback.
+			 *     update completion writeback
+			 *     with actual CHANSTS value
+			 * else
+			 *     try resetting the channel
+			 */
+
+			completion_hw.low = readl(ioat_chan->reg_base +
+				IOAT_CHANSTS_OFFSET_LOW(ioat_chan->device->version));
+			completion_hw.high = readl(ioat_chan->reg_base +
+				IOAT_CHANSTS_OFFSET_HIGH(ioat_chan->device->version));
+#if (BITS_PER_LONG == 64)
+			compl_desc_addr_hw =
+				completion_hw.full
+				& IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+#else
+			compl_desc_addr_hw =
+				completion_hw.low & IOAT_LOW_COMPLETION_MASK;
+#endif
+
+			if ((compl_desc_addr_hw != 0)
+			   && (compl_desc_addr_hw != ioat_chan->watchdog_completion)
+			   && (compl_desc_addr_hw != ioat_chan->last_compl_desc_addr_hw)) {
+				ioat_chan->last_compl_desc_addr_hw = compl_desc_addr_hw;
+				ioat_chan->completion_virt->low = completion_hw.low;
+				ioat_chan->completion_virt->high = completion_hw.high;
+			} else {
+				ioat_dma_reset_channel(ioat_chan);
+				ioat_chan->watchdog_completion = 0;
+				ioat_chan->last_compl_desc_addr_hw = 0;
+			}
+
+		/*
+		 * for version 2.0 if there are descriptors yet to be processed
+		 * and the last completed hasn't changed since the last watchdog
+		 *      if they haven't hit the pending level
+		 *          issue the pending to push them through
+		 *      else
+		 *          try resetting the channel
+		 */
+		} else if (ioat_chan->device->version == IOAT_VER_2_0
+		    && ioat_chan->used_desc.prev
+		    && ioat_chan->last_completion
+		    && ioat_chan->last_completion == ioat_chan->watchdog_completion) {
+
+			if (ioat_chan->pending < ioat_pending_level)
+				ioat2_dma_memcpy_issue_pending(&ioat_chan->common);
+			else {
+				ioat_dma_reset_channel(ioat_chan);
+				ioat_chan->watchdog_completion = 0;
+			}
+		} else {
+			ioat_chan->last_compl_desc_addr_hw = 0;
+			ioat_chan->watchdog_completion
+					= ioat_chan->last_completion;
+		}
+
+		ioat_chan->watchdog_last_tcp_cookie =
+			ioat_chan->watchdog_tcp_cookie;
+	}
+
+	schedule_delayed_work(&device->work, WATCHDOG_DELAY);
+}
+
+static dma_cookie_t ioat1_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
+	struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
+	struct ioat_desc_sw *prev, *new;
+	struct ioat_dma_descriptor *hw;
+	dma_cookie_t cookie;
+	LIST_HEAD(new_chain);
+	u32 copy;
+	size_t len;
+	dma_addr_t src, dst;
+	unsigned long orig_flags;
+	unsigned int desc_count = 0;
+
+	/* src and dest and len are stored in the initial descriptor */
+	len = first->len;
+	src = first->src;
+	dst = first->dst;
+	orig_flags = first->async_tx.flags;
+	new = first;
+
+	spin_lock_bh(&ioat_chan->desc_lock);
+	prev = to_ioat_desc(ioat_chan->used_desc.prev);
+	prefetch(prev->hw);
+	do {
+		copy = min_t(size_t, len, ioat_chan->xfercap);
+
+		async_tx_ack(&new->async_tx);
+
+		hw = new->hw;
+		hw->size = copy;
+		hw->ctl = 0;
+		hw->src_addr = src;
+		hw->dst_addr = dst;
+		hw->next = 0;
+
+		/* chain together the physical address list for the HW */
+		wmb();
+		prev->hw->next = (u64) new->async_tx.phys;
+
+		len -= copy;
+		dst += copy;
+		src += copy;
+
+		list_add_tail(&new->node, &new_chain);
+		desc_count++;
+		prev = new;
+	} while (len && (new = ioat1_dma_get_next_descriptor(ioat_chan)));
+
+	if (!new) {
+		dev_err(&ioat_chan->device->pdev->dev,
+			"tx submit failed\n");
+		spin_unlock_bh(&ioat_chan->desc_lock);
+		return -ENOMEM;
+	}
+
+	hw->ctl = IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
+	if (first->async_tx.callback) {
+		hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
+		if (first != new) {
+			/* move callback into to last desc */
+			new->async_tx.callback = first->async_tx.callback;
+			new->async_tx.callback_param
+					= first->async_tx.callback_param;
+			first->async_tx.callback = NULL;
+			first->async_tx.callback_param = NULL;
+		}
+	}
+
+	new->tx_cnt = desc_count;
+	new->async_tx.flags = orig_flags; /* client is in control of this ack */
+
+	/* store the original values for use in later cleanup */
+	if (new != first) {
+		new->src = first->src;
+		new->dst = first->dst;
+		new->len = first->len;
+	}
+
+	/* cookie incr and addition to used_list must be atomic */
+	cookie = ioat_chan->common.cookie;
+	cookie++;
+	if (cookie < 0)
+		cookie = 1;
+	ioat_chan->common.cookie = new->async_tx.cookie = cookie;
+
+	/* write address into NextDescriptor field of last desc in chain */
+	to_ioat_desc(ioat_chan->used_desc.prev)->hw->next =
+							first->async_tx.phys;
+	list_splice_tail(&new_chain, &ioat_chan->used_desc);
+
+	ioat_chan->dmacount += desc_count;
+	ioat_chan->pending += desc_count;
+	if (ioat_chan->pending >= ioat_pending_level)
+		__ioat1_dma_memcpy_issue_pending(ioat_chan);
+	spin_unlock_bh(&ioat_chan->desc_lock);
+
+	return cookie;
+}
+
+static dma_cookie_t ioat2_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct ioat_dma_chan *ioat_chan = to_ioat_chan(tx->chan);
+	struct ioat_desc_sw *first = tx_to_ioat_desc(tx);
+	struct ioat_desc_sw *new;
+	struct ioat_dma_descriptor *hw;
+	dma_cookie_t cookie;
+	u32 copy;
+	size_t len;
+	dma_addr_t src, dst;
+	unsigned long orig_flags;
+	unsigned int desc_count = 0;
+
+	/* src and dest and len are stored in the initial descriptor */
+	len = first->len;
+	src = first->src;
+	dst = first->dst;
+	orig_flags = first->async_tx.flags;
+	new = first;
+
+	/*
+	 * ioat_chan->desc_lock is still in force in version 2 path
+	 * it gets unlocked at end of this function
+	 */
+	do {
+		copy = min_t(size_t, len, ioat_chan->xfercap);
+
+		async_tx_ack(&new->async_tx);
+
+		hw = new->hw;
+		hw->size = copy;
+		hw->ctl = 0;
+		hw->src_addr = src;
+		hw->dst_addr = dst;
+
+		len -= copy;
+		dst += copy;
+		src += copy;
+		desc_count++;
+	} while (len && (new = ioat2_dma_get_next_descriptor(ioat_chan)));
+
+	if (!new) {
+		dev_err(&ioat_chan->device->pdev->dev,
+			"tx submit failed\n");
+		spin_unlock_bh(&ioat_chan->desc_lock);
+		return -ENOMEM;
+	}
+
+	hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
+	if (first->async_tx.callback) {
+		hw->ctl |= IOAT_DMA_DESCRIPTOR_CTL_INT_GN;
+		if (first != new) {
+			/* move callback into to last desc */
+			new->async_tx.callback = first->async_tx.callback;
+			new->async_tx.callback_param
+					= first->async_tx.callback_param;
+			first->async_tx.callback = NULL;
+			first->async_tx.callback_param = NULL;
+		}
+	}
+
+	new->tx_cnt = desc_count;
+	new->async_tx.flags = orig_flags; /* client is in control of this ack */
+
+	/* store the original values for use in later cleanup */
+	if (new != first) {
+		new->src = first->src;
+		new->dst = first->dst;
+		new->len = first->len;
+	}
+
+	/* cookie incr and addition to used_list must be atomic */
+	cookie = ioat_chan->common.cookie;
+	cookie++;
+	if (cookie < 0)
+		cookie = 1;
+	ioat_chan->common.cookie = new->async_tx.cookie = cookie;
+
+	ioat_chan->dmacount += desc_count;
+	ioat_chan->pending += desc_count;
+	if (ioat_chan->pending >= ioat_pending_level)
+		__ioat2_dma_memcpy_issue_pending(ioat_chan);
+	spin_unlock_bh(&ioat_chan->desc_lock);
+
+	return cookie;
+}
+
+/**
+ * ioat_dma_alloc_descriptor - allocate and return a sw and hw descriptor pair
+ * @ioat_chan: the channel supplying the memory pool for the descriptors
+ * @flags: allocation flags
+ */
+static struct ioat_desc_sw *ioat_dma_alloc_descriptor(
+					struct ioat_dma_chan *ioat_chan,
+					gfp_t flags)
+{
+	struct ioat_dma_descriptor *desc;
+	struct ioat_desc_sw *desc_sw;
+	struct ioatdma_device *ioatdma_device;
+	dma_addr_t phys;
+
+	ioatdma_device = to_ioatdma_device(ioat_chan->common.device);
+	desc = pci_pool_alloc(ioatdma_device->dma_pool, flags, &phys);
+	if (unlikely(!desc))
+		return NULL;
+
+	desc_sw = kzalloc(sizeof(*desc_sw), flags);
+	if (unlikely(!desc_sw)) {
+		pci_pool_free(ioatdma_device->dma_pool, desc, phys);
+		return NULL;
+	}
+
+	memset(desc, 0, sizeof(*desc));
+	dma_async_tx_descriptor_init(&desc_sw->async_tx, &ioat_chan->common);
+	switch (ioat_chan->device->version) {
+	case IOAT_VER_1_2:
+		desc_sw->async_tx.tx_submit = ioat1_tx_submit;
+		break;
+	case IOAT_VER_2_0:
+	case IOAT_VER_3_0:
+		desc_sw->async_tx.tx_submit = ioat2_tx_submit;
+		break;
+	}
+	INIT_LIST_HEAD(&desc_sw->async_tx.tx_list);
+
+	desc_sw->hw = desc;
+	desc_sw->async_tx.phys = phys;
+
+	return desc_sw;
+}
+
+static int ioat_initial_desc_count = 256;
+module_param(ioat_initial_desc_count, int, 0644);
+MODULE_PARM_DESC(ioat_initial_desc_count,
+		 "initial descriptors per channel (default: 256)");
+
+/**
+ * ioat2_dma_massage_chan_desc - link the descriptors into a circle
+ * @ioat_chan: the channel to be massaged
+ */
+static void ioat2_dma_massage_chan_desc(struct ioat_dma_chan *ioat_chan)
+{
+	struct ioat_desc_sw *desc, *_desc;
+
+	/* setup used_desc */
+	ioat_chan->used_desc.next = ioat_chan->free_desc.next;
+	ioat_chan->used_desc.prev = NULL;
+
+	/* pull free_desc out of the circle so that every node is a hw
+	 * descriptor, but leave it pointing to the list
+	 */
+	ioat_chan->free_desc.prev->next = ioat_chan->free_desc.next;
+	ioat_chan->free_desc.next->prev = ioat_chan->free_desc.prev;
+
+	/* circle link the hw descriptors */
+	desc = to_ioat_desc(ioat_chan->free_desc.next);
+	desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
+	list_for_each_entry_safe(desc, _desc, ioat_chan->free_desc.next, node) {
+		desc->hw->next = to_ioat_desc(desc->node.next)->async_tx.phys;
+	}
+}
+
+/**
+ * ioat_dma_alloc_chan_resources - returns the number of allocated descriptors
+ * @chan: the channel to be filled out
+ */
+static int ioat_dma_alloc_chan_resources(struct dma_chan *chan,
+					 struct dma_client *client)
+{
+	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+	struct ioat_desc_sw *desc;
+	u16 chanctrl;
+	u32 chanerr;
+	int i;
+	LIST_HEAD(tmp_list);
+
+	/* have we already been set up? */
+	if (!list_empty(&ioat_chan->free_desc))
+		return ioat_chan->desccount;
+
+	/* Setup register to interrupt and write completion status on error */
+	chanctrl = IOAT_CHANCTRL_ERR_INT_EN |
+		IOAT_CHANCTRL_ANY_ERR_ABORT_EN |
+		IOAT_CHANCTRL_ERR_COMPLETION_EN;
+	writew(chanctrl, ioat_chan->reg_base + IOAT_CHANCTRL_OFFSET);
+
+	chanerr = readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+	if (chanerr) {
+		dev_err(&ioat_chan->device->pdev->dev,
+			"CHANERR = %x, clearing\n", chanerr);
+		writel(chanerr, ioat_chan->reg_base + IOAT_CHANERR_OFFSET);
+	}
+
+	/* Allocate descriptors */
+	for (i = 0; i < ioat_initial_desc_count; i++) {
+		desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_KERNEL);
+		if (!desc) {
+			dev_err(&ioat_chan->device->pdev->dev,
+				"Only %d initial descriptors\n", i);
+			break;
+		}
+		list_add_tail(&desc->node, &tmp_list);
+	}
+	spin_lock_bh(&ioat_chan->desc_lock);
+	ioat_chan->desccount = i;
+	list_splice(&tmp_list, &ioat_chan->free_desc);
+	if (ioat_chan->device->version != IOAT_VER_1_2)
+		ioat2_dma_massage_chan_desc(ioat_chan);
+	spin_unlock_bh(&ioat_chan->desc_lock);
+
+	/* allocate a completion writeback area */
+	/* doing 2 32bit writes to mmio since 1 64b write doesn't work */
+	ioat_chan->completion_virt =
+		pci_pool_alloc(ioat_chan->device->completion_pool,
+			       GFP_KERNEL,
+			       &ioat_chan->completion_addr);
+	memset(ioat_chan->completion_virt, 0,
+	       sizeof(*ioat_chan->completion_virt));
+	writel(((u64) ioat_chan->completion_addr) & 0x00000000FFFFFFFF,
+	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_LOW);
+	writel(((u64) ioat_chan->completion_addr) >> 32,
+	       ioat_chan->reg_base + IOAT_CHANCMP_OFFSET_HIGH);
+
+	tasklet_enable(&ioat_chan->cleanup_task);
+	ioat_dma_start_null_desc(ioat_chan);  /* give chain to dma device */
+	return ioat_chan->desccount;
+}
+
+/**
+ * ioat_dma_free_chan_resources - release all the descriptors
+ * @chan: the channel to be cleaned
+ */
+static void ioat_dma_free_chan_resources(struct dma_chan *chan)
+{
+	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+	struct ioatdma_device *ioatdma_device = to_ioatdma_device(chan->device);
+	struct ioat_desc_sw *desc, *_desc;
+	int in_use_descs = 0;
+
+	/* Before freeing channel resources first check
+	 * if they have been previously allocated for this channel.
+	 */
+	if (ioat_chan->desccount == 0)
+		return;
+
+	tasklet_disable(&ioat_chan->cleanup_task);
+	ioat_dma_memcpy_cleanup(ioat_chan);
+
+	/* Delay 100ms after reset to allow internal DMA logic to quiesce
+	 * before removing DMA descriptor resources.
+	 */
+	writeb(IOAT_CHANCMD_RESET,
+	       ioat_chan->reg_base
+			+ IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
+	mdelay(100);
+
+	spin_lock_bh(&ioat_chan->desc_lock);
+	switch (ioat_chan->device->version) {
+	case IOAT_VER_1_2:
+		list_for_each_entry_safe(desc, _desc,
+					 &ioat_chan->used_desc, node) {
+			in_use_descs++;
+			list_del(&desc->node);
+			pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+				      desc->async_tx.phys);
+			kfree(desc);
+		}
+		list_for_each_entry_safe(desc, _desc,
+					 &ioat_chan->free_desc, node) {
+			list_del(&desc->node);
+			pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+				      desc->async_tx.phys);
+			kfree(desc);
+		}
+		break;
+	case IOAT_VER_2_0:
+	case IOAT_VER_3_0:
+		list_for_each_entry_safe(desc, _desc,
+					 ioat_chan->free_desc.next, node) {
+			list_del(&desc->node);
+			pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+				      desc->async_tx.phys);
+			kfree(desc);
+		}
+		desc = to_ioat_desc(ioat_chan->free_desc.next);
+		pci_pool_free(ioatdma_device->dma_pool, desc->hw,
+			      desc->async_tx.phys);
+		kfree(desc);
+		INIT_LIST_HEAD(&ioat_chan->free_desc);
+		INIT_LIST_HEAD(&ioat_chan->used_desc);
+		break;
+	}
+	spin_unlock_bh(&ioat_chan->desc_lock);
+
+	pci_pool_free(ioatdma_device->completion_pool,
+		      ioat_chan->completion_virt,
+		      ioat_chan->completion_addr);
+
+	/* one is ok since we left it on there on purpose */
+	if (in_use_descs > 1)
+		dev_err(&ioat_chan->device->pdev->dev,
+			"Freeing %d in use descriptors!\n",
+			in_use_descs - 1);
+
+	ioat_chan->last_completion = ioat_chan->completion_addr = 0;
+	ioat_chan->pending = 0;
+	ioat_chan->dmacount = 0;
+	ioat_chan->desccount = 0;
+	ioat_chan->watchdog_completion = 0;
+	ioat_chan->last_compl_desc_addr_hw = 0;
+	ioat_chan->watchdog_tcp_cookie =
+		ioat_chan->watchdog_last_tcp_cookie = 0;
+}
+
+/**
+ * ioat_dma_get_next_descriptor - return the next available descriptor
+ * @ioat_chan: IOAT DMA channel handle
+ *
+ * Gets the next descriptor from the chain, and must be called with the
+ * channel's desc_lock held.  Allocates more descriptors if the channel
+ * has run out.
+ */
+static struct ioat_desc_sw *
+ioat1_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
+{
+	struct ioat_desc_sw *new;
+
+	if (!list_empty(&ioat_chan->free_desc)) {
+		new = to_ioat_desc(ioat_chan->free_desc.next);
+		list_del(&new->node);
+	} else {
+		/* try to get another desc */
+		new = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
+		if (!new) {
+			dev_err(&ioat_chan->device->pdev->dev,
+				"alloc failed\n");
+			return NULL;
+		}
+	}
+
+	prefetch(new->hw);
+	return new;
+}
+
+static struct ioat_desc_sw *
+ioat2_dma_get_next_descriptor(struct ioat_dma_chan *ioat_chan)
+{
+	struct ioat_desc_sw *new;
+
+	/*
+	 * used.prev points to where to start processing
+	 * used.next points to next free descriptor
+	 * if used.prev == NULL, there are none waiting to be processed
+	 * if used.next == used.prev.prev, there is only one free descriptor,
+	 *      and we need to use it to as a noop descriptor before
+	 *      linking in a new set of descriptors, since the device
+	 *      has probably already read the pointer to it
+	 */
+	if (ioat_chan->used_desc.prev &&
+	    ioat_chan->used_desc.next == ioat_chan->used_desc.prev->prev) {
+
+		struct ioat_desc_sw *desc;
+		struct ioat_desc_sw *noop_desc;
+		int i;
+
+		/* set up the noop descriptor */
+		noop_desc = to_ioat_desc(ioat_chan->used_desc.next);
+		/* set size to non-zero value (channel returns error when size is 0) */
+		noop_desc->hw->size = NULL_DESC_BUFFER_SIZE;
+		noop_desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL;
+		noop_desc->hw->src_addr = 0;
+		noop_desc->hw->dst_addr = 0;
+
+		ioat_chan->used_desc.next = ioat_chan->used_desc.next->next;
+		ioat_chan->pending++;
+		ioat_chan->dmacount++;
+
+		/* try to get a few more descriptors */
+		for (i = 16; i; i--) {
+			desc = ioat_dma_alloc_descriptor(ioat_chan, GFP_ATOMIC);
+			if (!desc) {
+				dev_err(&ioat_chan->device->pdev->dev,
+					"alloc failed\n");
+				break;
+			}
+			list_add_tail(&desc->node, ioat_chan->used_desc.next);
+
+			desc->hw->next
+				= to_ioat_desc(desc->node.next)->async_tx.phys;
+			to_ioat_desc(desc->node.prev)->hw->next
+				= desc->async_tx.phys;
+			ioat_chan->desccount++;
+		}
+
+		ioat_chan->used_desc.next = noop_desc->node.next;
+	}
+	new = to_ioat_desc(ioat_chan->used_desc.next);
+	prefetch(new);
+	ioat_chan->used_desc.next = new->node.next;
+
+	if (ioat_chan->used_desc.prev == NULL)
+		ioat_chan->used_desc.prev = &new->node;
+
+	prefetch(new->hw);
+	return new;
+}
+
+static struct ioat_desc_sw *ioat_dma_get_next_descriptor(
+						struct ioat_dma_chan *ioat_chan)
+{
+	if (!ioat_chan)
+		return NULL;
+
+	switch (ioat_chan->device->version) {
+	case IOAT_VER_1_2:
+		return ioat1_dma_get_next_descriptor(ioat_chan);
+	case IOAT_VER_2_0:
+	case IOAT_VER_3_0:
+		return ioat2_dma_get_next_descriptor(ioat_chan);
+	}
+	return NULL;
+}
+
+static struct dma_async_tx_descriptor *ioat1_dma_prep_memcpy(
+						struct dma_chan *chan,
+						dma_addr_t dma_dest,
+						dma_addr_t dma_src,
+						size_t len,
+						unsigned long flags)
+{
+	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+	struct ioat_desc_sw *new;
+
+	spin_lock_bh(&ioat_chan->desc_lock);
+	new = ioat_dma_get_next_descriptor(ioat_chan);
+	spin_unlock_bh(&ioat_chan->desc_lock);
+
+	if (new) {
+		new->len = len;
+		new->dst = dma_dest;
+		new->src = dma_src;
+		new->async_tx.flags = flags;
+		return &new->async_tx;
+	} else {
+		dev_err(&ioat_chan->device->pdev->dev,
+			"chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
+			chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
+		return NULL;
+	}
+}
+
+static struct dma_async_tx_descriptor *ioat2_dma_prep_memcpy(
+						struct dma_chan *chan,
+						dma_addr_t dma_dest,
+						dma_addr_t dma_src,
+						size_t len,
+						unsigned long flags)
+{
+	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+	struct ioat_desc_sw *new;
+
+	spin_lock_bh(&ioat_chan->desc_lock);
+	new = ioat2_dma_get_next_descriptor(ioat_chan);
+
+	/*
+	 * leave ioat_chan->desc_lock set in ioat 2 path
+	 * it will get unlocked at end of tx_submit
+	 */
+
+	if (new) {
+		new->len = len;
+		new->dst = dma_dest;
+		new->src = dma_src;
+		new->async_tx.flags = flags;
+		return &new->async_tx;
+	} else {
+		spin_unlock_bh(&ioat_chan->desc_lock);
+		dev_err(&ioat_chan->device->pdev->dev,
+			"chan%d - get_next_desc failed: %d descs waiting, %d total desc\n",
+			chan_num(ioat_chan), ioat_chan->dmacount, ioat_chan->desccount);
+		return NULL;
+	}
+}
+
+static void ioat_dma_cleanup_tasklet(unsigned long data)
+{
+	struct ioat_dma_chan *chan = (void *)data;
+	ioat_dma_memcpy_cleanup(chan);
+	writew(IOAT_CHANCTRL_INT_DISABLE,
+	       chan->reg_base + IOAT_CHANCTRL_OFFSET);
+}
+
+static void
+ioat_dma_unmap(struct ioat_dma_chan *ioat_chan, struct ioat_desc_sw *desc)
+{
+	/*
+	 * yes we are unmapping both _page and _single
+	 * alloc'd regions with unmap_page. Is this
+	 * *really* that bad?
+	 */
+	if (!(desc->async_tx.flags & DMA_COMPL_SKIP_DEST_UNMAP))
+		pci_unmap_page(ioat_chan->device->pdev,
+				pci_unmap_addr(desc, dst),
+				pci_unmap_len(desc, len),
+				PCI_DMA_FROMDEVICE);
+
+	if (!(desc->async_tx.flags & DMA_COMPL_SKIP_SRC_UNMAP))
+		pci_unmap_page(ioat_chan->device->pdev,
+				pci_unmap_addr(desc, src),
+				pci_unmap_len(desc, len),
+				PCI_DMA_TODEVICE);
+}
+
+/**
+ * ioat_dma_memcpy_cleanup - cleanup up finished descriptors
+ * @chan: ioat channel to be cleaned up
+ */
+static void ioat_dma_memcpy_cleanup(struct ioat_dma_chan *ioat_chan)
+{
+	unsigned long phys_complete;
+	struct ioat_desc_sw *desc, *_desc;
+	dma_cookie_t cookie = 0;
+	unsigned long desc_phys;
+	struct ioat_desc_sw *latest_desc;
+
+	prefetch(ioat_chan->completion_virt);
+
+	if (!spin_trylock_bh(&ioat_chan->cleanup_lock))
+		return;
+
+	/* The completion writeback can happen at any time,
+	   so reads by the driver need to be atomic operations
+	   The descriptor physical addresses are limited to 32-bits
+	   when the CPU can only do a 32-bit mov */
+
+#if (BITS_PER_LONG == 64)
+	phys_complete =
+		ioat_chan->completion_virt->full
+		& IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+#else
+	phys_complete =
+		ioat_chan->completion_virt->low & IOAT_LOW_COMPLETION_MASK;
+#endif
+
+	if ((ioat_chan->completion_virt->full
+		& IOAT_CHANSTS_DMA_TRANSFER_STATUS) ==
+				IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED) {
+		dev_err(&ioat_chan->device->pdev->dev,
+			"Channel halted, chanerr = %x\n",
+			readl(ioat_chan->reg_base + IOAT_CHANERR_OFFSET));
+
+		/* TODO do something to salvage the situation */
+	}
+
+	if (phys_complete == ioat_chan->last_completion) {
+		spin_unlock_bh(&ioat_chan->cleanup_lock);
+		/*
+		 * perhaps we're stuck so hard that the watchdog can't go off?
+		 * try to catch it after 2 seconds
+		 */
+		if (ioat_chan->device->version != IOAT_VER_3_0) {
+			if (time_after(jiffies,
+				       ioat_chan->last_completion_time + HZ*WATCHDOG_DELAY)) {
+				ioat_dma_chan_watchdog(&(ioat_chan->device->work.work));
+				ioat_chan->last_completion_time = jiffies;
+			}
+		}
+		return;
+	}
+	ioat_chan->last_completion_time = jiffies;
+
+	cookie = 0;
+	if (!spin_trylock_bh(&ioat_chan->desc_lock)) {
+		spin_unlock_bh(&ioat_chan->cleanup_lock);
+		return;
+	}
+
+	switch (ioat_chan->device->version) {
+	case IOAT_VER_1_2:
+		list_for_each_entry_safe(desc, _desc,
+					 &ioat_chan->used_desc, node) {
+
+			/*
+			 * Incoming DMA requests may use multiple descriptors,
+			 * due to exceeding xfercap, perhaps. If so, only the
+			 * last one will have a cookie, and require unmapping.
+			 */
+			if (desc->async_tx.cookie) {
+				cookie = desc->async_tx.cookie;
+				ioat_dma_unmap(ioat_chan, desc);
+				if (desc->async_tx.callback) {
+					desc->async_tx.callback(desc->async_tx.callback_param);
+					desc->async_tx.callback = NULL;
+				}
+			}
+
+			if (desc->async_tx.phys != phys_complete) {
+				/*
+				 * a completed entry, but not the last, so clean
+				 * up if the client is done with the descriptor
+				 */
+				if (async_tx_test_ack(&desc->async_tx)) {
+					list_del(&desc->node);
+					list_add_tail(&desc->node,
+						      &ioat_chan->free_desc);
+				} else
+					desc->async_tx.cookie = 0;
+			} else {
+				/*
+				 * last used desc. Do not remove, so we can
+				 * append from it, but don't look at it next
+				 * time, either
+				 */
+				desc->async_tx.cookie = 0;
+
+				/* TODO check status bits? */
+				break;
+			}
+		}
+		break;
+	case IOAT_VER_2_0:
+	case IOAT_VER_3_0:
+		/* has some other thread has already cleaned up? */
+		if (ioat_chan->used_desc.prev == NULL)
+			break;
+
+		/* work backwards to find latest finished desc */
+		desc = to_ioat_desc(ioat_chan->used_desc.next);
+		latest_desc = NULL;
+		do {
+			desc = to_ioat_desc(desc->node.prev);
+			desc_phys = (unsigned long)desc->async_tx.phys
+				       & IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR;
+			if (desc_phys == phys_complete) {
+				latest_desc = desc;
+				break;
+			}
+		} while (&desc->node != ioat_chan->used_desc.prev);
+
+		if (latest_desc != NULL) {
+
+			/* work forwards to clear finished descriptors */
+			for (desc = to_ioat_desc(ioat_chan->used_desc.prev);
+			     &desc->node != latest_desc->node.next &&
+			     &desc->node != ioat_chan->used_desc.next;
+			     desc = to_ioat_desc(desc->node.next)) {
+				if (desc->async_tx.cookie) {
+					cookie = desc->async_tx.cookie;
+					desc->async_tx.cookie = 0;
+					ioat_dma_unmap(ioat_chan, desc);
+					if (desc->async_tx.callback) {
+						desc->async_tx.callback(desc->async_tx.callback_param);
+						desc->async_tx.callback = NULL;
+					}
+				}
+			}
+
+			/* move used.prev up beyond those that are finished */
+			if (&desc->node == ioat_chan->used_desc.next)
+				ioat_chan->used_desc.prev = NULL;
+			else
+				ioat_chan->used_desc.prev = &desc->node;
+		}
+		break;
+	}
+
+	spin_unlock_bh(&ioat_chan->desc_lock);
+
+	ioat_chan->last_completion = phys_complete;
+	if (cookie != 0)
+		ioat_chan->completed_cookie = cookie;
+
+	spin_unlock_bh(&ioat_chan->cleanup_lock);
+}
+
+/**
+ * ioat_dma_is_complete - poll the status of a IOAT DMA transaction
+ * @chan: IOAT DMA channel handle
+ * @cookie: DMA transaction identifier
+ * @done: if not %NULL, updated with last completed transaction
+ * @used: if not %NULL, updated with last used transaction
+ */
+static enum dma_status ioat_dma_is_complete(struct dma_chan *chan,
+					    dma_cookie_t cookie,
+					    dma_cookie_t *done,
+					    dma_cookie_t *used)
+{
+	struct ioat_dma_chan *ioat_chan = to_ioat_chan(chan);
+	dma_cookie_t last_used;
+	dma_cookie_t last_complete;
+	enum dma_status ret;
+
+	last_used = chan->cookie;
+	last_complete = ioat_chan->completed_cookie;
+	ioat_chan->watchdog_tcp_cookie = cookie;
+
+	if (done)
+		*done = last_complete;
+	if (used)
+		*used = last_used;
+
+	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	if (ret == DMA_SUCCESS)
+		return ret;
+
+	ioat_dma_memcpy_cleanup(ioat_chan);
+
+	last_used = chan->cookie;
+	last_complete = ioat_chan->completed_cookie;
+
+	if (done)
+		*done = last_complete;
+	if (used)
+		*used = last_used;
+
+	return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static void ioat_dma_start_null_desc(struct ioat_dma_chan *ioat_chan)
+{
+	struct ioat_desc_sw *desc;
+
+	spin_lock_bh(&ioat_chan->desc_lock);
+
+	desc = ioat_dma_get_next_descriptor(ioat_chan);
+
+	if (!desc) {
+		dev_err(&ioat_chan->device->pdev->dev,
+			"Unable to start null desc - get next desc failed\n");
+		spin_unlock_bh(&ioat_chan->desc_lock);
+		return;
+	}
+
+	desc->hw->ctl = IOAT_DMA_DESCRIPTOR_NUL
+				| IOAT_DMA_DESCRIPTOR_CTL_INT_GN
+				| IOAT_DMA_DESCRIPTOR_CTL_CP_STS;
+	/* set size to non-zero value (channel returns error when size is 0) */
+	desc->hw->size = NULL_DESC_BUFFER_SIZE;
+	desc->hw->src_addr = 0;
+	desc->hw->dst_addr = 0;
+	async_tx_ack(&desc->async_tx);
+	switch (ioat_chan->device->version) {
+	case IOAT_VER_1_2:
+		desc->hw->next = 0;
+		list_add_tail(&desc->node, &ioat_chan->used_desc);
+
+		writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
+		       ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_LOW);
+		writel(((u64) desc->async_tx.phys) >> 32,
+		       ioat_chan->reg_base + IOAT1_CHAINADDR_OFFSET_HIGH);
+
+		writeb(IOAT_CHANCMD_START, ioat_chan->reg_base
+			+ IOAT_CHANCMD_OFFSET(ioat_chan->device->version));
+		break;
+	case IOAT_VER_2_0:
+	case IOAT_VER_3_0:
+		writel(((u64) desc->async_tx.phys) & 0x00000000FFFFFFFF,
+		       ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_LOW);
+		writel(((u64) desc->async_tx.phys) >> 32,
+		       ioat_chan->reg_base + IOAT2_CHAINADDR_OFFSET_HIGH);
+
+		ioat_chan->dmacount++;
+		__ioat2_dma_memcpy_issue_pending(ioat_chan);
+		break;
+	}
+	spin_unlock_bh(&ioat_chan->desc_lock);
+}
+
+/*
+ * Perform a IOAT transaction to verify the HW works.
+ */
+#define IOAT_TEST_SIZE 2000
+
+static void ioat_dma_test_callback(void *dma_async_param)
+{
+	struct completion *cmp = dma_async_param;
+
+	complete(cmp);
+}
+
+/**
+ * ioat_dma_self_test - Perform a IOAT transaction to verify the HW works.
+ * @device: device to be tested
+ */
+static int ioat_dma_self_test(struct ioatdma_device *device)
+{
+	int i;
+	u8 *src;
+	u8 *dest;
+	struct dma_chan *dma_chan;
+	struct dma_async_tx_descriptor *tx;
+	dma_addr_t dma_dest, dma_src;
+	dma_cookie_t cookie;
+	int err = 0;
+	struct completion cmp;
+
+	src = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
+	if (!src)
+		return -ENOMEM;
+	dest = kzalloc(sizeof(u8) * IOAT_TEST_SIZE, GFP_KERNEL);
+	if (!dest) {
+		kfree(src);
+		return -ENOMEM;
+	}
+
+	/* Fill in src buffer */
+	for (i = 0; i < IOAT_TEST_SIZE; i++)
+		src[i] = (u8)i;
+
+	/* Start copy, using first DMA channel */
+	dma_chan = container_of(device->common.channels.next,
+				struct dma_chan,
+				device_node);
+	if (device->common.device_alloc_chan_resources(dma_chan, NULL) < 1) {
+		dev_err(&device->pdev->dev,
+			"selftest cannot allocate chan resource\n");
+		err = -ENODEV;
+		goto out;
+	}
+
+	dma_src = dma_map_single(dma_chan->device->dev, src, IOAT_TEST_SIZE,
+				 DMA_TO_DEVICE);
+	dma_dest = dma_map_single(dma_chan->device->dev, dest, IOAT_TEST_SIZE,
+				  DMA_FROM_DEVICE);
+	tx = device->common.device_prep_dma_memcpy(dma_chan, dma_dest, dma_src,
+						   IOAT_TEST_SIZE, 0);
+	if (!tx) {
+		dev_err(&device->pdev->dev,
+			"Self-test prep failed, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	async_tx_ack(tx);
+	init_completion(&cmp);
+	tx->callback = ioat_dma_test_callback;
+	tx->callback_param = &cmp;
+	cookie = tx->tx_submit(tx);
+	if (cookie < 0) {
+		dev_err(&device->pdev->dev,
+			"Self-test setup failed, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+	device->common.device_issue_pending(dma_chan);
+
+	wait_for_completion_timeout(&cmp, msecs_to_jiffies(3000));
+
+	if (device->common.device_is_tx_complete(dma_chan, cookie, NULL, NULL)
+					!= DMA_SUCCESS) {
+		dev_err(&device->pdev->dev,
+			"Self-test copy timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+	if (memcmp(src, dest, IOAT_TEST_SIZE)) {
+		dev_err(&device->pdev->dev,
+			"Self-test copy failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+free_resources:
+	device->common.device_free_chan_resources(dma_chan);
+out:
+	kfree(src);
+	kfree(dest);
+	return err;
+}
+
+static char ioat_interrupt_style[32] = "msix";
+module_param_string(ioat_interrupt_style, ioat_interrupt_style,
+		    sizeof(ioat_interrupt_style), 0644);
+MODULE_PARM_DESC(ioat_interrupt_style,
+		 "set ioat interrupt style: msix (default), "
+		 "msix-single-vector, msi, intx)");
+
+/**
+ * ioat_dma_setup_interrupts - setup interrupt handler
+ * @device: ioat device
+ */
+static int ioat_dma_setup_interrupts(struct ioatdma_device *device)
+{
+	struct ioat_dma_chan *ioat_chan;
+	int err, i, j, msixcnt;
+	u8 intrctrl = 0;
+
+	if (!strcmp(ioat_interrupt_style, "msix"))
+		goto msix;
+	if (!strcmp(ioat_interrupt_style, "msix-single-vector"))
+		goto msix_single_vector;
+	if (!strcmp(ioat_interrupt_style, "msi"))
+		goto msi;
+	if (!strcmp(ioat_interrupt_style, "intx"))
+		goto intx;
+	dev_err(&device->pdev->dev, "invalid ioat_interrupt_style %s\n",
+		ioat_interrupt_style);
+	goto err_no_irq;
+
+msix:
+	/* The number of MSI-X vectors should equal the number of channels */
+	msixcnt = device->common.chancnt;
+	for (i = 0; i < msixcnt; i++)
+		device->msix_entries[i].entry = i;
+
+	err = pci_enable_msix(device->pdev, device->msix_entries, msixcnt);
+	if (err < 0)
+		goto msi;
+	if (err > 0)
+		goto msix_single_vector;
+
+	for (i = 0; i < msixcnt; i++) {
+		ioat_chan = ioat_lookup_chan_by_index(device, i);
+		err = request_irq(device->msix_entries[i].vector,
+				  ioat_dma_do_interrupt_msix,
+				  0, "ioat-msix", ioat_chan);
+		if (err) {
+			for (j = 0; j < i; j++) {
+				ioat_chan =
+					ioat_lookup_chan_by_index(device, j);
+				free_irq(device->msix_entries[j].vector,
+					 ioat_chan);
+			}
+			goto msix_single_vector;
+		}
+	}
+	intrctrl |= IOAT_INTRCTRL_MSIX_VECTOR_CONTROL;
+	device->irq_mode = msix_multi_vector;
+	goto done;
+
+msix_single_vector:
+	device->msix_entries[0].entry = 0;
+	err = pci_enable_msix(device->pdev, device->msix_entries, 1);
+	if (err)
+		goto msi;
+
+	err = request_irq(device->msix_entries[0].vector, ioat_dma_do_interrupt,
+			  0, "ioat-msix", device);
+	if (err) {
+		pci_disable_msix(device->pdev);
+		goto msi;
+	}
+	device->irq_mode = msix_single_vector;
+	goto done;
+
+msi:
+	err = pci_enable_msi(device->pdev);
+	if (err)
+		goto intx;
+
+	err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
+			  0, "ioat-msi", device);
+	if (err) {
+		pci_disable_msi(device->pdev);
+		goto intx;
+	}
+	/*
+	 * CB 1.2 devices need a bit set in configuration space to enable MSI
+	 */
+	if (device->version == IOAT_VER_1_2) {
+		u32 dmactrl;
+		pci_read_config_dword(device->pdev,
+				      IOAT_PCI_DMACTRL_OFFSET, &dmactrl);
+		dmactrl |= IOAT_PCI_DMACTRL_MSI_EN;
+		pci_write_config_dword(device->pdev,
+				       IOAT_PCI_DMACTRL_OFFSET, dmactrl);
+	}
+	device->irq_mode = msi;
+	goto done;
+
+intx:
+	err = request_irq(device->pdev->irq, ioat_dma_do_interrupt,
+			  IRQF_SHARED, "ioat-intx", device);
+	if (err)
+		goto err_no_irq;
+	device->irq_mode = intx;
+
+done:
+	intrctrl |= IOAT_INTRCTRL_MASTER_INT_EN;
+	writeb(intrctrl, device->reg_base + IOAT_INTRCTRL_OFFSET);
+	return 0;
+
+err_no_irq:
+	/* Disable all interrupt generation */
+	writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
+	dev_err(&device->pdev->dev, "no usable interrupts\n");
+	device->irq_mode = none;
+	return -1;
+}
+
+/**
+ * ioat_dma_remove_interrupts - remove whatever interrupts were set
+ * @device: ioat device
+ */
+static void ioat_dma_remove_interrupts(struct ioatdma_device *device)
+{
+	struct ioat_dma_chan *ioat_chan;
+	int i;
+
+	/* Disable all interrupt generation */
+	writeb(0, device->reg_base + IOAT_INTRCTRL_OFFSET);
+
+	switch (device->irq_mode) {
+	case msix_multi_vector:
+		for (i = 0; i < device->common.chancnt; i++) {
+			ioat_chan = ioat_lookup_chan_by_index(device, i);
+			free_irq(device->msix_entries[i].vector, ioat_chan);
+		}
+		pci_disable_msix(device->pdev);
+		break;
+	case msix_single_vector:
+		free_irq(device->msix_entries[0].vector, device);
+		pci_disable_msix(device->pdev);
+		break;
+	case msi:
+		free_irq(device->pdev->irq, device);
+		pci_disable_msi(device->pdev);
+		break;
+	case intx:
+		free_irq(device->pdev->irq, device);
+		break;
+	case none:
+		dev_warn(&device->pdev->dev,
+			 "call to %s without interrupts setup\n", __func__);
+	}
+	device->irq_mode = none;
+}
+
+struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
+				      void __iomem *iobase)
+{
+	int err;
+	struct ioatdma_device *device;
+
+	device = kzalloc(sizeof(*device), GFP_KERNEL);
+	if (!device) {
+		err = -ENOMEM;
+		goto err_kzalloc;
+	}
+	device->pdev = pdev;
+	device->reg_base = iobase;
+	device->version = readb(device->reg_base + IOAT_VER_OFFSET);
+
+	/* DMA coherent memory pool for DMA descriptor allocations */
+	device->dma_pool = pci_pool_create("dma_desc_pool", pdev,
+					   sizeof(struct ioat_dma_descriptor),
+					   64, 0);
+	if (!device->dma_pool) {
+		err = -ENOMEM;
+		goto err_dma_pool;
+	}
+
+	device->completion_pool = pci_pool_create("completion_pool", pdev,
+						  sizeof(u64), SMP_CACHE_BYTES,
+						  SMP_CACHE_BYTES);
+	if (!device->completion_pool) {
+		err = -ENOMEM;
+		goto err_completion_pool;
+	}
+
+	INIT_LIST_HEAD(&device->common.channels);
+	ioat_dma_enumerate_channels(device);
+
+	device->common.device_alloc_chan_resources =
+						ioat_dma_alloc_chan_resources;
+	device->common.device_free_chan_resources =
+						ioat_dma_free_chan_resources;
+	device->common.dev = &pdev->dev;
+
+	dma_cap_set(DMA_MEMCPY, device->common.cap_mask);
+	device->common.device_is_tx_complete = ioat_dma_is_complete;
+	switch (device->version) {
+	case IOAT_VER_1_2:
+		device->common.device_prep_dma_memcpy = ioat1_dma_prep_memcpy;
+		device->common.device_issue_pending =
+						ioat1_dma_memcpy_issue_pending;
+		break;
+	case IOAT_VER_2_0:
+	case IOAT_VER_3_0:
+		device->common.device_prep_dma_memcpy = ioat2_dma_prep_memcpy;
+		device->common.device_issue_pending =
+						ioat2_dma_memcpy_issue_pending;
+		break;
+	}
+
+	dev_err(&device->pdev->dev,
+		"Intel(R) I/OAT DMA Engine found,"
+		" %d channels, device version 0x%02x, driver version %s\n",
+		device->common.chancnt, device->version, IOAT_DMA_VERSION);
+
+	err = ioat_dma_setup_interrupts(device);
+	if (err)
+		goto err_setup_interrupts;
+
+	err = ioat_dma_self_test(device);
+	if (err)
+		goto err_self_test;
+
+	ioat_set_tcp_copy_break(device);
+
+	dma_async_device_register(&device->common);
+
+	if (device->version != IOAT_VER_3_0) {
+		INIT_DELAYED_WORK(&device->work, ioat_dma_chan_watchdog);
+		schedule_delayed_work(&device->work,
+				      WATCHDOG_DELAY);
+	}
+
+	return device;
+
+err_self_test:
+	ioat_dma_remove_interrupts(device);
+err_setup_interrupts:
+	pci_pool_destroy(device->completion_pool);
+err_completion_pool:
+	pci_pool_destroy(device->dma_pool);
+err_dma_pool:
+	kfree(device);
+err_kzalloc:
+	dev_err(&pdev->dev,
+		"Intel(R) I/OAT DMA Engine initialization failed\n");
+	return NULL;
+}
+
+void ioat_dma_remove(struct ioatdma_device *device)
+{
+	struct dma_chan *chan, *_chan;
+	struct ioat_dma_chan *ioat_chan;
+
+	ioat_dma_remove_interrupts(device);
+
+	dma_async_device_unregister(&device->common);
+
+	pci_pool_destroy(device->dma_pool);
+	pci_pool_destroy(device->completion_pool);
+
+	iounmap(device->reg_base);
+	pci_release_regions(device->pdev);
+	pci_disable_device(device->pdev);
+
+	if (device->version != IOAT_VER_3_0) {
+		cancel_delayed_work(&device->work);
+	}
+
+	list_for_each_entry_safe(chan, _chan,
+				 &device->common.channels, device_node) {
+		ioat_chan = to_ioat_chan(chan);
+		list_del(&chan->device_node);
+		kfree(ioat_chan);
+	}
+	kfree(device);
+}
+
diff --git a/drivers/dma/ioatdma.h b/drivers/dma/ioatdma.h
new file mode 100644
index 0000000..a3306d0
--- /dev/null
+++ b/drivers/dma/ioatdma.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright(c) 2004 - 2007 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef IOATDMA_H
+#define IOATDMA_H
+
+#include <linux/dmaengine.h>
+#include "ioatdma_hw.h"
+#include <linux/init.h>
+#include <linux/dmapool.h>
+#include <linux/cache.h>
+#include <linux/pci_ids.h>
+#include <net/tcp.h>
+
+#define IOAT_DMA_VERSION  "3.30"
+
+enum ioat_interrupt {
+	none = 0,
+	msix_multi_vector = 1,
+	msix_single_vector = 2,
+	msi = 3,
+	intx = 4,
+};
+
+#define IOAT_LOW_COMPLETION_MASK	0xffffffc0
+#define IOAT_DMA_DCA_ANY_CPU		~0
+#define IOAT_WATCHDOG_PERIOD		(2 * HZ)
+
+
+/**
+ * struct ioatdma_device - internal representation of a IOAT device
+ * @pdev: PCI-Express device
+ * @reg_base: MMIO register space base address
+ * @dma_pool: for allocating DMA descriptors
+ * @common: embedded struct dma_device
+ * @version: version of ioatdma device
+ * @irq_mode: which style irq to use
+ * @msix_entries: irq handlers
+ * @idx: per channel data
+ */
+
+struct ioatdma_device {
+	struct pci_dev *pdev;
+	void __iomem *reg_base;
+	struct pci_pool *dma_pool;
+	struct pci_pool *completion_pool;
+	struct dma_device common;
+	u8 version;
+	enum ioat_interrupt irq_mode;
+	struct delayed_work work;
+	struct msix_entry msix_entries[4];
+	struct ioat_dma_chan *idx[4];
+};
+
+/**
+ * struct ioat_dma_chan - internal representation of a DMA channel
+ */
+struct ioat_dma_chan {
+
+	void __iomem *reg_base;
+
+	dma_cookie_t completed_cookie;
+	unsigned long last_completion;
+	unsigned long last_completion_time;
+
+	size_t xfercap;	/* XFERCAP register value expanded out */
+
+	spinlock_t cleanup_lock;
+	spinlock_t desc_lock;
+	struct list_head free_desc;
+	struct list_head used_desc;
+	unsigned long watchdog_completion;
+	int watchdog_tcp_cookie;
+	u32 watchdog_last_tcp_cookie;
+	struct delayed_work work;
+
+	int pending;
+	int dmacount;
+	int desccount;
+
+	struct ioatdma_device *device;
+	struct dma_chan common;
+
+	dma_addr_t completion_addr;
+	union {
+		u64 full; /* HW completion writeback */
+		struct {
+			u32 low;
+			u32 high;
+		};
+	} *completion_virt;
+	unsigned long last_compl_desc_addr_hw;
+	struct tasklet_struct cleanup_task;
+};
+
+/* wrapper around hardware descriptor format + additional software fields */
+
+/**
+ * struct ioat_desc_sw - wrapper around hardware descriptor
+ * @hw: hardware DMA descriptor
+ * @node: this descriptor will either be on the free list,
+ *     or attached to a transaction list (async_tx.tx_list)
+ * @tx_cnt: number of descriptors required to complete the transaction
+ * @async_tx: the generic software descriptor for all engines
+ */
+struct ioat_desc_sw {
+	struct ioat_dma_descriptor *hw;
+	struct list_head node;
+	int tx_cnt;
+	size_t len;
+	dma_addr_t src;
+	dma_addr_t dst;
+	struct dma_async_tx_descriptor async_tx;
+};
+
+static inline void ioat_set_tcp_copy_break(struct ioatdma_device *dev)
+{
+	#ifdef CONFIG_NET_DMA
+	switch (dev->version) {
+	case IOAT_VER_1_2:
+	case IOAT_VER_3_0:
+		sysctl_tcp_dma_copybreak = 4096;
+		break;
+	case IOAT_VER_2_0:
+		sysctl_tcp_dma_copybreak = 2048;
+		break;
+	}
+	#endif
+}
+
+#if defined(CONFIG_INTEL_IOATDMA) || defined(CONFIG_INTEL_IOATDMA_MODULE)
+struct ioatdma_device *ioat_dma_probe(struct pci_dev *pdev,
+				      void __iomem *iobase);
+void ioat_dma_remove(struct ioatdma_device *device);
+struct dca_provider *ioat_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+struct dca_provider *ioat2_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+struct dca_provider *ioat3_dca_init(struct pci_dev *pdev, void __iomem *iobase);
+#else
+#define ioat_dma_probe(pdev, iobase)    NULL
+#define ioat_dma_remove(device)         do { } while (0)
+#define ioat_dca_init(pdev, iobase)	NULL
+#define ioat2_dca_init(pdev, iobase)	NULL
+#define ioat3_dca_init(pdev, iobase)	NULL
+#endif
+
+#endif /* IOATDMA_H */
diff --git a/drivers/dma/ioatdma_hw.h b/drivers/dma/ioatdma_hw.h
new file mode 100644
index 0000000..f1ae2c7
--- /dev/null
+++ b/drivers/dma/ioatdma_hw.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright(c) 2004 - 2007 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_HW_H_
+#define _IOAT_HW_H_
+
+/* PCI Configuration Space Values */
+#define IOAT_PCI_VID            0x8086
+
+/* CB device ID's */
+#define IOAT_PCI_DID_5000       0x1A38
+#define IOAT_PCI_DID_CNB        0x360B
+#define IOAT_PCI_DID_SCNB       0x65FF
+#define IOAT_PCI_DID_SNB        0x402F
+
+#define IOAT_PCI_RID            0x00
+#define IOAT_PCI_SVID           0x8086
+#define IOAT_PCI_SID            0x8086
+#define IOAT_VER_1_2            0x12    /* Version 1.2 */
+#define IOAT_VER_2_0            0x20    /* Version 2.0 */
+#define IOAT_VER_3_0            0x30    /* Version 3.0 */
+
+struct ioat_dma_descriptor {
+	uint32_t	size;
+	uint32_t	ctl;
+	uint64_t	src_addr;
+	uint64_t	dst_addr;
+	uint64_t	next;
+	uint64_t	rsv1;
+	uint64_t	rsv2;
+	uint64_t	user1;
+	uint64_t	user2;
+};
+
+#define IOAT_DMA_DESCRIPTOR_CTL_INT_GN	0x00000001
+#define IOAT_DMA_DESCRIPTOR_CTL_SRC_SN	0x00000002
+#define IOAT_DMA_DESCRIPTOR_CTL_DST_SN	0x00000004
+#define IOAT_DMA_DESCRIPTOR_CTL_CP_STS	0x00000008
+#define IOAT_DMA_DESCRIPTOR_CTL_FRAME	0x00000010
+#define IOAT_DMA_DESCRIPTOR_NUL		0x00000020
+#define IOAT_DMA_DESCRIPTOR_CTL_SP_BRK	0x00000040
+#define IOAT_DMA_DESCRIPTOR_CTL_DP_BRK	0x00000080
+#define IOAT_DMA_DESCRIPTOR_CTL_BNDL	0x00000100
+#define IOAT_DMA_DESCRIPTOR_CTL_DCA	0x00000200
+#define IOAT_DMA_DESCRIPTOR_CTL_BUFHINT	0x00000400
+
+#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_CONTEXT	0xFF000000
+#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_DMA	0x00000000
+
+#define IOAT_DMA_DESCRIPTOR_CTL_CONTEXT_DCA	0x00000001
+#define IOAT_DMA_DESCRIPTOR_CTL_OPCODE_MASK	0xFF000000
+
+#endif
diff --git a/drivers/dma/ioatdma_registers.h b/drivers/dma/ioatdma_registers.h
new file mode 100644
index 0000000..827cb50
--- /dev/null
+++ b/drivers/dma/ioatdma_registers.h
@@ -0,0 +1,226 @@
+/*
+ * Copyright(c) 2004 - 2007 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef _IOAT_REGISTERS_H_
+#define _IOAT_REGISTERS_H_
+
+#define IOAT_PCI_DMACTRL_OFFSET			0x48
+#define IOAT_PCI_DMACTRL_DMA_EN			0x00000001
+#define IOAT_PCI_DMACTRL_MSI_EN			0x00000002
+
+#define IOAT_PCI_DEVICE_ID_OFFSET		0x02
+#define IOAT_PCI_DMAUNCERRSTS_OFFSET		0x148
+#define IOAT_PCI_CHANERRMASK_INT_OFFSET		0x184
+
+/* MMIO Device Registers */
+#define IOAT_CHANCNT_OFFSET			0x00	/*  8-bit */
+
+#define IOAT_XFERCAP_OFFSET			0x01	/*  8-bit */
+#define IOAT_XFERCAP_4KB			12
+#define IOAT_XFERCAP_8KB			13
+#define IOAT_XFERCAP_16KB			14
+#define IOAT_XFERCAP_32KB			15
+#define IOAT_XFERCAP_32GB			0
+
+#define IOAT_GENCTRL_OFFSET			0x02	/*  8-bit */
+#define IOAT_GENCTRL_DEBUG_EN			0x01
+
+#define IOAT_INTRCTRL_OFFSET			0x03	/*  8-bit */
+#define IOAT_INTRCTRL_MASTER_INT_EN		0x01	/* Master Interrupt Enable */
+#define IOAT_INTRCTRL_INT_STATUS		0x02	/* ATTNSTATUS -or- Channel Int */
+#define IOAT_INTRCTRL_INT			0x04	/* INT_STATUS -and- MASTER_INT_EN */
+#define IOAT_INTRCTRL_MSIX_VECTOR_CONTROL	0x08	/* Enable all MSI-X vectors */
+
+#define IOAT_ATTNSTATUS_OFFSET			0x04	/* Each bit is a channel */
+
+#define IOAT_VER_OFFSET				0x08	/*  8-bit */
+#define IOAT_VER_MAJOR_MASK			0xF0
+#define IOAT_VER_MINOR_MASK			0x0F
+#define GET_IOAT_VER_MAJOR(x)			(((x) & IOAT_VER_MAJOR_MASK) >> 4)
+#define GET_IOAT_VER_MINOR(x)			((x) & IOAT_VER_MINOR_MASK)
+
+#define IOAT_PERPORTOFFSET_OFFSET		0x0A	/* 16-bit */
+
+#define IOAT_INTRDELAY_OFFSET			0x0C	/* 16-bit */
+#define IOAT_INTRDELAY_INT_DELAY_MASK		0x3FFF	/* Interrupt Delay Time */
+#define IOAT_INTRDELAY_COALESE_SUPPORT		0x8000	/* Interrupt Coalescing Supported */
+
+#define IOAT_DEVICE_STATUS_OFFSET		0x0E	/* 16-bit */
+#define IOAT_DEVICE_STATUS_DEGRADED_MODE	0x0001
+
+#define IOAT_CHANNEL_MMIO_SIZE			0x80	/* Each Channel MMIO space is this size */
+
+/* DMA Channel Registers */
+#define IOAT_CHANCTRL_OFFSET			0x00	/* 16-bit Channel Control Register */
+#define IOAT_CHANCTRL_CHANNEL_PRIORITY_MASK	0xF000
+#define IOAT_CHANCTRL_CHANNEL_IN_USE		0x0100
+#define IOAT_CHANCTRL_DESCRIPTOR_ADDR_SNOOP_CONTROL	0x0020
+#define IOAT_CHANCTRL_ERR_INT_EN		0x0010
+#define IOAT_CHANCTRL_ANY_ERR_ABORT_EN		0x0008
+#define IOAT_CHANCTRL_ERR_COMPLETION_EN		0x0004
+#define IOAT_CHANCTRL_INT_DISABLE		0x0001
+
+#define IOAT_DMA_COMP_OFFSET			0x02	/* 16-bit DMA channel compatibility */
+#define IOAT_DMA_COMP_V1			0x0001	/* Compatibility with DMA version 1 */
+#define IOAT_DMA_COMP_V2			0x0002	/* Compatibility with DMA version 2 */
+
+
+#define IOAT1_CHANSTS_OFFSET		0x04	/* 64-bit Channel Status Register */
+#define IOAT2_CHANSTS_OFFSET		0x08	/* 64-bit Channel Status Register */
+#define IOAT_CHANSTS_OFFSET(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHANSTS_OFFSET : IOAT2_CHANSTS_OFFSET)
+#define IOAT1_CHANSTS_OFFSET_LOW	0x04
+#define IOAT2_CHANSTS_OFFSET_LOW	0x08
+#define IOAT_CHANSTS_OFFSET_LOW(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHANSTS_OFFSET_LOW : IOAT2_CHANSTS_OFFSET_LOW)
+#define IOAT1_CHANSTS_OFFSET_HIGH	0x08
+#define IOAT2_CHANSTS_OFFSET_HIGH	0x0C
+#define IOAT_CHANSTS_OFFSET_HIGH(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHANSTS_OFFSET_HIGH : IOAT2_CHANSTS_OFFSET_HIGH)
+#define IOAT_CHANSTS_COMPLETED_DESCRIPTOR_ADDR	~0x3F
+#define IOAT_CHANSTS_SOFT_ERR			0x0000000000000010
+#define IOAT_CHANSTS_UNAFFILIATED_ERR		0x0000000000000008
+#define IOAT_CHANSTS_DMA_TRANSFER_STATUS	0x0000000000000007
+#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_ACTIVE	0x0
+#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_DONE	0x1
+#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_SUSPENDED	0x2
+#define IOAT_CHANSTS_DMA_TRANSFER_STATUS_HALTED	0x3
+
+
+
+#define IOAT_CHAN_DMACOUNT_OFFSET	0x06    /* 16-bit DMA Count register */
+
+#define IOAT_DCACTRL_OFFSET         0x30   /* 32 bit Direct Cache Access Control Register */
+#define IOAT_DCACTRL_CMPL_WRITE_ENABLE 0x10000
+#define IOAT_DCACTRL_TARGET_CPU_MASK   0xFFFF /* APIC ID */
+
+/* CB DCA Memory Space Registers */
+#define IOAT_DCAOFFSET_OFFSET       0x14
+/* CB_BAR + IOAT_DCAOFFSET value */
+#define IOAT_DCA_VER_OFFSET         0x00
+#define IOAT_DCA_VER_MAJOR_MASK     0xF0
+#define IOAT_DCA_VER_MINOR_MASK     0x0F
+
+#define IOAT_DCA_COMP_OFFSET        0x02
+#define IOAT_DCA_COMP_V1            0x1
+
+#define IOAT_FSB_CAPABILITY_OFFSET  0x04
+#define IOAT_FSB_CAPABILITY_PREFETCH    0x1
+
+#define IOAT_PCI_CAPABILITY_OFFSET  0x06
+#define IOAT_PCI_CAPABILITY_MEMWR   0x1
+
+#define IOAT_FSB_CAP_ENABLE_OFFSET  0x08
+#define IOAT_FSB_CAP_ENABLE_PREFETCH    0x1
+
+#define IOAT_PCI_CAP_ENABLE_OFFSET  0x0A
+#define IOAT_PCI_CAP_ENABLE_MEMWR   0x1
+
+#define IOAT_APICID_TAG_MAP_OFFSET  0x0C
+#define IOAT_APICID_TAG_MAP_TAG0    0x0000000F
+#define IOAT_APICID_TAG_MAP_TAG0_SHIFT 0
+#define IOAT_APICID_TAG_MAP_TAG1    0x000000F0
+#define IOAT_APICID_TAG_MAP_TAG1_SHIFT 4
+#define IOAT_APICID_TAG_MAP_TAG2    0x00000F00
+#define IOAT_APICID_TAG_MAP_TAG2_SHIFT 8
+#define IOAT_APICID_TAG_MAP_TAG3    0x0000F000
+#define IOAT_APICID_TAG_MAP_TAG3_SHIFT 12
+#define IOAT_APICID_TAG_MAP_TAG4    0x000F0000
+#define IOAT_APICID_TAG_MAP_TAG4_SHIFT 16
+#define IOAT_APICID_TAG_CB2_VALID   0x8080808080
+
+#define IOAT_DCA_GREQID_OFFSET      0x10
+#define IOAT_DCA_GREQID_SIZE        0x04
+#define IOAT_DCA_GREQID_MASK        0xFFFF
+#define IOAT_DCA_GREQID_IGNOREFUN   0x10000000
+#define IOAT_DCA_GREQID_VALID       0x20000000
+#define IOAT_DCA_GREQID_LASTID      0x80000000
+
+#define IOAT3_CSI_CAPABILITY_OFFSET 0x08
+#define IOAT3_CSI_CAPABILITY_PREFETCH    0x1
+
+#define IOAT3_PCI_CAPABILITY_OFFSET 0x0A
+#define IOAT3_PCI_CAPABILITY_MEMWR  0x1
+
+#define IOAT3_CSI_CONTROL_OFFSET    0x0C
+#define IOAT3_CSI_CONTROL_PREFETCH  0x1
+
+#define IOAT3_PCI_CONTROL_OFFSET    0x0E
+#define IOAT3_PCI_CONTROL_MEMWR     0x1
+
+#define IOAT3_APICID_TAG_MAP_OFFSET 0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_LOW  0x10
+#define IOAT3_APICID_TAG_MAP_OFFSET_HIGH 0x14
+
+#define IOAT3_DCA_GREQID_OFFSET     0x02
+
+#define IOAT1_CHAINADDR_OFFSET		0x0C	/* 64-bit Descriptor Chain Address Register */
+#define IOAT2_CHAINADDR_OFFSET		0x10	/* 64-bit Descriptor Chain Address Register */
+#define IOAT_CHAINADDR_OFFSET(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHAINADDR_OFFSET : IOAT2_CHAINADDR_OFFSET)
+#define IOAT1_CHAINADDR_OFFSET_LOW	0x0C
+#define IOAT2_CHAINADDR_OFFSET_LOW	0x10
+#define IOAT_CHAINADDR_OFFSET_LOW(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHAINADDR_OFFSET_LOW : IOAT2_CHAINADDR_OFFSET_LOW)
+#define IOAT1_CHAINADDR_OFFSET_HIGH	0x10
+#define IOAT2_CHAINADDR_OFFSET_HIGH	0x14
+#define IOAT_CHAINADDR_OFFSET_HIGH(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHAINADDR_OFFSET_HIGH : IOAT2_CHAINADDR_OFFSET_HIGH)
+
+#define IOAT1_CHANCMD_OFFSET		0x14	/*  8-bit DMA Channel Command Register */
+#define IOAT2_CHANCMD_OFFSET		0x04	/*  8-bit DMA Channel Command Register */
+#define IOAT_CHANCMD_OFFSET(ver)		((ver) < IOAT_VER_2_0 \
+						? IOAT1_CHANCMD_OFFSET : IOAT2_CHANCMD_OFFSET)
+#define IOAT_CHANCMD_RESET			0x20
+#define IOAT_CHANCMD_RESUME			0x10
+#define IOAT_CHANCMD_ABORT			0x08
+#define IOAT_CHANCMD_SUSPEND			0x04
+#define IOAT_CHANCMD_APPEND			0x02
+#define IOAT_CHANCMD_START			0x01
+
+#define IOAT_CHANCMP_OFFSET			0x18	/* 64-bit Channel Completion Address Register */
+#define IOAT_CHANCMP_OFFSET_LOW			0x18
+#define IOAT_CHANCMP_OFFSET_HIGH		0x1C
+
+#define IOAT_CDAR_OFFSET			0x20	/* 64-bit Current Descriptor Address Register */
+#define IOAT_CDAR_OFFSET_LOW			0x20
+#define IOAT_CDAR_OFFSET_HIGH			0x24
+
+#define IOAT_CHANERR_OFFSET			0x28	/* 32-bit Channel Error Register */
+#define IOAT_CHANERR_DMA_TRANSFER_SRC_ADDR_ERR	0x0001
+#define IOAT_CHANERR_DMA_TRANSFER_DEST_ADDR_ERR	0x0002
+#define IOAT_CHANERR_NEXT_DESCRIPTOR_ADDR_ERR	0x0004
+#define IOAT_CHANERR_NEXT_DESCRIPTOR_ALIGNMENT_ERR	0x0008
+#define IOAT_CHANERR_CHAIN_ADDR_VALUE_ERR	0x0010
+#define IOAT_CHANERR_CHANCMD_ERR		0x0020
+#define IOAT_CHANERR_CHIPSET_UNCORRECTABLE_DATA_INTEGRITY_ERR	0x0040
+#define IOAT_CHANERR_DMA_UNCORRECTABLE_DATA_INTEGRITY_ERR	0x0080
+#define IOAT_CHANERR_READ_DATA_ERR		0x0100
+#define IOAT_CHANERR_WRITE_DATA_ERR		0x0200
+#define IOAT_CHANERR_DESCRIPTOR_CONTROL_ERR	0x0400
+#define IOAT_CHANERR_DESCRIPTOR_LENGTH_ERR	0x0800
+#define IOAT_CHANERR_COMPLETION_ADDR_ERR	0x1000
+#define IOAT_CHANERR_INT_CONFIGURATION_ERR	0x2000
+#define IOAT_CHANERR_SOFT_ERR			0x4000
+#define IOAT_CHANERR_UNAFFILIATED_ERR		0x8000
+
+#define IOAT_CHANERR_MASK_OFFSET		0x2C	/* 32-bit Channel Error Register */
+
+#endif /* _IOAT_REGISTERS_H_ */
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
new file mode 100644
index 0000000..6be3172
--- /dev/null
+++ b/drivers/dma/iop-adma.c
@@ -0,0 +1,1448 @@
+/*
+ * offload engine driver for the Intel Xscale series of i/o processors
+ * Copyright © 2006, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ */
+
+/*
+ * This driver supports the asynchrounous DMA copy and RAID engines available
+ * on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/async_tx.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/memory.h>
+#include <linux/ioport.h>
+
+#include <mach/adma.h>
+
+#define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common)
+#define to_iop_adma_device(dev) \
+	container_of(dev, struct iop_adma_device, common)
+#define tx_to_iop_adma_slot(tx) \
+	container_of(tx, struct iop_adma_desc_slot, async_tx)
+
+/**
+ * iop_adma_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &iop_chan->lock while calling this function
+ */
+static void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
+{
+	int stride = slot->slots_per_op;
+
+	while (stride--) {
+		slot->slots_per_op = 0;
+		slot = list_entry(slot->slot_node.next,
+				struct iop_adma_desc_slot,
+				slot_node);
+	}
+}
+
+static dma_cookie_t
+iop_adma_run_tx_complete_actions(struct iop_adma_desc_slot *desc,
+	struct iop_adma_chan *iop_chan, dma_cookie_t cookie)
+{
+	BUG_ON(desc->async_tx.cookie < 0);
+	if (desc->async_tx.cookie > 0) {
+		cookie = desc->async_tx.cookie;
+		desc->async_tx.cookie = 0;
+
+		/* call the callback (must not sleep or submit new
+		 * operations to this channel)
+		 */
+		if (desc->async_tx.callback)
+			desc->async_tx.callback(
+				desc->async_tx.callback_param);
+
+		/* unmap dma addresses
+		 * (unmap_single vs unmap_page?)
+		 */
+		if (desc->group_head && desc->unmap_len) {
+			struct iop_adma_desc_slot *unmap = desc->group_head;
+			struct device *dev =
+				&iop_chan->device->pdev->dev;
+			u32 len = unmap->unmap_len;
+			enum dma_ctrl_flags flags = desc->async_tx.flags;
+			u32 src_cnt;
+			dma_addr_t addr;
+			dma_addr_t dest;
+
+			src_cnt = unmap->unmap_src_cnt;
+			dest = iop_desc_get_dest_addr(unmap, iop_chan);
+			if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+				enum dma_data_direction dir;
+
+				if (src_cnt > 1) /* is xor? */
+					dir = DMA_BIDIRECTIONAL;
+				else
+					dir = DMA_FROM_DEVICE;
+
+				dma_unmap_page(dev, dest, len, dir);
+			}
+
+			if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+				while (src_cnt--) {
+					addr = iop_desc_get_src_addr(unmap,
+								     iop_chan,
+								     src_cnt);
+					if (addr == dest)
+						continue;
+					dma_unmap_page(dev, addr, len,
+						       DMA_TO_DEVICE);
+				}
+			}
+			desc->group_head = NULL;
+		}
+	}
+
+	/* run dependent operations */
+	async_tx_run_dependencies(&desc->async_tx);
+
+	return cookie;
+}
+
+static int
+iop_adma_clean_slot(struct iop_adma_desc_slot *desc,
+	struct iop_adma_chan *iop_chan)
+{
+	/* the client is allowed to attach dependent operations
+	 * until 'ack' is set
+	 */
+	if (!async_tx_test_ack(&desc->async_tx))
+		return 0;
+
+	/* leave the last descriptor in the chain
+	 * so we can append to it
+	 */
+	if (desc->chain_node.next == &iop_chan->chain)
+		return 1;
+
+	dev_dbg(iop_chan->device->common.dev,
+		"\tfree slot: %d slots_per_op: %d\n",
+		desc->idx, desc->slots_per_op);
+
+	list_del(&desc->chain_node);
+	iop_adma_free_slots(desc);
+
+	return 0;
+}
+
+static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
+{
+	struct iop_adma_desc_slot *iter, *_iter, *grp_start = NULL;
+	dma_cookie_t cookie = 0;
+	u32 current_desc = iop_chan_get_current_descriptor(iop_chan);
+	int busy = iop_chan_is_busy(iop_chan);
+	int seen_current = 0, slot_cnt = 0, slots_per_op = 0;
+
+	dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+	/* free completed slots from the chain starting with
+	 * the oldest descriptor
+	 */
+	list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
+					chain_node) {
+		pr_debug("\tcookie: %d slot: %d busy: %d "
+			"this_desc: %#x next_desc: %#x ack: %d\n",
+			iter->async_tx.cookie, iter->idx, busy,
+			iter->async_tx.phys, iop_desc_get_next_desc(iter),
+			async_tx_test_ack(&iter->async_tx));
+		prefetch(_iter);
+		prefetch(&_iter->async_tx);
+
+		/* do not advance past the current descriptor loaded into the
+		 * hardware channel, subsequent descriptors are either in
+		 * process or have not been submitted
+		 */
+		if (seen_current)
+			break;
+
+		/* stop the search if we reach the current descriptor and the
+		 * channel is busy, or if it appears that the current descriptor
+		 * needs to be re-read (i.e. has been appended to)
+		 */
+		if (iter->async_tx.phys == current_desc) {
+			BUG_ON(seen_current++);
+			if (busy || iop_desc_get_next_desc(iter))
+				break;
+		}
+
+		/* detect the start of a group transaction */
+		if (!slot_cnt && !slots_per_op) {
+			slot_cnt = iter->slot_cnt;
+			slots_per_op = iter->slots_per_op;
+			if (slot_cnt <= slots_per_op) {
+				slot_cnt = 0;
+				slots_per_op = 0;
+			}
+		}
+
+		if (slot_cnt) {
+			pr_debug("\tgroup++\n");
+			if (!grp_start)
+				grp_start = iter;
+			slot_cnt -= slots_per_op;
+		}
+
+		/* all the members of a group are complete */
+		if (slots_per_op != 0 && slot_cnt == 0) {
+			struct iop_adma_desc_slot *grp_iter, *_grp_iter;
+			int end_of_chain = 0;
+			pr_debug("\tgroup end\n");
+
+			/* collect the total results */
+			if (grp_start->xor_check_result) {
+				u32 zero_sum_result = 0;
+				slot_cnt = grp_start->slot_cnt;
+				grp_iter = grp_start;
+
+				list_for_each_entry_from(grp_iter,
+					&iop_chan->chain, chain_node) {
+					zero_sum_result |=
+					    iop_desc_get_zero_result(grp_iter);
+					    pr_debug("\titer%d result: %d\n",
+					    grp_iter->idx, zero_sum_result);
+					slot_cnt -= slots_per_op;
+					if (slot_cnt == 0)
+						break;
+				}
+				pr_debug("\tgrp_start->xor_check_result: %p\n",
+					grp_start->xor_check_result);
+				*grp_start->xor_check_result = zero_sum_result;
+			}
+
+			/* clean up the group */
+			slot_cnt = grp_start->slot_cnt;
+			grp_iter = grp_start;
+			list_for_each_entry_safe_from(grp_iter, _grp_iter,
+				&iop_chan->chain, chain_node) {
+				cookie = iop_adma_run_tx_complete_actions(
+					grp_iter, iop_chan, cookie);
+
+				slot_cnt -= slots_per_op;
+				end_of_chain = iop_adma_clean_slot(grp_iter,
+					iop_chan);
+
+				if (slot_cnt == 0 || end_of_chain)
+					break;
+			}
+
+			/* the group should be complete at this point */
+			BUG_ON(slot_cnt);
+
+			slots_per_op = 0;
+			grp_start = NULL;
+			if (end_of_chain)
+				break;
+			else
+				continue;
+		} else if (slots_per_op) /* wait for group completion */
+			continue;
+
+		/* write back zero sum results (single descriptor case) */
+		if (iter->xor_check_result && iter->async_tx.cookie)
+			*iter->xor_check_result =
+				iop_desc_get_zero_result(iter);
+
+		cookie = iop_adma_run_tx_complete_actions(
+					iter, iop_chan, cookie);
+
+		if (iop_adma_clean_slot(iter, iop_chan))
+			break;
+	}
+
+	BUG_ON(!seen_current);
+
+	if (cookie > 0) {
+		iop_chan->completed_cookie = cookie;
+		pr_debug("\tcompleted cookie %d\n", cookie);
+	}
+}
+
+static void
+iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
+{
+	spin_lock_bh(&iop_chan->lock);
+	__iop_adma_slot_cleanup(iop_chan);
+	spin_unlock_bh(&iop_chan->lock);
+}
+
+static void iop_adma_tasklet(unsigned long data)
+{
+	struct iop_adma_chan *iop_chan = (struct iop_adma_chan *) data;
+
+	spin_lock(&iop_chan->lock);
+	__iop_adma_slot_cleanup(iop_chan);
+	spin_unlock(&iop_chan->lock);
+}
+
+static struct iop_adma_desc_slot *
+iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots,
+			int slots_per_op)
+{
+	struct iop_adma_desc_slot *iter, *_iter, *alloc_start = NULL;
+	LIST_HEAD(chain);
+	int slots_found, retry = 0;
+
+	/* start search from the last allocated descrtiptor
+	 * if a contiguous allocation can not be found start searching
+	 * from the beginning of the list
+	 */
+retry:
+	slots_found = 0;
+	if (retry == 0)
+		iter = iop_chan->last_used;
+	else
+		iter = list_entry(&iop_chan->all_slots,
+			struct iop_adma_desc_slot,
+			slot_node);
+
+	list_for_each_entry_safe_continue(
+		iter, _iter, &iop_chan->all_slots, slot_node) {
+		prefetch(_iter);
+		prefetch(&_iter->async_tx);
+		if (iter->slots_per_op) {
+			/* give up after finding the first busy slot
+			 * on the second pass through the list
+			 */
+			if (retry)
+				break;
+
+			slots_found = 0;
+			continue;
+		}
+
+		/* start the allocation if the slot is correctly aligned */
+		if (!slots_found++) {
+			if (iop_desc_is_aligned(iter, slots_per_op))
+				alloc_start = iter;
+			else {
+				slots_found = 0;
+				continue;
+			}
+		}
+
+		if (slots_found == num_slots) {
+			struct iop_adma_desc_slot *alloc_tail = NULL;
+			struct iop_adma_desc_slot *last_used = NULL;
+			iter = alloc_start;
+			while (num_slots) {
+				int i;
+				dev_dbg(iop_chan->device->common.dev,
+					"allocated slot: %d "
+					"(desc %p phys: %#x) slots_per_op %d\n",
+					iter->idx, iter->hw_desc,
+					iter->async_tx.phys, slots_per_op);
+
+				/* pre-ack all but the last descriptor */
+				if (num_slots != slots_per_op)
+					async_tx_ack(&iter->async_tx);
+
+				list_add_tail(&iter->chain_node, &chain);
+				alloc_tail = iter;
+				iter->async_tx.cookie = 0;
+				iter->slot_cnt = num_slots;
+				iter->xor_check_result = NULL;
+				for (i = 0; i < slots_per_op; i++) {
+					iter->slots_per_op = slots_per_op - i;
+					last_used = iter;
+					iter = list_entry(iter->slot_node.next,
+						struct iop_adma_desc_slot,
+						slot_node);
+				}
+				num_slots -= slots_per_op;
+			}
+			alloc_tail->group_head = alloc_start;
+			alloc_tail->async_tx.cookie = -EBUSY;
+			list_splice(&chain, &alloc_tail->async_tx.tx_list);
+			iop_chan->last_used = last_used;
+			iop_desc_clear_next_desc(alloc_start);
+			iop_desc_clear_next_desc(alloc_tail);
+			return alloc_tail;
+		}
+	}
+	if (!retry++)
+		goto retry;
+
+	/* perform direct reclaim if the allocation fails */
+	__iop_adma_slot_cleanup(iop_chan);
+
+	return NULL;
+}
+
+static dma_cookie_t
+iop_desc_assign_cookie(struct iop_adma_chan *iop_chan,
+	struct iop_adma_desc_slot *desc)
+{
+	dma_cookie_t cookie = iop_chan->common.cookie;
+	cookie++;
+	if (cookie < 0)
+		cookie = 1;
+	iop_chan->common.cookie = desc->async_tx.cookie = cookie;
+	return cookie;
+}
+
+static void iop_adma_check_threshold(struct iop_adma_chan *iop_chan)
+{
+	dev_dbg(iop_chan->device->common.dev, "pending: %d\n",
+		iop_chan->pending);
+
+	if (iop_chan->pending >= IOP_ADMA_THRESHOLD) {
+		iop_chan->pending = 0;
+		iop_chan_append(iop_chan);
+	}
+}
+
+static dma_cookie_t
+iop_adma_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct iop_adma_desc_slot *sw_desc = tx_to_iop_adma_slot(tx);
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(tx->chan);
+	struct iop_adma_desc_slot *grp_start, *old_chain_tail;
+	int slot_cnt;
+	int slots_per_op;
+	dma_cookie_t cookie;
+	dma_addr_t next_dma;
+
+	grp_start = sw_desc->group_head;
+	slot_cnt = grp_start->slot_cnt;
+	slots_per_op = grp_start->slots_per_op;
+
+	spin_lock_bh(&iop_chan->lock);
+	cookie = iop_desc_assign_cookie(iop_chan, sw_desc);
+
+	old_chain_tail = list_entry(iop_chan->chain.prev,
+		struct iop_adma_desc_slot, chain_node);
+	list_splice_init(&sw_desc->async_tx.tx_list,
+			 &old_chain_tail->chain_node);
+
+	/* fix up the hardware chain */
+	next_dma = grp_start->async_tx.phys;
+	iop_desc_set_next_desc(old_chain_tail, next_dma);
+	BUG_ON(iop_desc_get_next_desc(old_chain_tail) != next_dma); /* flush */
+
+	/* check for pre-chained descriptors */
+	iop_paranoia(iop_desc_get_next_desc(sw_desc));
+
+	/* increment the pending count by the number of slots
+	 * memcpy operations have a 1:1 (slot:operation) relation
+	 * other operations are heavier and will pop the threshold
+	 * more often.
+	 */
+	iop_chan->pending += slot_cnt;
+	iop_adma_check_threshold(iop_chan);
+	spin_unlock_bh(&iop_chan->lock);
+
+	dev_dbg(iop_chan->device->common.dev, "%s cookie: %d slot: %d\n",
+		__func__, sw_desc->async_tx.cookie, sw_desc->idx);
+
+	return cookie;
+}
+
+static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan);
+static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan);
+
+/**
+ * iop_adma_alloc_chan_resources -  returns the number of allocated descriptors
+ * @chan - allocate descriptor resources for this channel
+ * @client - current client requesting the channel be ready for requests
+ *
+ * Note: We keep the slots for 1 operation on iop_chan->chain at all times.  To
+ * avoid deadlock, via async_xor, num_descs_in_pool must at a minimum be
+ * greater than 2x the number slots needed to satisfy a device->max_xor
+ * request.
+ * */
+static int iop_adma_alloc_chan_resources(struct dma_chan *chan,
+					 struct dma_client *client)
+{
+	char *hw_desc;
+	int idx;
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *slot = NULL;
+	int init = iop_chan->slots_allocated ? 0 : 1;
+	struct iop_adma_platform_data *plat_data =
+		iop_chan->device->pdev->dev.platform_data;
+	int num_descs_in_pool = plat_data->pool_size/IOP_ADMA_SLOT_SIZE;
+
+	/* Allocate descriptor slots */
+	do {
+		idx = iop_chan->slots_allocated;
+		if (idx == num_descs_in_pool)
+			break;
+
+		slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+		if (!slot) {
+			printk(KERN_INFO "IOP ADMA Channel only initialized"
+				" %d descriptor slots", idx);
+			break;
+		}
+		hw_desc = (char *) iop_chan->device->dma_desc_pool_virt;
+		slot->hw_desc = (void *) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
+
+		dma_async_tx_descriptor_init(&slot->async_tx, chan);
+		slot->async_tx.tx_submit = iop_adma_tx_submit;
+		INIT_LIST_HEAD(&slot->chain_node);
+		INIT_LIST_HEAD(&slot->slot_node);
+		INIT_LIST_HEAD(&slot->async_tx.tx_list);
+		hw_desc = (char *) iop_chan->device->dma_desc_pool;
+		slot->async_tx.phys =
+			(dma_addr_t) &hw_desc[idx * IOP_ADMA_SLOT_SIZE];
+		slot->idx = idx;
+
+		spin_lock_bh(&iop_chan->lock);
+		iop_chan->slots_allocated++;
+		list_add_tail(&slot->slot_node, &iop_chan->all_slots);
+		spin_unlock_bh(&iop_chan->lock);
+	} while (iop_chan->slots_allocated < num_descs_in_pool);
+
+	if (idx && !iop_chan->last_used)
+		iop_chan->last_used = list_entry(iop_chan->all_slots.next,
+					struct iop_adma_desc_slot,
+					slot_node);
+
+	dev_dbg(iop_chan->device->common.dev,
+		"allocated %d descriptor slots last_used: %p\n",
+		iop_chan->slots_allocated, iop_chan->last_used);
+
+	/* initialize the channel and the chain with a null operation */
+	if (init) {
+		if (dma_has_cap(DMA_MEMCPY,
+			iop_chan->device->common.cap_mask))
+			iop_chan_start_null_memcpy(iop_chan);
+		else if (dma_has_cap(DMA_XOR,
+			iop_chan->device->common.cap_mask))
+			iop_chan_start_null_xor(iop_chan);
+		else
+			BUG();
+	}
+
+	return (idx > 0) ? idx : -ENOMEM;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_interrupt(struct dma_chan *chan, unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	int slot_cnt, slots_per_op;
+
+	dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_interrupt_slot_count(&slots_per_op, iop_chan);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+		iop_desc_init_interrupt(grp_start, iop_chan);
+		grp_start->unmap_len = 0;
+		sw_desc->async_tx.flags = flags;
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dma_dest,
+			 dma_addr_t dma_src, size_t len, unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	int slot_cnt, slots_per_op;
+
+	if (unlikely(!len))
+		return NULL;
+	BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT));
+
+	dev_dbg(iop_chan->device->common.dev, "%s len: %u\n",
+		__func__, len);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_memcpy_slot_count(len, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+		iop_desc_init_memcpy(grp_start, flags);
+		iop_desc_set_byte_count(grp_start, iop_chan, len);
+		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
+		iop_desc_set_memcpy_src_addr(grp_start, dma_src);
+		sw_desc->unmap_src_cnt = 1;
+		sw_desc->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_memset(struct dma_chan *chan, dma_addr_t dma_dest,
+			 int value, size_t len, unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	int slot_cnt, slots_per_op;
+
+	if (unlikely(!len))
+		return NULL;
+	BUG_ON(unlikely(len > IOP_ADMA_MAX_BYTE_COUNT));
+
+	dev_dbg(iop_chan->device->common.dev, "%s len: %u\n",
+		__func__, len);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_memset_slot_count(len, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+		iop_desc_init_memset(grp_start, flags);
+		iop_desc_set_byte_count(grp_start, iop_chan, len);
+		iop_desc_set_block_fill_val(grp_start, value);
+		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
+		sw_desc->unmap_src_cnt = 1;
+		sw_desc->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_xor(struct dma_chan *chan, dma_addr_t dma_dest,
+		      dma_addr_t *dma_src, unsigned int src_cnt, size_t len,
+		      unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	int slot_cnt, slots_per_op;
+
+	if (unlikely(!len))
+		return NULL;
+	BUG_ON(unlikely(len > IOP_ADMA_XOR_MAX_BYTE_COUNT));
+
+	dev_dbg(iop_chan->device->common.dev,
+		"%s src_cnt: %d len: %u flags: %lx\n",
+		__func__, src_cnt, len, flags);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+		iop_desc_init_xor(grp_start, src_cnt, flags);
+		iop_desc_set_byte_count(grp_start, iop_chan, len);
+		iop_desc_set_dest_addr(grp_start, iop_chan, dma_dest);
+		sw_desc->unmap_src_cnt = src_cnt;
+		sw_desc->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+		while (src_cnt--)
+			iop_desc_set_xor_src_addr(grp_start, src_cnt,
+						  dma_src[src_cnt]);
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+iop_adma_prep_dma_zero_sum(struct dma_chan *chan, dma_addr_t *dma_src,
+			   unsigned int src_cnt, size_t len, u32 *result,
+			   unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	int slot_cnt, slots_per_op;
+
+	if (unlikely(!len))
+		return NULL;
+
+	dev_dbg(iop_chan->device->common.dev, "%s src_cnt: %d len: %u\n",
+		__func__, src_cnt, len);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_zero_sum_slot_count(len, src_cnt, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+		iop_desc_init_zero_sum(grp_start, src_cnt, flags);
+		iop_desc_set_zero_sum_byte_count(grp_start, len);
+		grp_start->xor_check_result = result;
+		pr_debug("\t%s: grp_start->xor_check_result: %p\n",
+			__func__, grp_start->xor_check_result);
+		sw_desc->unmap_src_cnt = src_cnt;
+		sw_desc->unmap_len = len;
+		sw_desc->async_tx.flags = flags;
+		while (src_cnt--)
+			iop_desc_set_zero_sum_src_addr(grp_start, src_cnt,
+						       dma_src[src_cnt]);
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static void iop_adma_free_chan_resources(struct dma_chan *chan)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *iter, *_iter;
+	int in_use_descs = 0;
+
+	iop_adma_slot_cleanup(iop_chan);
+
+	spin_lock_bh(&iop_chan->lock);
+	list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
+					chain_node) {
+		in_use_descs++;
+		list_del(&iter->chain_node);
+	}
+	list_for_each_entry_safe_reverse(
+		iter, _iter, &iop_chan->all_slots, slot_node) {
+		list_del(&iter->slot_node);
+		kfree(iter);
+		iop_chan->slots_allocated--;
+	}
+	iop_chan->last_used = NULL;
+
+	dev_dbg(iop_chan->device->common.dev, "%s slots_allocated %d\n",
+		__func__, iop_chan->slots_allocated);
+	spin_unlock_bh(&iop_chan->lock);
+
+	/* one is ok since we left it on there on purpose */
+	if (in_use_descs > 1)
+		printk(KERN_ERR "IOP: Freeing %d in use descriptors!\n",
+			in_use_descs - 1);
+}
+
+/**
+ * iop_adma_is_complete - poll the status of an ADMA transaction
+ * @chan: ADMA channel handle
+ * @cookie: ADMA transaction identifier
+ */
+static enum dma_status iop_adma_is_complete(struct dma_chan *chan,
+					dma_cookie_t cookie,
+					dma_cookie_t *done,
+					dma_cookie_t *used)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	dma_cookie_t last_used;
+	dma_cookie_t last_complete;
+	enum dma_status ret;
+
+	last_used = chan->cookie;
+	last_complete = iop_chan->completed_cookie;
+
+	if (done)
+		*done = last_complete;
+	if (used)
+		*used = last_used;
+
+	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	if (ret == DMA_SUCCESS)
+		return ret;
+
+	iop_adma_slot_cleanup(iop_chan);
+
+	last_used = chan->cookie;
+	last_complete = iop_chan->completed_cookie;
+
+	if (done)
+		*done = last_complete;
+	if (used)
+		*used = last_used;
+
+	return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static irqreturn_t iop_adma_eot_handler(int irq, void *data)
+{
+	struct iop_adma_chan *chan = data;
+
+	dev_dbg(chan->device->common.dev, "%s\n", __func__);
+
+	tasklet_schedule(&chan->irq_tasklet);
+
+	iop_adma_device_clear_eot_status(chan);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t iop_adma_eoc_handler(int irq, void *data)
+{
+	struct iop_adma_chan *chan = data;
+
+	dev_dbg(chan->device->common.dev, "%s\n", __func__);
+
+	tasklet_schedule(&chan->irq_tasklet);
+
+	iop_adma_device_clear_eoc_status(chan);
+
+	return IRQ_HANDLED;
+}
+
+static irqreturn_t iop_adma_err_handler(int irq, void *data)
+{
+	struct iop_adma_chan *chan = data;
+	unsigned long status = iop_chan_get_status(chan);
+
+	dev_printk(KERN_ERR, chan->device->common.dev,
+		"error ( %s%s%s%s%s%s%s)\n",
+		iop_is_err_int_parity(status, chan) ? "int_parity " : "",
+		iop_is_err_mcu_abort(status, chan) ? "mcu_abort " : "",
+		iop_is_err_int_tabort(status, chan) ? "int_tabort " : "",
+		iop_is_err_int_mabort(status, chan) ? "int_mabort " : "",
+		iop_is_err_pci_tabort(status, chan) ? "pci_tabort " : "",
+		iop_is_err_pci_mabort(status, chan) ? "pci_mabort " : "",
+		iop_is_err_split_tx(status, chan) ? "split_tx " : "");
+
+	iop_adma_device_clear_err_status(chan);
+
+	BUG();
+
+	return IRQ_HANDLED;
+}
+
+static void iop_adma_issue_pending(struct dma_chan *chan)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+
+	if (iop_chan->pending) {
+		iop_chan->pending = 0;
+		iop_chan_append(iop_chan);
+	}
+}
+
+/*
+ * Perform a transaction to verify the HW works.
+ */
+#define IOP_ADMA_TEST_SIZE 2000
+
+static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device)
+{
+	int i;
+	void *src, *dest;
+	dma_addr_t src_dma, dest_dma;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	struct dma_async_tx_descriptor *tx;
+	int err = 0;
+	struct iop_adma_chan *iop_chan;
+
+	dev_dbg(device->common.dev, "%s\n", __func__);
+
+	src = kmalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL);
+	if (!src)
+		return -ENOMEM;
+	dest = kzalloc(IOP_ADMA_TEST_SIZE, GFP_KERNEL);
+	if (!dest) {
+		kfree(src);
+		return -ENOMEM;
+	}
+
+	/* Fill in src buffer */
+	for (i = 0; i < IOP_ADMA_TEST_SIZE; i++)
+		((u8 *) src)[i] = (u8)i;
+
+	/* Start copy, using first DMA channel */
+	dma_chan = container_of(device->common.channels.next,
+				struct dma_chan,
+				device_node);
+	if (iop_adma_alloc_chan_resources(dma_chan, NULL) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	dest_dma = dma_map_single(dma_chan->device->dev, dest,
+				IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
+	src_dma = dma_map_single(dma_chan->device->dev, src,
+				IOP_ADMA_TEST_SIZE, DMA_TO_DEVICE);
+	tx = iop_adma_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
+				      IOP_ADMA_TEST_SIZE,
+				      DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(1);
+
+	if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+			DMA_SUCCESS) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			"Self-test copy timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	iop_chan = to_iop_adma_chan(dma_chan);
+	dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
+		IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
+	if (memcmp(src, dest, IOP_ADMA_TEST_SIZE)) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			"Self-test copy failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+free_resources:
+	iop_adma_free_chan_resources(dma_chan);
+out:
+	kfree(src);
+	kfree(dest);
+	return err;
+}
+
+#define IOP_ADMA_NUM_SRC_TEST 4 /* must be <= 15 */
+static int __devinit
+iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
+{
+	int i, src_idx;
+	struct page *dest;
+	struct page *xor_srcs[IOP_ADMA_NUM_SRC_TEST];
+	struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
+	dma_addr_t dma_srcs[IOP_ADMA_NUM_SRC_TEST + 1];
+	dma_addr_t dma_addr, dest_dma;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	u8 cmp_byte = 0;
+	u32 cmp_word;
+	u32 zero_sum_result;
+	int err = 0;
+	struct iop_adma_chan *iop_chan;
+
+	dev_dbg(device->common.dev, "%s\n", __func__);
+
+	for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
+		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+		if (!xor_srcs[src_idx])
+			while (src_idx--) {
+				__free_page(xor_srcs[src_idx]);
+				return -ENOMEM;
+			}
+	}
+
+	dest = alloc_page(GFP_KERNEL);
+	if (!dest)
+		while (src_idx--) {
+			__free_page(xor_srcs[src_idx]);
+			return -ENOMEM;
+		}
+
+	/* Fill in src buffers */
+	for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++) {
+		u8 *ptr = page_address(xor_srcs[src_idx]);
+		for (i = 0; i < PAGE_SIZE; i++)
+			ptr[i] = (1 << src_idx);
+	}
+
+	for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TEST; src_idx++)
+		cmp_byte ^= (u8) (1 << src_idx);
+
+	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+			(cmp_byte << 8) | cmp_byte;
+
+	memset(page_address(dest), 0, PAGE_SIZE);
+
+	dma_chan = container_of(device->common.channels.next,
+				struct dma_chan,
+				device_node);
+	if (iop_adma_alloc_chan_resources(dma_chan, NULL) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	/* test xor */
+	dest_dma = dma_map_page(dma_chan->device->dev, dest, 0,
+				PAGE_SIZE, DMA_FROM_DEVICE);
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+		dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
+					   0, PAGE_SIZE, DMA_TO_DEVICE);
+	tx = iop_adma_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+				   IOP_ADMA_NUM_SRC_TEST, PAGE_SIZE,
+				   DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) !=
+		DMA_SUCCESS) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			"Self-test xor timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	iop_chan = to_iop_adma_chan(dma_chan);
+	dma_sync_single_for_cpu(&iop_chan->device->pdev->dev, dest_dma,
+		PAGE_SIZE, DMA_FROM_DEVICE);
+	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+		u32 *ptr = page_address(dest);
+		if (ptr[i] != cmp_word) {
+			dev_printk(KERN_ERR, dma_chan->device->dev,
+				"Self-test xor failed compare, disabling\n");
+			err = -ENODEV;
+			goto free_resources;
+		}
+	}
+	dma_sync_single_for_device(&iop_chan->device->pdev->dev, dest_dma,
+		PAGE_SIZE, DMA_TO_DEVICE);
+
+	/* skip zero sum if the capability is not present */
+	if (!dma_has_cap(DMA_ZERO_SUM, dma_chan->device->cap_mask))
+		goto free_resources;
+
+	/* zero sum the sources with the destintation page */
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST; i++)
+		zero_sum_srcs[i] = xor_srcs[i];
+	zero_sum_srcs[i] = dest;
+
+	zero_sum_result = 1;
+
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++)
+		dma_srcs[i] = dma_map_page(dma_chan->device->dev,
+					   zero_sum_srcs[i], 0, PAGE_SIZE,
+					   DMA_TO_DEVICE);
+	tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
+					IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+					&zero_sum_result,
+					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			"Self-test zero sum timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	if (zero_sum_result != 0) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			"Self-test zero sum failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	/* test memset */
+	dma_addr = dma_map_page(dma_chan->device->dev, dest, 0,
+			PAGE_SIZE, DMA_FROM_DEVICE);
+	tx = iop_adma_prep_dma_memset(dma_chan, dma_addr, 0, PAGE_SIZE,
+				      DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			"Self-test memset timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
+		u32 *ptr = page_address(dest);
+		if (ptr[i]) {
+			dev_printk(KERN_ERR, dma_chan->device->dev,
+				"Self-test memset failed compare, disabling\n");
+			err = -ENODEV;
+			goto free_resources;
+		}
+	}
+
+	/* test for non-zero parity sum */
+	zero_sum_result = 0;
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TEST + 1; i++)
+		dma_srcs[i] = dma_map_page(dma_chan->device->dev,
+					   zero_sum_srcs[i], 0, PAGE_SIZE,
+					   DMA_TO_DEVICE);
+	tx = iop_adma_prep_dma_zero_sum(dma_chan, dma_srcs,
+					IOP_ADMA_NUM_SRC_TEST + 1, PAGE_SIZE,
+					&zero_sum_result,
+					DMA_PREP_INTERRUPT | DMA_CTRL_ACK);
+
+	cookie = iop_adma_tx_submit(tx);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			"Self-test non-zero sum timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	if (zero_sum_result != 1) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			"Self-test non-zero sum failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+free_resources:
+	iop_adma_free_chan_resources(dma_chan);
+out:
+	src_idx = IOP_ADMA_NUM_SRC_TEST;
+	while (src_idx--)
+		__free_page(xor_srcs[src_idx]);
+	__free_page(dest);
+	return err;
+}
+
+static int __devexit iop_adma_remove(struct platform_device *dev)
+{
+	struct iop_adma_device *device = platform_get_drvdata(dev);
+	struct dma_chan *chan, *_chan;
+	struct iop_adma_chan *iop_chan;
+	int i;
+	struct iop_adma_platform_data *plat_data = dev->dev.platform_data;
+
+	dma_async_device_unregister(&device->common);
+
+	for (i = 0; i < 3; i++) {
+		unsigned int irq;
+		irq = platform_get_irq(dev, i);
+		free_irq(irq, device);
+	}
+
+	dma_free_coherent(&dev->dev, plat_data->pool_size,
+			device->dma_desc_pool_virt, device->dma_desc_pool);
+
+	do {
+		struct resource *res;
+		res = platform_get_resource(dev, IORESOURCE_MEM, 0);
+		release_mem_region(res->start, res->end - res->start);
+	} while (0);
+
+	list_for_each_entry_safe(chan, _chan, &device->common.channels,
+				device_node) {
+		iop_chan = to_iop_adma_chan(chan);
+		list_del(&chan->device_node);
+		kfree(iop_chan);
+	}
+	kfree(device);
+
+	return 0;
+}
+
+static int __devinit iop_adma_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	int ret = 0, i;
+	struct iop_adma_device *adev;
+	struct iop_adma_chan *iop_chan;
+	struct dma_device *dma_dev;
+	struct iop_adma_platform_data *plat_data = pdev->dev.platform_data;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	if (!devm_request_mem_region(&pdev->dev, res->start,
+				res->end - res->start, pdev->name))
+		return -EBUSY;
+
+	adev = kzalloc(sizeof(*adev), GFP_KERNEL);
+	if (!adev)
+		return -ENOMEM;
+	dma_dev = &adev->common;
+
+	/* allocate coherent memory for hardware descriptors
+	 * note: writecombine gives slightly better performance, but
+	 * requires that we explicitly flush the writes
+	 */
+	if ((adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
+					plat_data->pool_size,
+					&adev->dma_desc_pool,
+					GFP_KERNEL)) == NULL) {
+		ret = -ENOMEM;
+		goto err_free_adev;
+	}
+
+	dev_dbg(&pdev->dev, "%s: allocted descriptor pool virt %p phys %p\n",
+		__func__, adev->dma_desc_pool_virt,
+		(void *) adev->dma_desc_pool);
+
+	adev->id = plat_data->hw_id;
+
+	/* discover transaction capabilites from the platform data */
+	dma_dev->cap_mask = plat_data->cap_mask;
+
+	adev->pdev = pdev;
+	platform_set_drvdata(pdev, adev);
+
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	/* set base routines */
+	dma_dev->device_alloc_chan_resources = iop_adma_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = iop_adma_free_chan_resources;
+	dma_dev->device_is_tx_complete = iop_adma_is_complete;
+	dma_dev->device_issue_pending = iop_adma_issue_pending;
+	dma_dev->dev = &pdev->dev;
+
+	/* set prep routines based on capability */
+	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_memcpy = iop_adma_prep_dma_memcpy;
+	if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_memset = iop_adma_prep_dma_memset;
+	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+		dma_dev->max_xor = iop_adma_get_max_xor();
+		dma_dev->device_prep_dma_xor = iop_adma_prep_dma_xor;
+	}
+	if (dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_zero_sum =
+			iop_adma_prep_dma_zero_sum;
+	if (dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_interrupt =
+			iop_adma_prep_dma_interrupt;
+
+	iop_chan = kzalloc(sizeof(*iop_chan), GFP_KERNEL);
+	if (!iop_chan) {
+		ret = -ENOMEM;
+		goto err_free_dma;
+	}
+	iop_chan->device = adev;
+
+	iop_chan->mmr_base = devm_ioremap(&pdev->dev, res->start,
+					res->end - res->start);
+	if (!iop_chan->mmr_base) {
+		ret = -ENOMEM;
+		goto err_free_iop_chan;
+	}
+	tasklet_init(&iop_chan->irq_tasklet, iop_adma_tasklet, (unsigned long)
+		iop_chan);
+
+	/* clear errors before enabling interrupts */
+	iop_adma_device_clear_err_status(iop_chan);
+
+	for (i = 0; i < 3; i++) {
+		irq_handler_t handler[] = { iop_adma_eot_handler,
+					iop_adma_eoc_handler,
+					iop_adma_err_handler };
+		int irq = platform_get_irq(pdev, i);
+		if (irq < 0) {
+			ret = -ENXIO;
+			goto err_free_iop_chan;
+		} else {
+			ret = devm_request_irq(&pdev->dev, irq,
+					handler[i], 0, pdev->name, iop_chan);
+			if (ret)
+				goto err_free_iop_chan;
+		}
+	}
+
+	spin_lock_init(&iop_chan->lock);
+	INIT_LIST_HEAD(&iop_chan->chain);
+	INIT_LIST_HEAD(&iop_chan->all_slots);
+	INIT_RCU_HEAD(&iop_chan->common.rcu);
+	iop_chan->common.device = dma_dev;
+	list_add_tail(&iop_chan->common.device_node, &dma_dev->channels);
+
+	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+		ret = iop_adma_memcpy_self_test(adev);
+		dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
+		if (ret)
+			goto err_free_iop_chan;
+	}
+
+	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask) ||
+		dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)) {
+		ret = iop_adma_xor_zero_sum_self_test(adev);
+		dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
+		if (ret)
+			goto err_free_iop_chan;
+	}
+
+	dev_printk(KERN_INFO, &pdev->dev, "Intel(R) IOP: "
+	  "( %s%s%s%s%s%s%s%s%s%s)\n",
+	  dma_has_cap(DMA_PQ_XOR, dma_dev->cap_mask) ? "pq_xor " : "",
+	  dma_has_cap(DMA_PQ_UPDATE, dma_dev->cap_mask) ? "pq_update " : "",
+	  dma_has_cap(DMA_PQ_ZERO_SUM, dma_dev->cap_mask) ? "pq_zero_sum " : "",
+	  dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+	  dma_has_cap(DMA_DUAL_XOR, dma_dev->cap_mask) ? "dual_xor " : "",
+	  dma_has_cap(DMA_ZERO_SUM, dma_dev->cap_mask) ? "xor_zero_sum " : "",
+	  dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)  ? "fill " : "",
+	  dma_has_cap(DMA_MEMCPY_CRC32C, dma_dev->cap_mask) ? "cpy+crc " : "",
+	  dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
+	  dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
+
+	dma_async_device_register(dma_dev);
+	goto out;
+
+ err_free_iop_chan:
+	kfree(iop_chan);
+ err_free_dma:
+	dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
+			adev->dma_desc_pool_virt, adev->dma_desc_pool);
+ err_free_adev:
+	kfree(adev);
+ out:
+	return ret;
+}
+
+static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
+{
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	dma_cookie_t cookie;
+	int slot_cnt, slots_per_op;
+
+	dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_memcpy_slot_count(0, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+
+		list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
+		async_tx_ack(&sw_desc->async_tx);
+		iop_desc_init_memcpy(grp_start, 0);
+		iop_desc_set_byte_count(grp_start, iop_chan, 0);
+		iop_desc_set_dest_addr(grp_start, iop_chan, 0);
+		iop_desc_set_memcpy_src_addr(grp_start, 0);
+
+		cookie = iop_chan->common.cookie;
+		cookie++;
+		if (cookie <= 1)
+			cookie = 2;
+
+		/* initialize the completed cookie to be less than
+		 * the most recently used cookie
+		 */
+		iop_chan->completed_cookie = cookie - 1;
+		iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
+
+		/* channel should not be busy */
+		BUG_ON(iop_chan_is_busy(iop_chan));
+
+		/* clear any prior error-status bits */
+		iop_adma_device_clear_err_status(iop_chan);
+
+		/* disable operation */
+		iop_chan_disable(iop_chan);
+
+		/* set the descriptor address */
+		iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys);
+
+		/* 1/ don't add pre-chained descriptors
+		 * 2/ dummy read to flush next_desc write
+		 */
+		BUG_ON(iop_desc_get_next_desc(sw_desc));
+
+		/* run the descriptor */
+		iop_chan_enable(iop_chan);
+	} else
+		dev_printk(KERN_ERR, iop_chan->device->common.dev,
+			 "failed to allocate null descriptor\n");
+	spin_unlock_bh(&iop_chan->lock);
+}
+
+static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
+{
+	struct iop_adma_desc_slot *sw_desc, *grp_start;
+	dma_cookie_t cookie;
+	int slot_cnt, slots_per_op;
+
+	dev_dbg(iop_chan->device->common.dev, "%s\n", __func__);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_xor_slot_count(0, 2, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		grp_start = sw_desc->group_head;
+		list_splice_init(&sw_desc->async_tx.tx_list, &iop_chan->chain);
+		async_tx_ack(&sw_desc->async_tx);
+		iop_desc_init_null_xor(grp_start, 2, 0);
+		iop_desc_set_byte_count(grp_start, iop_chan, 0);
+		iop_desc_set_dest_addr(grp_start, iop_chan, 0);
+		iop_desc_set_xor_src_addr(grp_start, 0, 0);
+		iop_desc_set_xor_src_addr(grp_start, 1, 0);
+
+		cookie = iop_chan->common.cookie;
+		cookie++;
+		if (cookie <= 1)
+			cookie = 2;
+
+		/* initialize the completed cookie to be less than
+		 * the most recently used cookie
+		 */
+		iop_chan->completed_cookie = cookie - 1;
+		iop_chan->common.cookie = sw_desc->async_tx.cookie = cookie;
+
+		/* channel should not be busy */
+		BUG_ON(iop_chan_is_busy(iop_chan));
+
+		/* clear any prior error-status bits */
+		iop_adma_device_clear_err_status(iop_chan);
+
+		/* disable operation */
+		iop_chan_disable(iop_chan);
+
+		/* set the descriptor address */
+		iop_chan_set_next_descriptor(iop_chan, sw_desc->async_tx.phys);
+
+		/* 1/ don't add pre-chained descriptors
+		 * 2/ dummy read to flush next_desc write
+		 */
+		BUG_ON(iop_desc_get_next_desc(sw_desc));
+
+		/* run the descriptor */
+		iop_chan_enable(iop_chan);
+	} else
+		dev_printk(KERN_ERR, iop_chan->device->common.dev,
+			"failed to allocate null descriptor\n");
+	spin_unlock_bh(&iop_chan->lock);
+}
+
+MODULE_ALIAS("platform:iop-adma");
+
+static struct platform_driver iop_adma_driver = {
+	.probe		= iop_adma_probe,
+	.remove		= iop_adma_remove,
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= "iop-adma",
+	},
+};
+
+static int __init iop_adma_init (void)
+{
+	return platform_driver_register(&iop_adma_driver);
+}
+
+/* it's currently unsafe to unload this module */
+#if 0
+static void __exit iop_adma_exit (void)
+{
+	platform_driver_unregister(&iop_adma_driver);
+	return;
+}
+module_exit(iop_adma_exit);
+#endif
+
+module_init(iop_adma_init);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("IOP ADMA Engine Driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c
new file mode 100644
index 0000000..9f6fe46
--- /dev/null
+++ b/drivers/dma/iovlock.c
@@ -0,0 +1,269 @@
+/*
+ * Copyright(c) 2004 - 2006 Intel Corporation. All rights reserved.
+ * Portions based on net/core/datagram.c and copyrighted by their authors.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This code allows the net stack to make use of a DMA engine for
+ * skb to iovec copies.
+ */
+
+#include <linux/dmaengine.h>
+#include <linux/pagemap.h>
+#include <net/tcp.h> /* for memcpy_toiovec */
+#include <asm/io.h>
+#include <asm/uaccess.h>
+
+static int num_pages_spanned(struct iovec *iov)
+{
+	return
+	((PAGE_ALIGN((unsigned long)iov->iov_base + iov->iov_len) -
+	((unsigned long)iov->iov_base & PAGE_MASK)) >> PAGE_SHIFT);
+}
+
+/*
+ * Pin down all the iovec pages needed for len bytes.
+ * Return a struct dma_pinned_list to keep track of pages pinned down.
+ *
+ * We are allocating a single chunk of memory, and then carving it up into
+ * 3 sections, the latter 2 whose size depends on the number of iovecs and the
+ * total number of pages, respectively.
+ */
+struct dma_pinned_list *dma_pin_iovec_pages(struct iovec *iov, size_t len)
+{
+	struct dma_pinned_list *local_list;
+	struct page **pages;
+	int i;
+	int ret;
+	int nr_iovecs = 0;
+	int iovec_len_used = 0;
+	int iovec_pages_used = 0;
+
+	/* don't pin down non-user-based iovecs */
+	if (segment_eq(get_fs(), KERNEL_DS))
+		return NULL;
+
+	/* determine how many iovecs/pages there are, up front */
+	do {
+		iovec_len_used += iov[nr_iovecs].iov_len;
+		iovec_pages_used += num_pages_spanned(&iov[nr_iovecs]);
+		nr_iovecs++;
+	} while (iovec_len_used < len);
+
+	/* single kmalloc for pinned list, page_list[], and the page arrays */
+	local_list = kmalloc(sizeof(*local_list)
+		+ (nr_iovecs * sizeof (struct dma_page_list))
+		+ (iovec_pages_used * sizeof (struct page*)), GFP_KERNEL);
+	if (!local_list)
+		goto out;
+
+	/* list of pages starts right after the page list array */
+	pages = (struct page **) &local_list->page_list[nr_iovecs];
+
+	local_list->nr_iovecs = 0;
+
+	for (i = 0; i < nr_iovecs; i++) {
+		struct dma_page_list *page_list = &local_list->page_list[i];
+
+		len -= iov[i].iov_len;
+
+		if (!access_ok(VERIFY_WRITE, iov[i].iov_base, iov[i].iov_len))
+			goto unpin;
+
+		page_list->nr_pages = num_pages_spanned(&iov[i]);
+		page_list->base_address = iov[i].iov_base;
+
+		page_list->pages = pages;
+		pages += page_list->nr_pages;
+
+		/* pin pages down */
+		down_read(&current->mm->mmap_sem);
+		ret = get_user_pages(
+			current,
+			current->mm,
+			(unsigned long) iov[i].iov_base,
+			page_list->nr_pages,
+			1,	/* write */
+			0,	/* force */
+			page_list->pages,
+			NULL);
+		up_read(&current->mm->mmap_sem);
+
+		if (ret != page_list->nr_pages)
+			goto unpin;
+
+		local_list->nr_iovecs = i + 1;
+	}
+
+	return local_list;
+
+unpin:
+	dma_unpin_iovec_pages(local_list);
+out:
+	return NULL;
+}
+
+void dma_unpin_iovec_pages(struct dma_pinned_list *pinned_list)
+{
+	int i, j;
+
+	if (!pinned_list)
+		return;
+
+	for (i = 0; i < pinned_list->nr_iovecs; i++) {
+		struct dma_page_list *page_list = &pinned_list->page_list[i];
+		for (j = 0; j < page_list->nr_pages; j++) {
+			set_page_dirty_lock(page_list->pages[j]);
+			page_cache_release(page_list->pages[j]);
+		}
+	}
+
+	kfree(pinned_list);
+}
+
+
+/*
+ * We have already pinned down the pages we will be using in the iovecs.
+ * Each entry in iov array has corresponding entry in pinned_list->page_list.
+ * Using array indexing to keep iov[] and page_list[] in sync.
+ * Initial elements in iov array's iov->iov_len will be 0 if already copied into
+ *   by another call.
+ * iov array length remaining guaranteed to be bigger than len.
+ */
+dma_cookie_t dma_memcpy_to_iovec(struct dma_chan *chan, struct iovec *iov,
+	struct dma_pinned_list *pinned_list, unsigned char *kdata, size_t len)
+{
+	int iov_byte_offset;
+	int copy;
+	dma_cookie_t dma_cookie = 0;
+	int iovec_idx;
+	int page_idx;
+
+	if (!chan)
+		return memcpy_toiovec(iov, kdata, len);
+
+	iovec_idx = 0;
+	while (iovec_idx < pinned_list->nr_iovecs) {
+		struct dma_page_list *page_list;
+
+		/* skip already used-up iovecs */
+		while (!iov[iovec_idx].iov_len)
+			iovec_idx++;
+
+		page_list = &pinned_list->page_list[iovec_idx];
+
+		iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK);
+		page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK)
+			 - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT;
+
+		/* break up copies to not cross page boundary */
+		while (iov[iovec_idx].iov_len) {
+			copy = min_t(int, PAGE_SIZE - iov_byte_offset, len);
+			copy = min_t(int, copy, iov[iovec_idx].iov_len);
+
+			dma_cookie = dma_async_memcpy_buf_to_pg(chan,
+					page_list->pages[page_idx],
+					iov_byte_offset,
+					kdata,
+					copy);
+
+			len -= copy;
+			iov[iovec_idx].iov_len -= copy;
+			iov[iovec_idx].iov_base += copy;
+
+			if (!len)
+				return dma_cookie;
+
+			kdata += copy;
+			iov_byte_offset = 0;
+			page_idx++;
+		}
+		iovec_idx++;
+	}
+
+	/* really bad if we ever run out of iovecs */
+	BUG();
+	return -EFAULT;
+}
+
+dma_cookie_t dma_memcpy_pg_to_iovec(struct dma_chan *chan, struct iovec *iov,
+	struct dma_pinned_list *pinned_list, struct page *page,
+	unsigned int offset, size_t len)
+{
+	int iov_byte_offset;
+	int copy;
+	dma_cookie_t dma_cookie = 0;
+	int iovec_idx;
+	int page_idx;
+	int err;
+
+	/* this needs as-yet-unimplemented buf-to-buff, so punt. */
+	/* TODO: use dma for this */
+	if (!chan || !pinned_list) {
+		u8 *vaddr = kmap(page);
+		err = memcpy_toiovec(iov, vaddr + offset, len);
+		kunmap(page);
+		return err;
+	}
+
+	iovec_idx = 0;
+	while (iovec_idx < pinned_list->nr_iovecs) {
+		struct dma_page_list *page_list;
+
+		/* skip already used-up iovecs */
+		while (!iov[iovec_idx].iov_len)
+			iovec_idx++;
+
+		page_list = &pinned_list->page_list[iovec_idx];
+
+		iov_byte_offset = ((unsigned long)iov[iovec_idx].iov_base & ~PAGE_MASK);
+		page_idx = (((unsigned long)iov[iovec_idx].iov_base & PAGE_MASK)
+			 - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT;
+
+		/* break up copies to not cross page boundary */
+		while (iov[iovec_idx].iov_len) {
+			copy = min_t(int, PAGE_SIZE - iov_byte_offset, len);
+			copy = min_t(int, copy, iov[iovec_idx].iov_len);
+
+			dma_cookie = dma_async_memcpy_pg_to_pg(chan,
+					page_list->pages[page_idx],
+					iov_byte_offset,
+					page,
+					offset,
+					copy);
+
+			len -= copy;
+			iov[iovec_idx].iov_len -= copy;
+			iov[iovec_idx].iov_base += copy;
+
+			if (!len)
+				return dma_cookie;
+
+			offset += copy;
+			iov_byte_offset = 0;
+			page_idx++;
+		}
+		iovec_idx++;
+	}
+
+	/* really bad if we ever run out of iovecs */
+	BUG();
+	return -EFAULT;
+}
diff --git a/drivers/dma/mv_xor.c b/drivers/dma/mv_xor.c
new file mode 100644
index 0000000..bcda174
--- /dev/null
+++ b/drivers/dma/mv_xor.c
@@ -0,0 +1,1384 @@
+/*
+ * offload engine driver for the Marvell XOR engine
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/async_tx.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <linux/memory.h>
+#include <plat/mv_xor.h>
+#include "mv_xor.h"
+
+static void mv_xor_issue_pending(struct dma_chan *chan);
+
+#define to_mv_xor_chan(chan)		\
+	container_of(chan, struct mv_xor_chan, common)
+
+#define to_mv_xor_device(dev)		\
+	container_of(dev, struct mv_xor_device, common)
+
+#define to_mv_xor_slot(tx)		\
+	container_of(tx, struct mv_xor_desc_slot, async_tx)
+
+static void mv_desc_init(struct mv_xor_desc_slot *desc, unsigned long flags)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+
+	hw_desc->status = (1 << 31);
+	hw_desc->phy_next_desc = 0;
+	hw_desc->desc_command = (1 << 31);
+}
+
+static u32 mv_desc_get_dest_addr(struct mv_xor_desc_slot *desc)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+	return hw_desc->phy_dest_addr;
+}
+
+static u32 mv_desc_get_src_addr(struct mv_xor_desc_slot *desc,
+				int src_idx)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+	return hw_desc->phy_src_addr[src_idx];
+}
+
+
+static void mv_desc_set_byte_count(struct mv_xor_desc_slot *desc,
+				   u32 byte_count)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+	hw_desc->byte_count = byte_count;
+}
+
+static void mv_desc_set_next_desc(struct mv_xor_desc_slot *desc,
+				  u32 next_desc_addr)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+	BUG_ON(hw_desc->phy_next_desc);
+	hw_desc->phy_next_desc = next_desc_addr;
+}
+
+static void mv_desc_clear_next_desc(struct mv_xor_desc_slot *desc)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+	hw_desc->phy_next_desc = 0;
+}
+
+static void mv_desc_set_block_fill_val(struct mv_xor_desc_slot *desc, u32 val)
+{
+	desc->value = val;
+}
+
+static void mv_desc_set_dest_addr(struct mv_xor_desc_slot *desc,
+				  dma_addr_t addr)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+	hw_desc->phy_dest_addr = addr;
+}
+
+static int mv_chan_memset_slot_count(size_t len)
+{
+	return 1;
+}
+
+#define mv_chan_memcpy_slot_count(c) mv_chan_memset_slot_count(c)
+
+static void mv_desc_set_src_addr(struct mv_xor_desc_slot *desc,
+				 int index, dma_addr_t addr)
+{
+	struct mv_xor_desc *hw_desc = desc->hw_desc;
+	hw_desc->phy_src_addr[index] = addr;
+	if (desc->type == DMA_XOR)
+		hw_desc->desc_command |= (1 << index);
+}
+
+static u32 mv_chan_get_current_desc(struct mv_xor_chan *chan)
+{
+	return __raw_readl(XOR_CURR_DESC(chan));
+}
+
+static void mv_chan_set_next_descriptor(struct mv_xor_chan *chan,
+					u32 next_desc_addr)
+{
+	__raw_writel(next_desc_addr, XOR_NEXT_DESC(chan));
+}
+
+static void mv_chan_set_dest_pointer(struct mv_xor_chan *chan, u32 desc_addr)
+{
+	__raw_writel(desc_addr, XOR_DEST_POINTER(chan));
+}
+
+static void mv_chan_set_block_size(struct mv_xor_chan *chan, u32 block_size)
+{
+	__raw_writel(block_size, XOR_BLOCK_SIZE(chan));
+}
+
+static void mv_chan_set_value(struct mv_xor_chan *chan, u32 value)
+{
+	__raw_writel(value, XOR_INIT_VALUE_LOW(chan));
+	__raw_writel(value, XOR_INIT_VALUE_HIGH(chan));
+}
+
+static void mv_chan_unmask_interrupts(struct mv_xor_chan *chan)
+{
+	u32 val = __raw_readl(XOR_INTR_MASK(chan));
+	val |= XOR_INTR_MASK_VALUE << (chan->idx * 16);
+	__raw_writel(val, XOR_INTR_MASK(chan));
+}
+
+static u32 mv_chan_get_intr_cause(struct mv_xor_chan *chan)
+{
+	u32 intr_cause = __raw_readl(XOR_INTR_CAUSE(chan));
+	intr_cause = (intr_cause >> (chan->idx * 16)) & 0xFFFF;
+	return intr_cause;
+}
+
+static int mv_is_err_intr(u32 intr_cause)
+{
+	if (intr_cause & ((1<<4)|(1<<5)|(1<<6)|(1<<7)|(1<<8)|(1<<9)))
+		return 1;
+
+	return 0;
+}
+
+static void mv_xor_device_clear_eoc_cause(struct mv_xor_chan *chan)
+{
+	u32 val = (1 << (1 + (chan->idx * 16)));
+	dev_dbg(chan->device->common.dev, "%s, val 0x%08x\n", __func__, val);
+	__raw_writel(val, XOR_INTR_CAUSE(chan));
+}
+
+static void mv_xor_device_clear_err_status(struct mv_xor_chan *chan)
+{
+	u32 val = 0xFFFF0000 >> (chan->idx * 16);
+	__raw_writel(val, XOR_INTR_CAUSE(chan));
+}
+
+static int mv_can_chain(struct mv_xor_desc_slot *desc)
+{
+	struct mv_xor_desc_slot *chain_old_tail = list_entry(
+		desc->chain_node.prev, struct mv_xor_desc_slot, chain_node);
+
+	if (chain_old_tail->type != desc->type)
+		return 0;
+	if (desc->type == DMA_MEMSET)
+		return 0;
+
+	return 1;
+}
+
+static void mv_set_mode(struct mv_xor_chan *chan,
+			       enum dma_transaction_type type)
+{
+	u32 op_mode;
+	u32 config = __raw_readl(XOR_CONFIG(chan));
+
+	switch (type) {
+	case DMA_XOR:
+		op_mode = XOR_OPERATION_MODE_XOR;
+		break;
+	case DMA_MEMCPY:
+		op_mode = XOR_OPERATION_MODE_MEMCPY;
+		break;
+	case DMA_MEMSET:
+		op_mode = XOR_OPERATION_MODE_MEMSET;
+		break;
+	default:
+		dev_printk(KERN_ERR, chan->device->common.dev,
+			   "error: unsupported operation %d.\n",
+			   type);
+		BUG();
+		return;
+	}
+
+	config &= ~0x7;
+	config |= op_mode;
+	__raw_writel(config, XOR_CONFIG(chan));
+	chan->current_type = type;
+}
+
+static void mv_chan_activate(struct mv_xor_chan *chan)
+{
+	u32 activation;
+
+	dev_dbg(chan->device->common.dev, " activate chan.\n");
+	activation = __raw_readl(XOR_ACTIVATION(chan));
+	activation |= 0x1;
+	__raw_writel(activation, XOR_ACTIVATION(chan));
+}
+
+static char mv_chan_is_busy(struct mv_xor_chan *chan)
+{
+	u32 state = __raw_readl(XOR_ACTIVATION(chan));
+
+	state = (state >> 4) & 0x3;
+
+	return (state == 1) ? 1 : 0;
+}
+
+static int mv_chan_xor_slot_count(size_t len, int src_cnt)
+{
+	return 1;
+}
+
+/**
+ * mv_xor_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_xor_free_slots(struct mv_xor_chan *mv_chan,
+			      struct mv_xor_desc_slot *slot)
+{
+	dev_dbg(mv_chan->device->common.dev, "%s %d slot %p\n",
+		__func__, __LINE__, slot);
+
+	slot->slots_per_op = 0;
+
+}
+
+/*
+ * mv_xor_start_new_chain - program the engine to operate on new chain headed by
+ * sw_desc
+ * Caller must hold &mv_chan->lock while calling this function
+ */
+static void mv_xor_start_new_chain(struct mv_xor_chan *mv_chan,
+				   struct mv_xor_desc_slot *sw_desc)
+{
+	dev_dbg(mv_chan->device->common.dev, "%s %d: sw_desc %p\n",
+		__func__, __LINE__, sw_desc);
+	if (sw_desc->type != mv_chan->current_type)
+		mv_set_mode(mv_chan, sw_desc->type);
+
+	if (sw_desc->type == DMA_MEMSET) {
+		/* for memset requests we need to program the engine, no
+		 * descriptors used.
+		 */
+		struct mv_xor_desc *hw_desc = sw_desc->hw_desc;
+		mv_chan_set_dest_pointer(mv_chan, hw_desc->phy_dest_addr);
+		mv_chan_set_block_size(mv_chan, sw_desc->unmap_len);
+		mv_chan_set_value(mv_chan, sw_desc->value);
+	} else {
+		/* set the hardware chain */
+		mv_chan_set_next_descriptor(mv_chan, sw_desc->async_tx.phys);
+	}
+	mv_chan->pending += sw_desc->slot_cnt;
+	mv_xor_issue_pending(&mv_chan->common);
+}
+
+static dma_cookie_t
+mv_xor_run_tx_complete_actions(struct mv_xor_desc_slot *desc,
+	struct mv_xor_chan *mv_chan, dma_cookie_t cookie)
+{
+	BUG_ON(desc->async_tx.cookie < 0);
+
+	if (desc->async_tx.cookie > 0) {
+		cookie = desc->async_tx.cookie;
+
+		/* call the callback (must not sleep or submit new
+		 * operations to this channel)
+		 */
+		if (desc->async_tx.callback)
+			desc->async_tx.callback(
+				desc->async_tx.callback_param);
+
+		/* unmap dma addresses
+		 * (unmap_single vs unmap_page?)
+		 */
+		if (desc->group_head && desc->unmap_len) {
+			struct mv_xor_desc_slot *unmap = desc->group_head;
+			struct device *dev =
+				&mv_chan->device->pdev->dev;
+			u32 len = unmap->unmap_len;
+			enum dma_ctrl_flags flags = desc->async_tx.flags;
+			u32 src_cnt;
+			dma_addr_t addr;
+			dma_addr_t dest;
+
+			src_cnt = unmap->unmap_src_cnt;
+			dest = mv_desc_get_dest_addr(unmap);
+			if (!(flags & DMA_COMPL_SKIP_DEST_UNMAP)) {
+				enum dma_data_direction dir;
+
+				if (src_cnt > 1) /* is xor ? */
+					dir = DMA_BIDIRECTIONAL;
+				else
+					dir = DMA_FROM_DEVICE;
+				dma_unmap_page(dev, dest, len, dir);
+			}
+
+			if (!(flags & DMA_COMPL_SKIP_SRC_UNMAP)) {
+				while (src_cnt--) {
+					addr = mv_desc_get_src_addr(unmap,
+								    src_cnt);
+					if (addr == dest)
+						continue;
+					dma_unmap_page(dev, addr, len,
+						       DMA_TO_DEVICE);
+				}
+			}
+			desc->group_head = NULL;
+		}
+	}
+
+	/* run dependent operations */
+	async_tx_run_dependencies(&desc->async_tx);
+
+	return cookie;
+}
+
+static int
+mv_xor_clean_completed_slots(struct mv_xor_chan *mv_chan)
+{
+	struct mv_xor_desc_slot *iter, *_iter;
+
+	dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
+	list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+				 completed_node) {
+
+		if (async_tx_test_ack(&iter->async_tx)) {
+			list_del(&iter->completed_node);
+			mv_xor_free_slots(mv_chan, iter);
+		}
+	}
+	return 0;
+}
+
+static int
+mv_xor_clean_slot(struct mv_xor_desc_slot *desc,
+	struct mv_xor_chan *mv_chan)
+{
+	dev_dbg(mv_chan->device->common.dev, "%s %d: desc %p flags %d\n",
+		__func__, __LINE__, desc, desc->async_tx.flags);
+	list_del(&desc->chain_node);
+	/* the client is allowed to attach dependent operations
+	 * until 'ack' is set
+	 */
+	if (!async_tx_test_ack(&desc->async_tx)) {
+		/* move this slot to the completed_slots */
+		list_add_tail(&desc->completed_node, &mv_chan->completed_slots);
+		return 0;
+	}
+
+	mv_xor_free_slots(mv_chan, desc);
+	return 0;
+}
+
+static void __mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+	struct mv_xor_desc_slot *iter, *_iter;
+	dma_cookie_t cookie = 0;
+	int busy = mv_chan_is_busy(mv_chan);
+	u32 current_desc = mv_chan_get_current_desc(mv_chan);
+	int seen_current = 0;
+
+	dev_dbg(mv_chan->device->common.dev, "%s %d\n", __func__, __LINE__);
+	dev_dbg(mv_chan->device->common.dev, "current_desc %x\n", current_desc);
+	mv_xor_clean_completed_slots(mv_chan);
+
+	/* free completed slots from the chain starting with
+	 * the oldest descriptor
+	 */
+
+	list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+					chain_node) {
+		prefetch(_iter);
+		prefetch(&_iter->async_tx);
+
+		/* do not advance past the current descriptor loaded into the
+		 * hardware channel, subsequent descriptors are either in
+		 * process or have not been submitted
+		 */
+		if (seen_current)
+			break;
+
+		/* stop the search if we reach the current descriptor and the
+		 * channel is busy
+		 */
+		if (iter->async_tx.phys == current_desc) {
+			seen_current = 1;
+			if (busy)
+				break;
+		}
+
+		cookie = mv_xor_run_tx_complete_actions(iter, mv_chan, cookie);
+
+		if (mv_xor_clean_slot(iter, mv_chan))
+			break;
+	}
+
+	if ((busy == 0) && !list_empty(&mv_chan->chain)) {
+		struct mv_xor_desc_slot *chain_head;
+		chain_head = list_entry(mv_chan->chain.next,
+					struct mv_xor_desc_slot,
+					chain_node);
+
+		mv_xor_start_new_chain(mv_chan, chain_head);
+	}
+
+	if (cookie > 0)
+		mv_chan->completed_cookie = cookie;
+}
+
+static void
+mv_xor_slot_cleanup(struct mv_xor_chan *mv_chan)
+{
+	spin_lock_bh(&mv_chan->lock);
+	__mv_xor_slot_cleanup(mv_chan);
+	spin_unlock_bh(&mv_chan->lock);
+}
+
+static void mv_xor_tasklet(unsigned long data)
+{
+	struct mv_xor_chan *chan = (struct mv_xor_chan *) data;
+	__mv_xor_slot_cleanup(chan);
+}
+
+static struct mv_xor_desc_slot *
+mv_xor_alloc_slots(struct mv_xor_chan *mv_chan, int num_slots,
+		    int slots_per_op)
+{
+	struct mv_xor_desc_slot *iter, *_iter, *alloc_start = NULL;
+	LIST_HEAD(chain);
+	int slots_found, retry = 0;
+
+	/* start search from the last allocated descrtiptor
+	 * if a contiguous allocation can not be found start searching
+	 * from the beginning of the list
+	 */
+retry:
+	slots_found = 0;
+	if (retry == 0)
+		iter = mv_chan->last_used;
+	else
+		iter = list_entry(&mv_chan->all_slots,
+			struct mv_xor_desc_slot,
+			slot_node);
+
+	list_for_each_entry_safe_continue(
+		iter, _iter, &mv_chan->all_slots, slot_node) {
+		prefetch(_iter);
+		prefetch(&_iter->async_tx);
+		if (iter->slots_per_op) {
+			/* give up after finding the first busy slot
+			 * on the second pass through the list
+			 */
+			if (retry)
+				break;
+
+			slots_found = 0;
+			continue;
+		}
+
+		/* start the allocation if the slot is correctly aligned */
+		if (!slots_found++)
+			alloc_start = iter;
+
+		if (slots_found == num_slots) {
+			struct mv_xor_desc_slot *alloc_tail = NULL;
+			struct mv_xor_desc_slot *last_used = NULL;
+			iter = alloc_start;
+			while (num_slots) {
+				int i;
+
+				/* pre-ack all but the last descriptor */
+				async_tx_ack(&iter->async_tx);
+
+				list_add_tail(&iter->chain_node, &chain);
+				alloc_tail = iter;
+				iter->async_tx.cookie = 0;
+				iter->slot_cnt = num_slots;
+				iter->xor_check_result = NULL;
+				for (i = 0; i < slots_per_op; i++) {
+					iter->slots_per_op = slots_per_op - i;
+					last_used = iter;
+					iter = list_entry(iter->slot_node.next,
+						struct mv_xor_desc_slot,
+						slot_node);
+				}
+				num_slots -= slots_per_op;
+			}
+			alloc_tail->group_head = alloc_start;
+			alloc_tail->async_tx.cookie = -EBUSY;
+			list_splice(&chain, &alloc_tail->async_tx.tx_list);
+			mv_chan->last_used = last_used;
+			mv_desc_clear_next_desc(alloc_start);
+			mv_desc_clear_next_desc(alloc_tail);
+			return alloc_tail;
+		}
+	}
+	if (!retry++)
+		goto retry;
+
+	/* try to free some slots if the allocation fails */
+	tasklet_schedule(&mv_chan->irq_tasklet);
+
+	return NULL;
+}
+
+static dma_cookie_t
+mv_desc_assign_cookie(struct mv_xor_chan *mv_chan,
+		      struct mv_xor_desc_slot *desc)
+{
+	dma_cookie_t cookie = mv_chan->common.cookie;
+
+	if (++cookie < 0)
+		cookie = 1;
+	mv_chan->common.cookie = desc->async_tx.cookie = cookie;
+	return cookie;
+}
+
+/************************ DMA engine API functions ****************************/
+static dma_cookie_t
+mv_xor_tx_submit(struct dma_async_tx_descriptor *tx)
+{
+	struct mv_xor_desc_slot *sw_desc = to_mv_xor_slot(tx);
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(tx->chan);
+	struct mv_xor_desc_slot *grp_start, *old_chain_tail;
+	dma_cookie_t cookie;
+	int new_hw_chain = 1;
+
+	dev_dbg(mv_chan->device->common.dev,
+		"%s sw_desc %p: async_tx %p\n",
+		__func__, sw_desc, &sw_desc->async_tx);
+
+	grp_start = sw_desc->group_head;
+
+	spin_lock_bh(&mv_chan->lock);
+	cookie = mv_desc_assign_cookie(mv_chan, sw_desc);
+
+	if (list_empty(&mv_chan->chain))
+		list_splice_init(&sw_desc->async_tx.tx_list, &mv_chan->chain);
+	else {
+		new_hw_chain = 0;
+
+		old_chain_tail = list_entry(mv_chan->chain.prev,
+					    struct mv_xor_desc_slot,
+					    chain_node);
+		list_splice_init(&grp_start->async_tx.tx_list,
+				 &old_chain_tail->chain_node);
+
+		if (!mv_can_chain(grp_start))
+			goto submit_done;
+
+		dev_dbg(mv_chan->device->common.dev, "Append to last desc %x\n",
+			old_chain_tail->async_tx.phys);
+
+		/* fix up the hardware chain */
+		mv_desc_set_next_desc(old_chain_tail, grp_start->async_tx.phys);
+
+		/* if the channel is not busy */
+		if (!mv_chan_is_busy(mv_chan)) {
+			u32 current_desc = mv_chan_get_current_desc(mv_chan);
+			/*
+			 * and the curren desc is the end of the chain before
+			 * the append, then we need to start the channel
+			 */
+			if (current_desc == old_chain_tail->async_tx.phys)
+				new_hw_chain = 1;
+		}
+	}
+
+	if (new_hw_chain)
+		mv_xor_start_new_chain(mv_chan, grp_start);
+
+submit_done:
+	spin_unlock_bh(&mv_chan->lock);
+
+	return cookie;
+}
+
+/* returns the number of allocated descriptors */
+static int mv_xor_alloc_chan_resources(struct dma_chan *chan,
+				       struct dma_client *client)
+{
+	char *hw_desc;
+	int idx;
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+	struct mv_xor_desc_slot *slot = NULL;
+	struct mv_xor_platform_data *plat_data =
+		mv_chan->device->pdev->dev.platform_data;
+	int num_descs_in_pool = plat_data->pool_size/MV_XOR_SLOT_SIZE;
+
+	/* Allocate descriptor slots */
+	idx = mv_chan->slots_allocated;
+	while (idx < num_descs_in_pool) {
+		slot = kzalloc(sizeof(*slot), GFP_KERNEL);
+		if (!slot) {
+			printk(KERN_INFO "MV XOR Channel only initialized"
+				" %d descriptor slots", idx);
+			break;
+		}
+		hw_desc = (char *) mv_chan->device->dma_desc_pool_virt;
+		slot->hw_desc = (void *) &hw_desc[idx * MV_XOR_SLOT_SIZE];
+
+		dma_async_tx_descriptor_init(&slot->async_tx, chan);
+		slot->async_tx.tx_submit = mv_xor_tx_submit;
+		INIT_LIST_HEAD(&slot->chain_node);
+		INIT_LIST_HEAD(&slot->slot_node);
+		INIT_LIST_HEAD(&slot->async_tx.tx_list);
+		hw_desc = (char *) mv_chan->device->dma_desc_pool;
+		slot->async_tx.phys =
+			(dma_addr_t) &hw_desc[idx * MV_XOR_SLOT_SIZE];
+		slot->idx = idx++;
+
+		spin_lock_bh(&mv_chan->lock);
+		mv_chan->slots_allocated = idx;
+		list_add_tail(&slot->slot_node, &mv_chan->all_slots);
+		spin_unlock_bh(&mv_chan->lock);
+	}
+
+	if (mv_chan->slots_allocated && !mv_chan->last_used)
+		mv_chan->last_used = list_entry(mv_chan->all_slots.next,
+					struct mv_xor_desc_slot,
+					slot_node);
+
+	dev_dbg(mv_chan->device->common.dev,
+		"allocated %d descriptor slots last_used: %p\n",
+		mv_chan->slots_allocated, mv_chan->last_used);
+
+	return mv_chan->slots_allocated ? : -ENOMEM;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
+		size_t len, unsigned long flags)
+{
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+	struct mv_xor_desc_slot *sw_desc, *grp_start;
+	int slot_cnt;
+
+	dev_dbg(mv_chan->device->common.dev,
+		"%s dest: %x src %x len: %u flags: %ld\n",
+		__func__, dest, src, len, flags);
+	if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+		return NULL;
+
+	BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+	spin_lock_bh(&mv_chan->lock);
+	slot_cnt = mv_chan_memcpy_slot_count(len);
+	sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+	if (sw_desc) {
+		sw_desc->type = DMA_MEMCPY;
+		sw_desc->async_tx.flags = flags;
+		grp_start = sw_desc->group_head;
+		mv_desc_init(grp_start, flags);
+		mv_desc_set_byte_count(grp_start, len);
+		mv_desc_set_dest_addr(sw_desc->group_head, dest);
+		mv_desc_set_src_addr(grp_start, 0, src);
+		sw_desc->unmap_src_cnt = 1;
+		sw_desc->unmap_len = len;
+	}
+	spin_unlock_bh(&mv_chan->lock);
+
+	dev_dbg(mv_chan->device->common.dev,
+		"%s sw_desc %p async_tx %p\n",
+		__func__, sw_desc, sw_desc ? &sw_desc->async_tx : 0);
+
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_memset(struct dma_chan *chan, dma_addr_t dest, int value,
+		       size_t len, unsigned long flags)
+{
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+	struct mv_xor_desc_slot *sw_desc, *grp_start;
+	int slot_cnt;
+
+	dev_dbg(mv_chan->device->common.dev,
+		"%s dest: %x len: %u flags: %ld\n",
+		__func__, dest, len, flags);
+	if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+		return NULL;
+
+	BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+	spin_lock_bh(&mv_chan->lock);
+	slot_cnt = mv_chan_memset_slot_count(len);
+	sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+	if (sw_desc) {
+		sw_desc->type = DMA_MEMSET;
+		sw_desc->async_tx.flags = flags;
+		grp_start = sw_desc->group_head;
+		mv_desc_init(grp_start, flags);
+		mv_desc_set_byte_count(grp_start, len);
+		mv_desc_set_dest_addr(sw_desc->group_head, dest);
+		mv_desc_set_block_fill_val(grp_start, value);
+		sw_desc->unmap_src_cnt = 1;
+		sw_desc->unmap_len = len;
+	}
+	spin_unlock_bh(&mv_chan->lock);
+	dev_dbg(mv_chan->device->common.dev,
+		"%s sw_desc %p async_tx %p \n",
+		__func__, sw_desc, &sw_desc->async_tx);
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static struct dma_async_tx_descriptor *
+mv_xor_prep_dma_xor(struct dma_chan *chan, dma_addr_t dest, dma_addr_t *src,
+		    unsigned int src_cnt, size_t len, unsigned long flags)
+{
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+	struct mv_xor_desc_slot *sw_desc, *grp_start;
+	int slot_cnt;
+
+	if (unlikely(len < MV_XOR_MIN_BYTE_COUNT))
+		return NULL;
+
+	BUG_ON(unlikely(len > MV_XOR_MAX_BYTE_COUNT));
+
+	dev_dbg(mv_chan->device->common.dev,
+		"%s src_cnt: %d len: dest %x %u flags: %ld\n",
+		__func__, src_cnt, len, dest, flags);
+
+	spin_lock_bh(&mv_chan->lock);
+	slot_cnt = mv_chan_xor_slot_count(len, src_cnt);
+	sw_desc = mv_xor_alloc_slots(mv_chan, slot_cnt, 1);
+	if (sw_desc) {
+		sw_desc->type = DMA_XOR;
+		sw_desc->async_tx.flags = flags;
+		grp_start = sw_desc->group_head;
+		mv_desc_init(grp_start, flags);
+		/* the byte count field is the same as in memcpy desc*/
+		mv_desc_set_byte_count(grp_start, len);
+		mv_desc_set_dest_addr(sw_desc->group_head, dest);
+		sw_desc->unmap_src_cnt = src_cnt;
+		sw_desc->unmap_len = len;
+		while (src_cnt--)
+			mv_desc_set_src_addr(grp_start, src_cnt, src[src_cnt]);
+	}
+	spin_unlock_bh(&mv_chan->lock);
+	dev_dbg(mv_chan->device->common.dev,
+		"%s sw_desc %p async_tx %p \n",
+		__func__, sw_desc, &sw_desc->async_tx);
+	return sw_desc ? &sw_desc->async_tx : NULL;
+}
+
+static void mv_xor_free_chan_resources(struct dma_chan *chan)
+{
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+	struct mv_xor_desc_slot *iter, *_iter;
+	int in_use_descs = 0;
+
+	mv_xor_slot_cleanup(mv_chan);
+
+	spin_lock_bh(&mv_chan->lock);
+	list_for_each_entry_safe(iter, _iter, &mv_chan->chain,
+					chain_node) {
+		in_use_descs++;
+		list_del(&iter->chain_node);
+	}
+	list_for_each_entry_safe(iter, _iter, &mv_chan->completed_slots,
+				 completed_node) {
+		in_use_descs++;
+		list_del(&iter->completed_node);
+	}
+	list_for_each_entry_safe_reverse(
+		iter, _iter, &mv_chan->all_slots, slot_node) {
+		list_del(&iter->slot_node);
+		kfree(iter);
+		mv_chan->slots_allocated--;
+	}
+	mv_chan->last_used = NULL;
+
+	dev_dbg(mv_chan->device->common.dev, "%s slots_allocated %d\n",
+		__func__, mv_chan->slots_allocated);
+	spin_unlock_bh(&mv_chan->lock);
+
+	if (in_use_descs)
+		dev_err(mv_chan->device->common.dev,
+			"freeing %d in use descriptors!\n", in_use_descs);
+}
+
+/**
+ * mv_xor_is_complete - poll the status of an XOR transaction
+ * @chan: XOR channel handle
+ * @cookie: XOR transaction identifier
+ */
+static enum dma_status mv_xor_is_complete(struct dma_chan *chan,
+					  dma_cookie_t cookie,
+					  dma_cookie_t *done,
+					  dma_cookie_t *used)
+{
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+	dma_cookie_t last_used;
+	dma_cookie_t last_complete;
+	enum dma_status ret;
+
+	last_used = chan->cookie;
+	last_complete = mv_chan->completed_cookie;
+	mv_chan->is_complete_cookie = cookie;
+	if (done)
+		*done = last_complete;
+	if (used)
+		*used = last_used;
+
+	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	if (ret == DMA_SUCCESS) {
+		mv_xor_clean_completed_slots(mv_chan);
+		return ret;
+	}
+	mv_xor_slot_cleanup(mv_chan);
+
+	last_used = chan->cookie;
+	last_complete = mv_chan->completed_cookie;
+
+	if (done)
+		*done = last_complete;
+	if (used)
+		*used = last_used;
+
+	return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+static void mv_dump_xor_regs(struct mv_xor_chan *chan)
+{
+	u32 val;
+
+	val = __raw_readl(XOR_CONFIG(chan));
+	dev_printk(KERN_ERR, chan->device->common.dev,
+		   "config       0x%08x.\n", val);
+
+	val = __raw_readl(XOR_ACTIVATION(chan));
+	dev_printk(KERN_ERR, chan->device->common.dev,
+		   "activation   0x%08x.\n", val);
+
+	val = __raw_readl(XOR_INTR_CAUSE(chan));
+	dev_printk(KERN_ERR, chan->device->common.dev,
+		   "intr cause   0x%08x.\n", val);
+
+	val = __raw_readl(XOR_INTR_MASK(chan));
+	dev_printk(KERN_ERR, chan->device->common.dev,
+		   "intr mask    0x%08x.\n", val);
+
+	val = __raw_readl(XOR_ERROR_CAUSE(chan));
+	dev_printk(KERN_ERR, chan->device->common.dev,
+		   "error cause  0x%08x.\n", val);
+
+	val = __raw_readl(XOR_ERROR_ADDR(chan));
+	dev_printk(KERN_ERR, chan->device->common.dev,
+		   "error addr   0x%08x.\n", val);
+}
+
+static void mv_xor_err_interrupt_handler(struct mv_xor_chan *chan,
+					 u32 intr_cause)
+{
+	if (intr_cause & (1 << 4)) {
+	     dev_dbg(chan->device->common.dev,
+		     "ignore this error\n");
+	     return;
+	}
+
+	dev_printk(KERN_ERR, chan->device->common.dev,
+		   "error on chan %d. intr cause 0x%08x.\n",
+		   chan->idx, intr_cause);
+
+	mv_dump_xor_regs(chan);
+	BUG();
+}
+
+static irqreturn_t mv_xor_interrupt_handler(int irq, void *data)
+{
+	struct mv_xor_chan *chan = data;
+	u32 intr_cause = mv_chan_get_intr_cause(chan);
+
+	dev_dbg(chan->device->common.dev, "intr cause %x\n", intr_cause);
+
+	if (mv_is_err_intr(intr_cause))
+		mv_xor_err_interrupt_handler(chan, intr_cause);
+
+	tasklet_schedule(&chan->irq_tasklet);
+
+	mv_xor_device_clear_eoc_cause(chan);
+
+	return IRQ_HANDLED;
+}
+
+static void mv_xor_issue_pending(struct dma_chan *chan)
+{
+	struct mv_xor_chan *mv_chan = to_mv_xor_chan(chan);
+
+	if (mv_chan->pending >= MV_XOR_THRESHOLD) {
+		mv_chan->pending = 0;
+		mv_chan_activate(mv_chan);
+	}
+}
+
+/*
+ * Perform a transaction to verify the HW works.
+ */
+#define MV_XOR_TEST_SIZE 2000
+
+static int __devinit mv_xor_memcpy_self_test(struct mv_xor_device *device)
+{
+	int i;
+	void *src, *dest;
+	dma_addr_t src_dma, dest_dma;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	struct dma_async_tx_descriptor *tx;
+	int err = 0;
+	struct mv_xor_chan *mv_chan;
+
+	src = kmalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+	if (!src)
+		return -ENOMEM;
+
+	dest = kzalloc(sizeof(u8) * MV_XOR_TEST_SIZE, GFP_KERNEL);
+	if (!dest) {
+		kfree(src);
+		return -ENOMEM;
+	}
+
+	/* Fill in src buffer */
+	for (i = 0; i < MV_XOR_TEST_SIZE; i++)
+		((u8 *) src)[i] = (u8)i;
+
+	/* Start copy, using first DMA channel */
+	dma_chan = container_of(device->common.channels.next,
+				struct dma_chan,
+				device_node);
+	if (mv_xor_alloc_chan_resources(dma_chan, NULL) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	dest_dma = dma_map_single(dma_chan->device->dev, dest,
+				  MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+
+	src_dma = dma_map_single(dma_chan->device->dev, src,
+				 MV_XOR_TEST_SIZE, DMA_TO_DEVICE);
+
+	tx = mv_xor_prep_dma_memcpy(dma_chan, dest_dma, src_dma,
+				    MV_XOR_TEST_SIZE, 0);
+	cookie = mv_xor_tx_submit(tx);
+	mv_xor_issue_pending(dma_chan);
+	async_tx_ack(tx);
+	msleep(1);
+
+	if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
+	    DMA_SUCCESS) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			   "Self-test copy timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	mv_chan = to_mv_xor_chan(dma_chan);
+	dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+				MV_XOR_TEST_SIZE, DMA_FROM_DEVICE);
+	if (memcmp(src, dest, MV_XOR_TEST_SIZE)) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			   "Self-test copy failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+free_resources:
+	mv_xor_free_chan_resources(dma_chan);
+out:
+	kfree(src);
+	kfree(dest);
+	return err;
+}
+
+#define MV_XOR_NUM_SRC_TEST 4 /* must be <= 15 */
+static int __devinit
+mv_xor_xor_self_test(struct mv_xor_device *device)
+{
+	int i, src_idx;
+	struct page *dest;
+	struct page *xor_srcs[MV_XOR_NUM_SRC_TEST];
+	dma_addr_t dma_srcs[MV_XOR_NUM_SRC_TEST];
+	dma_addr_t dest_dma;
+	struct dma_async_tx_descriptor *tx;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	u8 cmp_byte = 0;
+	u32 cmp_word;
+	int err = 0;
+	struct mv_xor_chan *mv_chan;
+
+	for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+		if (!xor_srcs[src_idx])
+			while (src_idx--) {
+				__free_page(xor_srcs[src_idx]);
+				return -ENOMEM;
+			}
+	}
+
+	dest = alloc_page(GFP_KERNEL);
+	if (!dest)
+		while (src_idx--) {
+			__free_page(xor_srcs[src_idx]);
+			return -ENOMEM;
+		}
+
+	/* Fill in src buffers */
+	for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++) {
+		u8 *ptr = page_address(xor_srcs[src_idx]);
+		for (i = 0; i < PAGE_SIZE; i++)
+			ptr[i] = (1 << src_idx);
+	}
+
+	for (src_idx = 0; src_idx < MV_XOR_NUM_SRC_TEST; src_idx++)
+		cmp_byte ^= (u8) (1 << src_idx);
+
+	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) |
+		(cmp_byte << 8) | cmp_byte;
+
+	memset(page_address(dest), 0, PAGE_SIZE);
+
+	dma_chan = container_of(device->common.channels.next,
+				struct dma_chan,
+				device_node);
+	if (mv_xor_alloc_chan_resources(dma_chan, NULL) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	/* test xor */
+	dest_dma = dma_map_page(dma_chan->device->dev, dest, 0, PAGE_SIZE,
+				DMA_FROM_DEVICE);
+
+	for (i = 0; i < MV_XOR_NUM_SRC_TEST; i++)
+		dma_srcs[i] = dma_map_page(dma_chan->device->dev, xor_srcs[i],
+					   0, PAGE_SIZE, DMA_TO_DEVICE);
+
+	tx = mv_xor_prep_dma_xor(dma_chan, dest_dma, dma_srcs,
+				 MV_XOR_NUM_SRC_TEST, PAGE_SIZE, 0);
+
+	cookie = mv_xor_tx_submit(tx);
+	mv_xor_issue_pending(dma_chan);
+	async_tx_ack(tx);
+	msleep(8);
+
+	if (mv_xor_is_complete(dma_chan, cookie, NULL, NULL) !=
+	    DMA_SUCCESS) {
+		dev_printk(KERN_ERR, dma_chan->device->dev,
+			   "Self-test xor timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	mv_chan = to_mv_xor_chan(dma_chan);
+	dma_sync_single_for_cpu(&mv_chan->device->pdev->dev, dest_dma,
+				PAGE_SIZE, DMA_FROM_DEVICE);
+	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+		u32 *ptr = page_address(dest);
+		if (ptr[i] != cmp_word) {
+			dev_printk(KERN_ERR, dma_chan->device->dev,
+				   "Self-test xor failed compare, disabling."
+				   " index %d, data %x, expected %x\n", i,
+				   ptr[i], cmp_word);
+			err = -ENODEV;
+			goto free_resources;
+		}
+	}
+
+free_resources:
+	mv_xor_free_chan_resources(dma_chan);
+out:
+	src_idx = MV_XOR_NUM_SRC_TEST;
+	while (src_idx--)
+		__free_page(xor_srcs[src_idx]);
+	__free_page(dest);
+	return err;
+}
+
+static int __devexit mv_xor_remove(struct platform_device *dev)
+{
+	struct mv_xor_device *device = platform_get_drvdata(dev);
+	struct dma_chan *chan, *_chan;
+	struct mv_xor_chan *mv_chan;
+	struct mv_xor_platform_data *plat_data = dev->dev.platform_data;
+
+	dma_async_device_unregister(&device->common);
+
+	dma_free_coherent(&dev->dev, plat_data->pool_size,
+			device->dma_desc_pool_virt, device->dma_desc_pool);
+
+	list_for_each_entry_safe(chan, _chan, &device->common.channels,
+				device_node) {
+		mv_chan = to_mv_xor_chan(chan);
+		list_del(&chan->device_node);
+	}
+
+	return 0;
+}
+
+static int __devinit mv_xor_probe(struct platform_device *pdev)
+{
+	int ret = 0;
+	int irq;
+	struct mv_xor_device *adev;
+	struct mv_xor_chan *mv_chan;
+	struct dma_device *dma_dev;
+	struct mv_xor_platform_data *plat_data = pdev->dev.platform_data;
+
+
+	adev = devm_kzalloc(&pdev->dev, sizeof(*adev), GFP_KERNEL);
+	if (!adev)
+		return -ENOMEM;
+
+	dma_dev = &adev->common;
+
+	/* allocate coherent memory for hardware descriptors
+	 * note: writecombine gives slightly better performance, but
+	 * requires that we explicitly flush the writes
+	 */
+	adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
+							  plat_data->pool_size,
+							  &adev->dma_desc_pool,
+							  GFP_KERNEL);
+	if (!adev->dma_desc_pool_virt)
+		return -ENOMEM;
+
+	adev->id = plat_data->hw_id;
+
+	/* discover transaction capabilites from the platform data */
+	dma_dev->cap_mask = plat_data->cap_mask;
+	adev->pdev = pdev;
+	platform_set_drvdata(pdev, adev);
+
+	adev->shared = platform_get_drvdata(plat_data->shared);
+
+	INIT_LIST_HEAD(&dma_dev->channels);
+
+	/* set base routines */
+	dma_dev->device_alloc_chan_resources = mv_xor_alloc_chan_resources;
+	dma_dev->device_free_chan_resources = mv_xor_free_chan_resources;
+	dma_dev->device_is_tx_complete = mv_xor_is_complete;
+	dma_dev->device_issue_pending = mv_xor_issue_pending;
+	dma_dev->dev = &pdev->dev;
+
+	/* set prep routines based on capability */
+	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_memcpy = mv_xor_prep_dma_memcpy;
+	if (dma_has_cap(DMA_MEMSET, dma_dev->cap_mask))
+		dma_dev->device_prep_dma_memset = mv_xor_prep_dma_memset;
+	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+		dma_dev->max_xor = 8;                  ;
+		dma_dev->device_prep_dma_xor = mv_xor_prep_dma_xor;
+	}
+
+	mv_chan = devm_kzalloc(&pdev->dev, sizeof(*mv_chan), GFP_KERNEL);
+	if (!mv_chan) {
+		ret = -ENOMEM;
+		goto err_free_dma;
+	}
+	mv_chan->device = adev;
+	mv_chan->idx = plat_data->hw_id;
+	mv_chan->mmr_base = adev->shared->xor_base;
+
+	if (!mv_chan->mmr_base) {
+		ret = -ENOMEM;
+		goto err_free_dma;
+	}
+	tasklet_init(&mv_chan->irq_tasklet, mv_xor_tasklet, (unsigned long)
+		     mv_chan);
+
+	/* clear errors before enabling interrupts */
+	mv_xor_device_clear_err_status(mv_chan);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		ret = irq;
+		goto err_free_dma;
+	}
+	ret = devm_request_irq(&pdev->dev, irq,
+			       mv_xor_interrupt_handler,
+			       0, dev_name(&pdev->dev), mv_chan);
+	if (ret)
+		goto err_free_dma;
+
+	mv_chan_unmask_interrupts(mv_chan);
+
+	mv_set_mode(mv_chan, DMA_MEMCPY);
+
+	spin_lock_init(&mv_chan->lock);
+	INIT_LIST_HEAD(&mv_chan->chain);
+	INIT_LIST_HEAD(&mv_chan->completed_slots);
+	INIT_LIST_HEAD(&mv_chan->all_slots);
+	INIT_RCU_HEAD(&mv_chan->common.rcu);
+	mv_chan->common.device = dma_dev;
+
+	list_add_tail(&mv_chan->common.device_node, &dma_dev->channels);
+
+	if (dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask)) {
+		ret = mv_xor_memcpy_self_test(adev);
+		dev_dbg(&pdev->dev, "memcpy self test returned %d\n", ret);
+		if (ret)
+			goto err_free_dma;
+	}
+
+	if (dma_has_cap(DMA_XOR, dma_dev->cap_mask)) {
+		ret = mv_xor_xor_self_test(adev);
+		dev_dbg(&pdev->dev, "xor self test returned %d\n", ret);
+		if (ret)
+			goto err_free_dma;
+	}
+
+	dev_printk(KERN_INFO, &pdev->dev, "Marvell XOR: "
+	  "( %s%s%s%s)\n",
+	  dma_has_cap(DMA_XOR, dma_dev->cap_mask) ? "xor " : "",
+	  dma_has_cap(DMA_MEMSET, dma_dev->cap_mask)  ? "fill " : "",
+	  dma_has_cap(DMA_MEMCPY, dma_dev->cap_mask) ? "cpy " : "",
+	  dma_has_cap(DMA_INTERRUPT, dma_dev->cap_mask) ? "intr " : "");
+
+	dma_async_device_register(dma_dev);
+	goto out;
+
+ err_free_dma:
+	dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
+			adev->dma_desc_pool_virt, adev->dma_desc_pool);
+ out:
+	return ret;
+}
+
+static void
+mv_xor_conf_mbus_windows(struct mv_xor_shared_private *msp,
+			 struct mbus_dram_target_info *dram)
+{
+	void __iomem *base = msp->xor_base;
+	u32 win_enable = 0;
+	int i;
+
+	for (i = 0; i < 8; i++) {
+		writel(0, base + WINDOW_BASE(i));
+		writel(0, base + WINDOW_SIZE(i));
+		if (i < 4)
+			writel(0, base + WINDOW_REMAP_HIGH(i));
+	}
+
+	for (i = 0; i < dram->num_cs; i++) {
+		struct mbus_dram_window *cs = dram->cs + i;
+
+		writel((cs->base & 0xffff0000) |
+		       (cs->mbus_attr << 8) |
+		       dram->mbus_dram_target_id, base + WINDOW_BASE(i));
+		writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i));
+
+		win_enable |= (1 << i);
+		win_enable |= 3 << (16 + (2 * i));
+	}
+
+	writel(win_enable, base + WINDOW_BAR_ENABLE(0));
+	writel(win_enable, base + WINDOW_BAR_ENABLE(1));
+}
+
+static struct platform_driver mv_xor_driver = {
+	.probe		= mv_xor_probe,
+	.remove		= mv_xor_remove,
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= MV_XOR_NAME,
+	},
+};
+
+static int mv_xor_shared_probe(struct platform_device *pdev)
+{
+	struct mv_xor_platform_shared_data *msd = pdev->dev.platform_data;
+	struct mv_xor_shared_private *msp;
+	struct resource *res;
+
+	dev_printk(KERN_NOTICE, &pdev->dev, "Marvell shared XOR driver\n");
+
+	msp = devm_kzalloc(&pdev->dev, sizeof(*msp), GFP_KERNEL);
+	if (!msp)
+		return -ENOMEM;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	msp->xor_base = devm_ioremap(&pdev->dev, res->start,
+				     res->end - res->start + 1);
+	if (!msp->xor_base)
+		return -EBUSY;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
+	if (!res)
+		return -ENODEV;
+
+	msp->xor_high_base = devm_ioremap(&pdev->dev, res->start,
+					  res->end - res->start + 1);
+	if (!msp->xor_high_base)
+		return -EBUSY;
+
+	platform_set_drvdata(pdev, msp);
+
+	/*
+	 * (Re-)program MBUS remapping windows if we are asked to.
+	 */
+	if (msd != NULL && msd->dram != NULL)
+		mv_xor_conf_mbus_windows(msp, msd->dram);
+
+	return 0;
+}
+
+static int mv_xor_shared_remove(struct platform_device *pdev)
+{
+	return 0;
+}
+
+static struct platform_driver mv_xor_shared_driver = {
+	.probe		= mv_xor_shared_probe,
+	.remove		= mv_xor_shared_remove,
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= MV_XOR_SHARED_NAME,
+	},
+};
+
+
+static int __init mv_xor_init(void)
+{
+	int rc;
+
+	rc = platform_driver_register(&mv_xor_shared_driver);
+	if (!rc) {
+		rc = platform_driver_register(&mv_xor_driver);
+		if (rc)
+			platform_driver_unregister(&mv_xor_shared_driver);
+	}
+	return rc;
+}
+module_init(mv_xor_init);
+
+/* it's currently unsafe to unload this module */
+#if 0
+static void __exit mv_xor_exit(void)
+{
+	platform_driver_unregister(&mv_xor_driver);
+	platform_driver_unregister(&mv_xor_shared_driver);
+	return;
+}
+
+module_exit(mv_xor_exit);
+#endif
+
+MODULE_AUTHOR("Saeed Bishara <saeed@marvell.com>");
+MODULE_DESCRIPTION("DMA engine driver for Marvell's XOR engine");
+MODULE_LICENSE("GPL");
diff --git a/drivers/dma/mv_xor.h b/drivers/dma/mv_xor.h
new file mode 100644
index 0000000..06cafe1
--- /dev/null
+++ b/drivers/dma/mv_xor.h
@@ -0,0 +1,183 @@
+/*
+ * Copyright (C) 2007, 2008, Marvell International Ltd.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#ifndef MV_XOR_H
+#define MV_XOR_H
+
+#include <linux/types.h>
+#include <linux/io.h>
+#include <linux/dmaengine.h>
+#include <linux/interrupt.h>
+
+#define USE_TIMER
+#define MV_XOR_SLOT_SIZE		64
+#define MV_XOR_THRESHOLD		1
+
+#define XOR_OPERATION_MODE_XOR		0
+#define XOR_OPERATION_MODE_MEMCPY	2
+#define XOR_OPERATION_MODE_MEMSET	4
+
+#define XOR_CURR_DESC(chan)	(chan->mmr_base + 0x210 + (chan->idx * 4))
+#define XOR_NEXT_DESC(chan)	(chan->mmr_base + 0x200 + (chan->idx * 4))
+#define XOR_BYTE_COUNT(chan)	(chan->mmr_base + 0x220 + (chan->idx * 4))
+#define XOR_DEST_POINTER(chan)	(chan->mmr_base + 0x2B0 + (chan->idx * 4))
+#define XOR_BLOCK_SIZE(chan)	(chan->mmr_base + 0x2C0 + (chan->idx * 4))
+#define XOR_INIT_VALUE_LOW(chan)	(chan->mmr_base + 0x2E0)
+#define XOR_INIT_VALUE_HIGH(chan)	(chan->mmr_base + 0x2E4)
+
+#define XOR_CONFIG(chan)	(chan->mmr_base + 0x10 + (chan->idx * 4))
+#define XOR_ACTIVATION(chan)	(chan->mmr_base + 0x20 + (chan->idx * 4))
+#define XOR_INTR_CAUSE(chan)	(chan->mmr_base + 0x30)
+#define XOR_INTR_MASK(chan)	(chan->mmr_base + 0x40)
+#define XOR_ERROR_CAUSE(chan)	(chan->mmr_base + 0x50)
+#define XOR_ERROR_ADDR(chan)	(chan->mmr_base + 0x60)
+#define XOR_INTR_MASK_VALUE	0x3F5
+
+#define WINDOW_BASE(w)		(0x250 + ((w) << 2))
+#define WINDOW_SIZE(w)		(0x270 + ((w) << 2))
+#define WINDOW_REMAP_HIGH(w)	(0x290 + ((w) << 2))
+#define WINDOW_BAR_ENABLE(chan)	(0x240 + ((chan) << 2))
+
+struct mv_xor_shared_private {
+	void __iomem	*xor_base;
+	void __iomem	*xor_high_base;
+};
+
+
+/**
+ * struct mv_xor_device - internal representation of a XOR device
+ * @pdev: Platform device
+ * @id: HW XOR Device selector
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
+ * @common: embedded struct dma_device
+ */
+struct mv_xor_device {
+	struct platform_device		*pdev;
+	int				id;
+	dma_addr_t			dma_desc_pool;
+	void				*dma_desc_pool_virt;
+	struct dma_device		common;
+	struct mv_xor_shared_private	*shared;
+};
+
+/**
+ * struct mv_xor_chan - internal representation of a XOR channel
+ * @pending: allows batching of hardware operations
+ * @completed_cookie: identifier for the most recently completed operation
+ * @lock: serializes enqueue/dequeue operations to the descriptors pool
+ * @mmr_base: memory mapped register base
+ * @idx: the index of the xor channel
+ * @chain: device chain view of the descriptors
+ * @completed_slots: slots completed by HW but still need to be acked
+ * @device: parent device
+ * @common: common dmaengine channel object members
+ * @last_used: place holder for allocation to continue from where it left off
+ * @all_slots: complete domain of slots usable by the channel
+ * @slots_allocated: records the actual size of the descriptor slot pool
+ * @irq_tasklet: bottom half where mv_xor_slot_cleanup runs
+ */
+struct mv_xor_chan {
+	int			pending;
+	dma_cookie_t		completed_cookie;
+	spinlock_t		lock; /* protects the descriptor slot pool */
+	void __iomem		*mmr_base;
+	unsigned int		idx;
+	enum dma_transaction_type	current_type;
+	struct list_head	chain;
+	struct list_head	completed_slots;
+	struct mv_xor_device	*device;
+	struct dma_chan		common;
+	struct mv_xor_desc_slot	*last_used;
+	struct list_head	all_slots;
+	int			slots_allocated;
+	struct tasklet_struct	irq_tasklet;
+#ifdef USE_TIMER
+	unsigned long		cleanup_time;
+	u32			current_on_last_cleanup;
+	dma_cookie_t		is_complete_cookie;
+#endif
+};
+
+/**
+ * struct mv_xor_desc_slot - software descriptor
+ * @slot_node: node on the mv_xor_chan.all_slots list
+ * @chain_node: node on the mv_xor_chan.chain list
+ * @completed_node: node on the mv_xor_chan.completed_slots list
+ * @hw_desc: virtual address of the hardware descriptor chain
+ * @phys: hardware address of the hardware descriptor chain
+ * @group_head: first operation in a transaction
+ * @slot_cnt: total slots used in an transaction (group of operations)
+ * @slots_per_op: number of slots per operation
+ * @idx: pool index
+ * @unmap_src_cnt: number of xor sources
+ * @unmap_len: transaction bytecount
+ * @async_tx: support for the async_tx api
+ * @group_list: list of slots that make up a multi-descriptor transaction
+ *	for example transfer lengths larger than the supported hw max
+ * @xor_check_result: result of zero sum
+ * @crc32_result: result crc calculation
+ */
+struct mv_xor_desc_slot {
+	struct list_head	slot_node;
+	struct list_head	chain_node;
+	struct list_head	completed_node;
+	enum dma_transaction_type	type;
+	void			*hw_desc;
+	struct mv_xor_desc_slot	*group_head;
+	u16			slot_cnt;
+	u16			slots_per_op;
+	u16			idx;
+	u16			unmap_src_cnt;
+	u32			value;
+	size_t			unmap_len;
+	struct dma_async_tx_descriptor	async_tx;
+	union {
+		u32		*xor_check_result;
+		u32		*crc32_result;
+	};
+#ifdef USE_TIMER
+	unsigned long		arrival_time;
+	struct timer_list	timeout;
+#endif
+};
+
+/* This structure describes XOR descriptor size 64bytes	*/
+struct mv_xor_desc {
+	u32 status;		/* descriptor execution status */
+	u32 crc32_result;	/* result of CRC-32 calculation */
+	u32 desc_command;	/* type of operation to be carried out */
+	u32 phy_next_desc;	/* next descriptor address pointer */
+	u32 byte_count;		/* size of src/dst blocks in bytes */
+	u32 phy_dest_addr;	/* destination block address */
+	u32 phy_src_addr[8];	/* source block addresses */
+	u32 reserved0;
+	u32 reserved1;
+};
+
+#define to_mv_sw_desc(addr_hw_desc)		\
+	container_of(addr_hw_desc, struct mv_xor_desc_slot, hw_desc)
+
+#define mv_hw_desc_slot_idx(hw_desc, idx)	\
+	((void *)(((unsigned long)hw_desc) + ((idx) << 5)))
+
+#define MV_XOR_MIN_BYTE_COUNT	(128)
+#define XOR_MAX_BYTE_COUNT	((16 * 1024 * 1024) - 1)
+#define MV_XOR_MAX_BYTE_COUNT	XOR_MAX_BYTE_COUNT
+
+
+#endif