LKML Archive on lore.kernel.org
help / color / mirror / Atom feed
From: Dan Williams <dan.j.williams@intel.com>
To: neilb@suse.de, linux-raid@vger.kernel.org
Cc: akpm@osdl.org, linux-kernel@vger.kernel.org, christopher.leech@intel.com
Subject: [PATCH 16/19] dmaengine: Driver for the Intel IOP 32x, 33x, and 13xx RAID engines
Date: Mon, 11 Sep 2006 16:19:00 -0700	[thread overview]
Message-ID: <20060911231859.4737.46229.stgit@dwillia2-linux.ch.intel.com> (raw)
In-Reply-To: <1158015632.4241.31.camel@dwillia2-linux.ch.intel.com>

From: Dan Williams <dan.j.williams@intel.com>

This is a driver for the iop DMA/AAU/ADMA units which are capable of pq_xor,
pq_update, pq_zero_sum, xor, dual_xor, xor_zero_sum, fill, copy+crc, and copy
operations.

Changelog:
* fixed a slot allocation bug in do_iop13xx_adma_xor that caused too few
slots to be requested eventually leading to data corruption
* enabled the slot allocation routine to attempt to free slots before
returning -ENOMEM
* switched the cleanup routine to solely use the software chain and the
status register to determine if a descriptor is complete.  This is
necessary to support other IOP engines that do not have status writeback
capability
* make the driver iop generic
* modified the allocation routines to understand allocating a group of
slots for a single operation
* added a null xor initialization operation for the xor only channel on
iop3xx
* add software emulation of zero sum on iop32x
* support xor operations on buffers larger than the hardware maximum
* add architecture specific raid5-dma support functions

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
---

 drivers/dma/Kconfig                 |   27 +
 drivers/dma/Makefile                |    1 
 drivers/dma/iop-adma.c              | 1501 +++++++++++++++++++++++++++++++++++
 include/asm-arm/hardware/iop_adma.h |   98 ++
 4 files changed, 1624 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index fced8c3..3556143 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -7,8 +7,8 @@ menu "DMA Engine support"
 config DMA_ENGINE
 	bool "Support for DMA engines"
 	---help---
-	  DMA engines offload copy operations from the CPU to dedicated
-	  hardware, allowing the copies to happen asynchronously.
+          DMA engines offload block memory operations from the CPU to dedicated
+          hardware, allowing the operations to happen asynchronously.
 
 comment "DMA Clients"
 
@@ -28,9 +28,19 @@ config RAID5_DMA
 	default y
 	---help---
 	  This enables the use of DMA engines in the MD-RAID5 driver to
-	  offload stripe cache operations, freeing CPU cycles.
+	  offload stripe cache operations (i.e. xor, memcpy), freeing CPU cycles.
 	  say Y here
 
+config RAID5_DMA_WAIT_VIA_REQUEUE
+	bool "raid5-dma: Non-blocking channel switching"
+	depends on RAID5_DMA_ARCH_NEEDS_CHAN_SWITCH && RAID5_DMA && BROKEN
+	default n
+	---help---
+	  This enables the raid5-dma driver to continue to operate on incoming
+	  stripes when it determines that the current stripe must wait for a
+	  a hardware channel to finish operations.  This code is a work in
+	  progress, only say Y to debug the implementation, otherwise say N.
+
 comment "DMA Devices"
 
 config INTEL_IOATDMA
@@ -40,4 +50,15 @@ config INTEL_IOATDMA
 	---help---
 	  Enable support for the Intel(R) I/OAT DMA engine.
 
+config INTEL_IOP_ADMA
+        tristate "Intel IOP ADMA support"
+        depends on DMA_ENGINE && (ARCH_IOP32X || ARCH_IOP33X || ARCH_IOP13XX)
+	select RAID5_DMA_ARCH_NEEDS_CHAN_SWITCH if (ARCH_IOP32X || ARCH_IOP33X)
+        default m
+        ---help---
+          Enable support for the Intel(R) IOP Series RAID engines.
+
+config RAID5_DMA_ARCH_NEEDS_CHAN_SWITCH
+	bool
+
 endmenu
diff --git a/drivers/dma/Makefile b/drivers/dma/Makefile
index 4e36d6e..233eae7 100644
--- a/drivers/dma/Makefile
+++ b/drivers/dma/Makefile
@@ -2,3 +2,4 @@ obj-$(CONFIG_DMA_ENGINE) += dmaengine.o
 obj-$(CONFIG_NET_DMA) += iovlock.o
 obj-$(CONFIG_RAID5_DMA) += raid5-dma.o
 obj-$(CONFIG_INTEL_IOATDMA) += ioatdma.o
+obj-$(CONFIG_INTEL_IOP_ADMA) += iop-adma.o
diff --git a/drivers/dma/iop-adma.c b/drivers/dma/iop-adma.c
new file mode 100644
index 0000000..51f1c54
--- /dev/null
+++ b/drivers/dma/iop-adma.c
@@ -0,0 +1,1501 @@
+/*
+ * Copyright(c) 2006 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+
+/*
+ * This driver supports the asynchrounous DMA copy and RAID engines available
+ * on the Intel Xscale(R) family of I/O Processors (IOP 32x, 33x, 134x)
+ */
+
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/dmaengine.h>
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/spinlock.h>
+#include <linux/interrupt.h>
+#include <linux/platform_device.h>
+#include <asm/arch/adma.h>
+#include <asm/memory.h>
+
+#define to_iop_adma_chan(chan) container_of(chan, struct iop_adma_chan, common)
+#define to_iop_adma_device(dev) container_of(dev, struct iop_adma_device, common)
+#define to_iop_adma_slot(lh) container_of(lh, struct iop_adma_desc_slot, slot_node)
+
+#define IOP_ADMA_DEBUG 0
+#define PRINTK(x...) ((void)(IOP_ADMA_DEBUG && printk(x)))
+
+/* software zero sum implemenation bits for iop32x */
+#ifdef CONFIG_ARCH_IOP32X
+char iop32x_zero_result_buffer[PAGE_SIZE] __attribute__((aligned(256)));
+u32 *iop32x_zero_sum_output;
+#endif
+
+/**
+ * iop_adma_free_slots - flags descriptor slots for reuse
+ * @slot: Slot to free
+ * Caller must hold &iop_chan->lock while calling this function
+ */
+static inline void iop_adma_free_slots(struct iop_adma_desc_slot *slot)
+{
+	int stride = slot->stride;
+	while (stride--) {
+		slot->stride = 0;
+		slot = list_entry(slot->slot_node.next,
+				struct iop_adma_desc_slot,
+				slot_node);
+	}
+}
+
+static void __iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
+{
+	struct iop_adma_desc_slot *iter, *_iter;
+	dma_cookie_t cookie = 0;
+	struct device *dev = &iop_chan->device->pdev->dev;
+	u32 current_desc = iop_chan_get_current_descriptor(iop_chan);
+	int busy = iop_chan_is_busy(iop_chan);
+	int seen_current = 0;
+
+	/* free completed slots from the chain starting with
+	 * the oldest descriptor
+	 */
+	list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
+					chain_node) {
+		PRINTK("%s: [%d] cookie: %d busy: %x next: %x\n",
+			__FUNCTION__, iter->idx, iter->cookie, busy,
+			iop_desc_get_next_desc(iter, iop_chan));
+
+		/* do not advance past the current descriptor loaded into the 
+		 * hardware channel, subsequent descriptors are either in process
+		 * or have not been submitted
+		 */
+		if (seen_current)
+			break;
+
+		/* stop the search if we reach the current descriptor and the
+		 * channel is busy, or if it appears that the current descriptor
+		 * needs to be re-read (i.e. has been appended to)
+		 */
+		if (iter->phys == current_desc) {
+			BUG_ON(seen_current++);
+			if (busy || iop_desc_get_next_desc(iter, iop_chan))
+				break;
+		}
+
+		/* if we are tracking a group of zero-result descriptors add
+		 * the current result to the accumulator
+		 */
+		if (iop_chan->zero_sum_group) {
+			iop_chan->result_accumulator |=
+				iop_desc_get_zero_result(iter);
+			PRINTK("%s: add to zero sum group acc: %d this: %d\n", __FUNCTION__,
+				iop_chan->result_accumulator, iop_desc_get_zero_result(iter));
+		}
+
+		if (iter->cookie) {
+			u32 src_cnt = iter->src_cnt;
+			u32 len = iop_desc_get_byte_count(iter, iop_chan);
+			dma_addr_t addr;
+
+			cookie = iter->cookie;
+			iter->cookie = 0;
+
+			/* the first and last descriptor in a zero sum group
+			 * will have 'xor_check_result' set
+			 */
+			if (iter->xor_check_result) {
+				if (iter->slot_cnt > iter->slots_per_op) {
+					if (!iop_chan->zero_sum_group) {
+						iop_chan->zero_sum_group = 1;
+						iop_chan->result_accumulator |=
+							iop_desc_get_zero_result(iter);
+					}
+					PRINTK("%s: start zero sum group acc: %d this: %d\n", __FUNCTION__,
+						iop_chan->result_accumulator, iop_desc_get_zero_result(iter));
+				} else {
+					if (!iop_chan->zero_sum_group)
+						iop_chan->result_accumulator |=
+							iop_desc_get_zero_result(iter);
+					else
+						iop_chan->zero_sum_group = 0;
+	
+					*iter->xor_check_result = iop_chan->result_accumulator;
+					iop_chan->result_accumulator = 0;
+
+					PRINTK("%s: end zero sum group acc: %d this: %d\n", __FUNCTION__,
+						*iter->xor_check_result, iop_desc_get_zero_result(iter));
+				}
+			}
+
+			/* unmap dma ranges */
+			switch (iter->flags & (DMA_DEST_BUF | DMA_DEST_PAGE |
+				DMA_DEST_DMA)) {
+			case DMA_DEST_BUF:
+				addr = iop_desc_get_dest_addr(iter, iop_chan);
+				dma_unmap_single(dev, addr, len, DMA_FROM_DEVICE);
+				break;
+			case DMA_DEST_PAGE:
+				addr = iop_desc_get_dest_addr(iter, iop_chan);
+				dma_unmap_page(dev, addr, len, DMA_FROM_DEVICE);
+				break;
+			case DMA_DEST_DMA:
+				break;
+			}
+
+			switch (iter->flags & (DMA_SRC_BUF |
+					DMA_SRC_PAGE | DMA_SRC_DMA |
+					DMA_SRC_PAGES | DMA_SRC_DMA_LIST)) {
+			case DMA_SRC_BUF:
+				addr = iop_desc_get_src_addr(iter, iop_chan, 0);
+				dma_unmap_single(dev, addr, len, DMA_TO_DEVICE);
+				break;
+			case DMA_SRC_PAGE:
+				addr = iop_desc_get_src_addr(iter, iop_chan, 0);
+				dma_unmap_page(dev, addr, len, DMA_TO_DEVICE);
+				break;
+			case DMA_SRC_PAGES:
+				while(src_cnt--) {
+					addr = iop_desc_get_src_addr(iter,
+								iop_chan,
+								src_cnt);
+					dma_unmap_page(dev, addr, len,
+						DMA_TO_DEVICE);
+				}
+				break;
+			case DMA_SRC_DMA:
+			case DMA_SRC_DMA_LIST:
+				break;
+			}
+		}
+
+		/* leave the last descriptor in the chain 
+		 * so we can append to it
+		 */
+		if (iter->chain_node.next == &iop_chan->chain)
+			break;
+
+		PRINTK("iop adma%d: cleanup %d stride %d\n",
+		iop_chan->device->id, iter->idx, iter->stride);
+
+		list_del(&iter->chain_node);
+		iop_adma_free_slots(iter);
+	}
+
+	BUG_ON(!seen_current);
+
+	if (cookie) {
+		iop_chan->completed_cookie = cookie;
+
+		PRINTK("iop adma%d: completed cookie %d\n",
+		iop_chan->device->id, cookie);
+	}
+}
+
+static inline void iop_adma_slot_cleanup(struct iop_adma_chan *iop_chan)
+{
+	spin_lock_bh(&iop_chan->lock);
+	__iop_adma_slot_cleanup(iop_chan);
+	spin_unlock_bh(&iop_chan->lock);
+}
+
+static struct iop_adma_desc_slot *
+__iop_adma_alloc_slots(struct iop_adma_chan *iop_chan, int num_slots,
+			int slots_per_op, int recurse)
+{
+	struct iop_adma_desc_slot *iter = NULL, *alloc_start = NULL;
+	int i;
+
+	/* start search from the last allocated descrtiptor
+	 * if a contiguous allocation can not be found start searching
+	 * from the beginning of the list
+	 */
+	for (i = 0; i < 2; i++) {
+		int slots_found = 0;
+		if (i == 0)
+			iter = iop_chan->last_used;
+		else {
+			iter = list_entry(&iop_chan->all_slots,
+				struct iop_adma_desc_slot,
+				slot_node);
+		}
+
+		list_for_each_entry_continue(iter, &iop_chan->all_slots, slot_node) {
+			if (iter->stride) {
+				/* give up after finding the first busy slot
+				 * on the second pass through the list
+				 */
+				if (i == 1)
+					break;
+
+				slots_found = 0;
+				continue;
+			}
+
+			/* start the allocation if the slot is correctly aligned */
+			if (!slots_found++) {
+				if (iop_desc_is_aligned(iter, slots_per_op))
+					alloc_start = iter;
+				else {
+					slots_found = 0;
+					continue;
+				}
+			}
+
+			if (slots_found == num_slots) {
+				iter = alloc_start;
+				while (num_slots) {
+					PRINTK("iop adma%d: allocated [%d] "
+						"(desc %p phys: %#x) stride %d\n",
+						iop_chan->device->id,
+						iter->idx, iter->hw_desc, iter->phys,
+						slots_per_op);
+					iop_chan->last_used = iter;
+					list_add_tail(&iter->chain_node,
+							&iop_chan->chain);
+					iter->slot_cnt = num_slots;
+					iter->slots_per_op = slots_per_op;
+					iter->xor_check_result = NULL;
+					iter->cookie = 0;
+					for (i = 0; i < slots_per_op; i++) {
+						iter->stride = slots_per_op - i;
+						iter = list_entry(iter->slot_node.next,
+								struct iop_adma_desc_slot,
+								slot_node);
+					}
+					num_slots -= slots_per_op;
+				}
+				return alloc_start;
+			}
+		}
+	}
+
+	/* try once to free some slots if the allocation fails */
+	if (recurse) {
+		__iop_adma_slot_cleanup(iop_chan);
+		return __iop_adma_alloc_slots(iop_chan, num_slots, slots_per_op, 0);
+	} else
+		return NULL;
+}
+
+static struct iop_adma_desc_slot *
+iop_adma_alloc_slots(struct iop_adma_chan *iop_chan,
+			int num_slots,
+			int slots_per_op)
+{
+	return __iop_adma_alloc_slots(iop_chan, num_slots, slots_per_op, 1);
+}
+
+static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan);
+static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan);
+
+/* returns the actual number of allocated descriptors */
+static int iop_adma_alloc_chan_resources(struct dma_chan *chan)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *slot = NULL;
+	char *hw_desc;
+	int i;
+	int init = iop_chan->slots_allocated ? 0 : 1;
+	struct iop_adma_platform_data *plat_data;
+	
+	plat_data = iop_chan->device->pdev->dev.platform_data;
+
+	spin_lock_bh(&iop_chan->lock);
+	/* Allocate descriptor slots */
+	i = iop_chan->slots_allocated;
+	for (; i < (plat_data->pool_size/IOP_ADMA_SLOT_SIZE); i++) {
+		slot = kmalloc(sizeof(*slot), GFP_KERNEL);
+		if (!slot) {
+			printk(KERN_INFO "IOP ADMA Channel only initialized"
+				" %d descriptor slots", i--);
+			break;
+		}
+		hw_desc = (char *) iop_chan->device->dma_desc_pool_virt;
+		slot->hw_desc = (void *) &hw_desc[i * IOP_ADMA_SLOT_SIZE];
+
+		INIT_LIST_HEAD(&slot->chain_node);
+		INIT_LIST_HEAD(&slot->slot_node);
+		hw_desc = (char *) iop_chan->device->dma_desc_pool;
+		slot->phys = (dma_addr_t) &hw_desc[i * IOP_ADMA_SLOT_SIZE];
+		slot->stride = 0;
+		slot->cookie = 0;
+		slot->xor_check_result = NULL;
+		slot->idx = i;
+		list_add_tail(&slot->slot_node, &iop_chan->all_slots);
+	}
+	if (i && !iop_chan->last_used)
+		iop_chan->last_used = list_entry(iop_chan->all_slots.next,
+					struct iop_adma_desc_slot,
+					slot_node);
+
+	iop_chan->slots_allocated = i;
+	PRINTK("iop adma%d: allocated %d descriptor slots last_used: %p\n",
+		iop_chan->device->id, i, iop_chan->last_used);
+	spin_unlock_bh(&iop_chan->lock);
+
+	/* initialize the channel and the chain with a null operation */
+	if (init) {
+		if (iop_chan->device->common.capabilities & DMA_MEMCPY)
+			iop_chan_start_null_memcpy(iop_chan);
+		else if (iop_chan->device->common.capabilities & DMA_XOR)
+			iop_chan_start_null_xor(iop_chan);
+		else
+			BUG();
+	}
+
+	return (i > 0) ? i : -ENOMEM;
+}
+
+/* chain the descriptors */
+static inline void iop_chan_chain_desc(struct iop_adma_chan *iop_chan,
+					struct iop_adma_desc_slot *desc)
+{
+	struct iop_adma_desc_slot *prev = list_entry(desc->chain_node.prev,
+						struct iop_adma_desc_slot,
+						chain_node);
+	iop_desc_set_next_desc(prev, iop_chan, desc->phys);
+}
+
+static inline void iop_desc_assign_cookie(struct iop_adma_chan *iop_chan,
+					struct iop_adma_desc_slot *desc)
+{
+	dma_cookie_t cookie = iop_chan->common.cookie;
+	cookie++;
+	if (cookie < 0)
+		cookie = 1;
+	iop_chan->common.cookie = desc->cookie = cookie;
+	PRINTK("iop adma%d: %s cookie %d slot %d\n",
+	iop_chan->device->id, __FUNCTION__, cookie, desc->idx);
+}
+
+static inline void iop_adma_check_threshold(struct iop_adma_chan *iop_chan)
+{
+	if (iop_chan->pending >= IOP_ADMA_THRESHOLD) {
+		iop_chan->pending = 0;
+		iop_chan_append(iop_chan);
+	}
+}
+
+static dma_cookie_t do_iop_adma_memcpy(struct dma_chan *chan,
+                                       union dmaengine_addr dest,
+					unsigned int dest_off,
+                                       union dmaengine_addr src,
+					unsigned int src_off,
+                                       size_t len,
+                                       unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	dma_cookie_t ret = -ENOMEM;
+	struct iop_adma_desc_slot *sw_desc;
+	int slot_cnt, slots_per_op;
+
+	if (!chan || !dest.dma || !src.dma)
+		return -EFAULT;
+	if (!len)
+		return iop_chan->common.cookie;
+
+	PRINTK("iop adma%d: %s len: %u flags: %#lx\n",
+	iop_chan->device->id, __FUNCTION__, len, flags);
+
+	switch (flags & (DMA_SRC_BUF | DMA_SRC_PAGE | DMA_SRC_DMA)) {
+	case DMA_SRC_BUF:
+		src.dma = dma_map_single(&iop_chan->device->pdev->dev,
+			src.buf, len, DMA_TO_DEVICE);
+		break;
+	case DMA_SRC_PAGE:
+		src.dma = dma_map_page(&iop_chan->device->pdev->dev,
+			src.pg, src_off, len, DMA_TO_DEVICE);
+		break;
+	case DMA_SRC_DMA:
+		break;
+	default:
+		return -EFAULT;
+	}
+
+	switch (flags & (DMA_DEST_BUF | DMA_DEST_PAGE | DMA_DEST_DMA)) {
+	case DMA_DEST_BUF:
+		dest.dma = dma_map_single(&iop_chan->device->pdev->dev,
+			dest.buf, len, DMA_FROM_DEVICE);
+		break;
+	case DMA_DEST_PAGE:
+		dest.dma = dma_map_page(&iop_chan->device->pdev->dev,
+			dest.pg, dest_off, len, DMA_FROM_DEVICE);
+		break;
+	case DMA_DEST_DMA:
+		break;
+	default:
+		return -EFAULT;
+	}
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_memcpy_slot_count(len, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		iop_desc_init_memcpy(sw_desc);
+		iop_desc_set_byte_count(sw_desc, iop_chan, len);
+		iop_desc_set_dest_addr(sw_desc, iop_chan, dest.dma);
+		iop_desc_set_memcpy_src_addr(sw_desc, src.dma, slot_cnt, slots_per_op);
+
+		iop_chan_chain_desc(iop_chan, sw_desc);
+		iop_desc_assign_cookie(iop_chan, sw_desc);
+
+		sw_desc->flags = flags;
+		iop_chan->pending++;
+		ret = sw_desc->cookie;
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	iop_adma_check_threshold(iop_chan);
+
+	return ret;
+}
+
+static dma_cookie_t do_iop_adma_memset(struct dma_chan *chan,
+                                       union dmaengine_addr dest,
+					unsigned int dest_off,
+                                       int val,
+                                       size_t len,
+                                       unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	dma_cookie_t ret = -ENOMEM;
+	struct iop_adma_desc_slot *sw_desc;
+	int slot_cnt, slots_per_op;
+
+	if (!chan || !dest.dma)
+		return -EFAULT;
+	if (!len)
+		return iop_chan->common.cookie;
+
+	PRINTK("iop adma%d: %s len: %u flags: %#lx\n",
+	iop_chan->device->id, __FUNCTION__, len, flags);
+
+	switch (flags & (DMA_DEST_BUF | DMA_DEST_PAGE | DMA_DEST_DMA)) {
+	case DMA_DEST_BUF:
+		dest.dma = dma_map_single(&iop_chan->device->pdev->dev,
+			dest.buf, len, DMA_FROM_DEVICE);
+		break;
+	case DMA_DEST_PAGE:
+		dest.dma = dma_map_page(&iop_chan->device->pdev->dev,
+			dest.pg, dest_off, len, DMA_FROM_DEVICE);
+		break;
+	case DMA_DEST_DMA:
+		break;
+	default:
+		return -EFAULT;
+	}
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_memset_slot_count(len, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		iop_desc_init_memset(sw_desc);
+		iop_desc_set_byte_count(sw_desc, iop_chan, len);
+		iop_desc_set_block_fill_val(sw_desc, val);
+		iop_desc_set_dest_addr(sw_desc, iop_chan, dest.dma);
+
+		iop_chan_chain_desc(iop_chan, sw_desc);
+		iop_desc_assign_cookie(iop_chan, sw_desc);
+
+		sw_desc->flags = flags;
+		iop_chan->pending++;
+		ret = sw_desc->cookie;
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	iop_adma_check_threshold(iop_chan);
+
+	return ret;
+}
+
+/**
+ * do_iop_adma_xor - xor from source pages to a dest page
+ * @chan: common channel handle
+ * @dest: DMAENGINE destination address
+ * @dest_off: offset into the destination page
+ * @src: DMAENGINE source addresses
+ * @src_cnt: number of source pages
+ * @src_off: offset into the source pages
+ * @len: transaction length in bytes
+ * @flags: DMAENGINE address type flags
+ */
+static dma_cookie_t do_iop_adma_xor(struct dma_chan *chan,
+					union dmaengine_addr dest,
+					unsigned int dest_off,
+					union dmaengine_addr src,
+					unsigned int src_cnt,
+					unsigned int src_off,
+					size_t len,
+					unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct device *dev = &iop_chan->device->pdev->dev;
+	struct iop_adma_desc_slot *sw_desc;
+	dma_cookie_t ret = -ENOMEM;
+	int slot_cnt, slots_per_op;
+
+	if (!chan || !dest.dma || !src.dma_list)
+		return -EFAULT;
+
+	if (!len)
+		return iop_chan->common.cookie;
+
+	PRINTK("iop adma%d: %s src_cnt: %d len: %u flags: %lx\n",
+	iop_chan->device->id, __FUNCTION__, src_cnt, len, flags);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_xor_slot_count(len, src_cnt, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		#ifdef CONFIG_ARCH_IOP32X
+		if ((flags & DMA_DEST_BUF) &&
+			dest.buf == (void *) iop32x_zero_result_buffer) {
+			PRINTK("%s: iop32x zero sum emulation requested\n",
+				__FUNCTION__);
+			sw_desc->xor_check_result = iop32x_zero_sum_output;
+		}
+		#endif
+
+		iop_desc_init_xor(sw_desc, src_cnt);
+		iop_desc_set_byte_count(sw_desc, iop_chan, len);
+
+		switch (flags & (DMA_DEST_BUF | DMA_DEST_PAGE |
+				DMA_DEST_PAGES | DMA_DEST_DMA |
+				DMA_DEST_DMA_LIST)) {
+		case DMA_DEST_PAGE:
+			dest.dma = dma_map_page(dev, dest.pg, dest_off, len,
+					DMA_FROM_DEVICE);
+			break;
+		case DMA_DEST_BUF:
+			dest.dma = dma_map_single(dev, dest.buf, len,
+					DMA_FROM_DEVICE);
+			break;
+		}
+
+		iop_desc_set_dest_addr(sw_desc, iop_chan, dest.dma);
+
+		switch (flags & (DMA_SRC_BUF | DMA_SRC_PAGE |
+				DMA_SRC_PAGES | DMA_SRC_DMA |
+				DMA_SRC_DMA_LIST)) {
+		case DMA_SRC_PAGES:
+			while (src_cnt--) {
+				dma_addr_t addr = dma_map_page(dev,
+							src.pgs[src_cnt],
+							src_off, len,
+							DMA_TO_DEVICE);
+				iop_desc_set_xor_src_addr(sw_desc,
+							src_cnt,
+							addr,
+							slot_cnt,
+							slots_per_op);
+			}
+			break;
+		case DMA_SRC_DMA_LIST:
+			while (src_cnt--) {
+				iop_desc_set_xor_src_addr(sw_desc,
+							src_cnt,
+							src.dma_list[src_cnt],
+							slot_cnt,
+							slots_per_op);
+			}
+			break;
+		}
+
+		iop_chan_chain_desc(iop_chan, sw_desc);
+		iop_desc_assign_cookie(iop_chan, sw_desc);
+
+		sw_desc->flags = flags;
+		iop_chan->pending++;
+		ret = sw_desc->cookie;
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	iop_adma_check_threshold(iop_chan);
+
+	return ret;
+}
+
+/**
+ * do_iop_adma_zero_sum - xor the sources together and report whether
+ *				the sum is zero
+ * @chan: common channel handle
+ * @src: DMAENGINE source addresses
+ * @src_cnt: number of sources
+ * @src_off: offset into the sources
+ * @len: transaction length in bytes
+ * @flags: DMAENGINE address type flags
+ * @result: set to 1 if sum is zero else 0
+ */
+#ifndef CONFIG_ARCH_IOP32X
+static dma_cookie_t do_iop_adma_zero_sum(struct dma_chan *chan,
+					union dmaengine_addr src,
+					unsigned int src_cnt,
+					unsigned int src_off,
+					size_t len,
+					u32 *result,
+					unsigned long flags)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *sw_desc;
+	dma_cookie_t ret = -ENOMEM;
+	int slot_cnt, slots_per_op;
+
+	if (!chan || !src.dma_list || !result)
+		return -EFAULT;
+
+	if (!len)
+		return iop_chan->common.cookie;
+
+	PRINTK("iop adma%d: %s src_cnt: %d len: %u flags: %lx\n",
+	iop_chan->device->id, __FUNCTION__, src_cnt, len, flags);
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_zero_sum_slot_count(len, src_cnt, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		struct device *dev = &iop_chan->device->pdev->dev;
+		iop_chan->pending += iop_desc_init_zero_sum(sw_desc, src_cnt,
+					slot_cnt, slots_per_op);
+
+		switch (flags & (DMA_SRC_BUF | DMA_SRC_PAGE |
+				DMA_SRC_PAGES | DMA_SRC_DMA |
+				DMA_SRC_DMA_LIST)) {
+		case DMA_SRC_PAGES:
+			while (src_cnt--) {
+				dma_addr_t addr = dma_map_page(dev,
+						src.pgs[src_cnt],
+						src_off, len,
+						DMA_TO_DEVICE);
+				iop_desc_set_zero_sum_src_addr(sw_desc,
+						src_cnt,
+						addr,
+						slot_cnt,
+						slots_per_op);
+			}
+			break;
+		case DMA_SRC_DMA_LIST:
+			while (src_cnt--) {
+				iop_desc_set_zero_sum_src_addr(sw_desc,
+						src_cnt,
+						src.dma_list[src_cnt],
+						slot_cnt,
+						slots_per_op);
+			}
+			break;
+		}
+
+		iop_desc_set_zero_sum_byte_count(sw_desc, len, slots_per_op);
+
+		/* assign a cookie to the first descriptor so
+		 * the buffers are unmapped
+		 */
+		iop_desc_assign_cookie(iop_chan, sw_desc);
+		sw_desc->flags = flags;
+
+		/* assign cookie to the last descriptor in the group
+		 * so the xor_check_result is updated. Also, set the
+		 * xor_check_result ptr of the first and last descriptor
+		 * so the cleanup routine can sum the group of results
+		 */
+		if (slot_cnt > slots_per_op) {
+			struct iop_adma_desc_slot *desc;
+			desc = list_entry(iop_chan->chain.prev,
+				struct iop_adma_desc_slot,
+				chain_node);
+			iop_desc_assign_cookie(iop_chan, desc);
+			sw_desc->xor_check_result = result;
+			desc->xor_check_result = result;
+			ret = desc->cookie;
+		} else {
+			sw_desc->xor_check_result = result;
+			ret = sw_desc->cookie;
+		}
+
+		/* add the group to the chain */
+		iop_chan_chain_desc(iop_chan, sw_desc);
+	}
+	spin_unlock_bh(&iop_chan->lock);
+
+	iop_adma_check_threshold(iop_chan);
+
+	return ret;
+}
+#else
+/* iop32x does not support zero sum in hardware, so we simulate 
+ * it in software.  It only supports a PAGE_SIZE length which is
+ * enough to support md raid.
+ */
+static dma_cookie_t do_iop_adma_zero_sum(struct dma_chan *chan,
+					union dmaengine_addr src,
+					unsigned int src_cnt,
+					unsigned int src_off,
+					size_t len,
+					u32 *result,
+					unsigned long flags)
+{
+	static union dmaengine_addr dest_addr = { .buf = iop32x_zero_result_buffer };
+	static dma_cookie_t last_zero_result_cookie = 0;
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	dma_cookie_t ret;
+
+	if (!chan || !src.dma_list || !result)
+		return -EFAULT;
+
+	if (!len)
+		return iop_chan->common.cookie;
+
+	if (len > sizeof(iop32x_zero_result_buffer)) {
+		printk(KERN_ERR "iop32x performs zero sum with a %d byte buffer, %d"
+		  	" bytes is too large\n", sizeof(iop32x_zero_result_buffer),
+		  	len);
+		BUG();
+		return -EFAULT;
+	}
+
+	/* we only have 1 result buffer, it can not be shared */
+	if (last_zero_result_cookie) {
+		PRINTK("%s: waiting for last_zero_result_cookie: %d\n",
+			__FUNCTION__, last_zero_result_cookie);
+		dma_sync_wait(chan, last_zero_result_cookie);
+		last_zero_result_cookie = 0;
+	}
+
+	PRINTK("iop adma%d: %s src_cnt: %d len: %u flags: %lx\n",
+	iop_chan->device->id, __FUNCTION__, src_cnt, len, flags);
+
+	flags |= DMA_DEST_BUF;
+	iop32x_zero_sum_output = result;
+
+	ret = do_iop_adma_xor(chan, dest_addr, 0, src, src_cnt, src_off,
+				len, flags);
+
+	if (ret > 0)
+		last_zero_result_cookie = ret;
+
+	return ret;
+}
+#endif
+
+static void iop_adma_free_chan_resources(struct dma_chan *chan)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	struct iop_adma_desc_slot *iter, *_iter;
+	int in_use_descs = 0;
+
+	iop_adma_slot_cleanup(iop_chan);
+
+	spin_lock_bh(&iop_chan->lock);
+	list_for_each_entry_safe(iter, _iter, &iop_chan->chain,
+					chain_node) {
+		in_use_descs++;
+		list_del(&iter->chain_node);
+	}
+	list_for_each_entry_safe_reverse(iter, _iter, &iop_chan->all_slots, slot_node) {
+		list_del(&iter->slot_node);
+		kfree(iter);
+		iop_chan->slots_allocated--;
+	}
+	iop_chan->last_used = NULL;
+
+	PRINTK("iop adma%d %s slots_allocated %d\n", iop_chan->device->id,
+		__FUNCTION__, iop_chan->slots_allocated);
+	spin_unlock_bh(&iop_chan->lock);
+
+	/* one is ok since we left it on there on purpose */
+	if (in_use_descs > 1)
+		printk(KERN_ERR "IOP: Freeing %d in use descriptors!\n",
+			in_use_descs - 1);
+}
+
+/**
+ * iop_adma_is_complete - poll the status of an ADMA transaction
+ * @chan: ADMA channel handle
+ * @cookie: ADMA transaction identifier
+ */
+static enum dma_status iop_adma_is_complete(struct dma_chan *chan,
+                                            dma_cookie_t cookie,
+                                            dma_cookie_t *done,
+                                            dma_cookie_t *used)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	dma_cookie_t last_used;
+	dma_cookie_t last_complete;
+	enum dma_status ret;
+
+	last_used = chan->cookie;
+	last_complete = iop_chan->completed_cookie;
+
+	if (done)
+		*done= last_complete;
+	if (used)
+		*used = last_used;
+
+	ret = dma_async_is_complete(cookie, last_complete, last_used);
+	if (ret == DMA_SUCCESS)
+		return ret;
+
+	iop_adma_slot_cleanup(iop_chan);
+
+	last_used = chan->cookie;
+	last_complete = iop_chan->completed_cookie;
+
+	if (done)
+		*done= last_complete;
+	if (used)
+		*used = last_used;
+
+	return dma_async_is_complete(cookie, last_complete, last_used);
+}
+
+/* to do: can we use these interrupts to implement 'sleep until completed' */
+static irqreturn_t iop_adma_eot_handler(int irq, void *data, struct pt_regs *regs)
+{
+	return IRQ_NONE;
+}
+
+static irqreturn_t iop_adma_eoc_handler(int irq, void *data, struct pt_regs *regs)
+{
+	return IRQ_NONE;
+}
+
+static irqreturn_t iop_adma_err_handler(int irq, void *data, struct pt_regs *regs)
+{
+	return IRQ_NONE;
+}
+
+static void iop_adma_issue_pending(struct dma_chan *chan)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	spin_lock(&iop_chan->lock);
+	if (iop_chan->pending) {
+		iop_chan->pending = 0;
+		iop_chan_append(iop_chan);
+	}
+	spin_unlock(&iop_chan->lock);
+}
+
+/*
+ * Perform a transaction to verify the HW works.
+ */
+#define IOP_ADMA_TEST_SIZE 2000
+
+static int __devinit iop_adma_memcpy_self_test(struct iop_adma_device *device)
+{
+	int i;
+	union dmaengine_addr src;
+	union dmaengine_addr dest;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	int err = 0;
+
+	src.buf = kzalloc(sizeof(u8) * IOP_ADMA_TEST_SIZE, SLAB_KERNEL);
+	if (!src.buf)
+		return -ENOMEM;
+	dest.buf = kzalloc(sizeof(u8) * IOP_ADMA_TEST_SIZE, SLAB_KERNEL);
+	if (!dest.buf) {
+		kfree(src.buf);
+		return -ENOMEM;
+	}
+
+	/* Fill in src buffer */
+	for (i = 0; i < IOP_ADMA_TEST_SIZE; i++)
+		((u8 *) src.buf)[i] = (u8)i;
+
+	memset(dest.buf, 0, IOP_ADMA_TEST_SIZE);
+
+	/* Start copy, using first DMA channel */
+	dma_chan = container_of(device->common.channels.next,
+	                        struct dma_chan,
+	                        device_node);
+	if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	cookie = do_iop_adma_memcpy(dma_chan, dest, 0, src, 0,
+		IOP_ADMA_TEST_SIZE, DMA_SRC_BUF | DMA_DEST_BUF);
+	iop_adma_issue_pending(dma_chan);
+	msleep(1);
+
+	if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+		printk(KERN_ERR "iop adma%d: Self-test copy timed out, disabling\n",
+			device->id);
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	consistent_sync(dest.buf, IOP_ADMA_TEST_SIZE, DMA_FROM_DEVICE);
+	if (memcmp(src.buf, dest.buf, IOP_ADMA_TEST_SIZE)) {
+		printk(KERN_ERR "iop adma%d: Self-test copy failed compare, disabling\n",
+			device->id);
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+free_resources:
+	iop_adma_free_chan_resources(dma_chan);
+out:
+	kfree(src.buf);
+	kfree(dest.buf);
+	return err;
+}
+
+#define IOP_ADMA_NUM_SRC_TST 4 /* must be <= 15 */
+static int __devinit iop_adma_xor_zero_sum_self_test(struct iop_adma_device *device)
+{
+	int i, src_idx;
+	struct page *xor_srcs[IOP_ADMA_NUM_SRC_TST];
+	struct page *zero_sum_srcs[IOP_ADMA_NUM_SRC_TST + 1];
+	union dmaengine_addr dest;
+	union dmaengine_addr src;
+	struct dma_chan *dma_chan;
+	dma_cookie_t cookie;
+	u8 cmp_byte = 0;
+	u32 cmp_word;
+	u32 zero_sum_result;
+	int err = 0;
+
+	for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TST; src_idx++) {
+		xor_srcs[src_idx] = alloc_page(GFP_KERNEL);
+		if (!xor_srcs[src_idx])
+			while (src_idx--) {
+				__free_page(xor_srcs[src_idx]);
+				return -ENOMEM;
+			}
+	}
+
+	dest.pg = alloc_page(GFP_KERNEL);
+	if (!dest.pg)
+		while (src_idx--) {
+			__free_page(xor_srcs[src_idx]);
+			return -ENOMEM;
+		}
+
+	/* Fill in src buffers */
+	for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TST; src_idx++) {
+		u8 *ptr = page_address(xor_srcs[src_idx]);
+		for (i = 0; i < PAGE_SIZE; i++)
+			ptr[i] = (1 << src_idx);
+	}
+
+	for (src_idx = 0; src_idx < IOP_ADMA_NUM_SRC_TST; src_idx++)
+		cmp_byte ^= (u8) (1 << src_idx);
+
+	cmp_word = (cmp_byte << 24) | (cmp_byte << 16) | (cmp_byte << 8) | cmp_byte;
+
+	memset(page_address(dest.pg), 0, PAGE_SIZE);
+
+	dma_chan = container_of(device->common.channels.next,
+	                        struct dma_chan,
+	                        device_node);
+	if (iop_adma_alloc_chan_resources(dma_chan) < 1) {
+		err = -ENODEV;
+		goto out;
+	}
+
+	/* test xor */
+	src.pgs = xor_srcs;
+	cookie = do_iop_adma_xor(dma_chan, dest, 0, src,
+		IOP_ADMA_NUM_SRC_TST, 0, PAGE_SIZE, DMA_DEST_PAGE | DMA_SRC_PAGES);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+		printk(KERN_ERR "iop_adma: Self-test xor timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	consistent_sync(page_address(dest.pg), PAGE_SIZE, DMA_FROM_DEVICE);
+	for (i = 0; i < (PAGE_SIZE / sizeof(u32)); i++) {
+		u32 *ptr = page_address(dest.pg);
+		if (ptr[i] != cmp_word) {
+			printk(KERN_ERR "iop_adma: Self-test xor failed compare, disabling\n");
+			err = -ENODEV;
+			goto free_resources;
+		}
+	}
+
+	/* zero sum the sources with the destintation page */
+	for (i = 0; i < IOP_ADMA_NUM_SRC_TST; i++)
+		zero_sum_srcs[i] = xor_srcs[i];
+	zero_sum_srcs[i] = dest.pg;
+	src.pgs = zero_sum_srcs;
+
+	zero_sum_result = 1;
+	cookie = do_iop_adma_zero_sum(dma_chan, src, IOP_ADMA_NUM_SRC_TST + 1,
+				0, PAGE_SIZE, &zero_sum_result, DMA_SRC_PAGES);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+		printk(KERN_ERR "iop_adma: Self-test zero sum timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	if (zero_sum_result != 0) {
+		printk(KERN_ERR "iop_adma: Self-test zero sum failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	/* test memset */
+	cookie = do_iop_adma_memset(dma_chan, dest, 0, 0, PAGE_SIZE, DMA_DEST_PAGE);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+		printk(KERN_ERR "iop_adma: Self-test memset timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	consistent_sync(page_address(dest.pg), PAGE_SIZE, DMA_FROM_DEVICE);
+	for (i = 0; i < PAGE_SIZE/sizeof(u32); i++) {
+		u32 *ptr = page_address(dest.pg);
+		if (ptr[i]) {
+			printk(KERN_ERR "iop_adma: Self-test memset failed compare, disabling\n");
+			err = -ENODEV;
+			goto free_resources;
+		}
+	}
+
+	/* test for non-zero parity sum */
+	zero_sum_result = 0;
+	cookie = do_iop_adma_zero_sum(dma_chan, src, IOP_ADMA_NUM_SRC_TST + 1,
+				0, PAGE_SIZE, &zero_sum_result, DMA_SRC_PAGES);
+	iop_adma_issue_pending(dma_chan);
+	msleep(8);
+
+	if (iop_adma_is_complete(dma_chan, cookie, NULL, NULL) != DMA_SUCCESS) {
+		printk(KERN_ERR "iop_adma: Self-test non-zero sum timed out, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+	if (zero_sum_result != 1) {
+		printk(KERN_ERR "iop_adma: Self-test non-zero sum failed compare, disabling\n");
+		err = -ENODEV;
+		goto free_resources;
+	}
+
+free_resources:
+	iop_adma_free_chan_resources(dma_chan);
+out:
+	src_idx = IOP_ADMA_NUM_SRC_TST;
+	while (src_idx--)
+		__free_page(xor_srcs[src_idx]);
+	__free_page(dest.pg);
+	return err;
+}
+
+static int __devexit iop_adma_remove(struct platform_device *dev)
+{
+	struct iop_adma_device *device = platform_get_drvdata(dev);
+	struct dma_chan *chan, *_chan;
+	struct iop_adma_chan *iop_chan;
+	int i;
+	struct iop_adma_platform_data *plat_data = dev->dev.platform_data;
+
+
+	dma_async_device_unregister(&device->common);
+
+	for (i = 0; i < 3; i++) {
+		unsigned int irq;
+		irq = platform_get_irq(dev, i);
+		free_irq(irq, device);
+	}
+
+	dma_free_coherent(&dev->dev, plat_data->pool_size,
+			device->dma_desc_pool_virt, device->dma_desc_pool);
+
+	do {
+		struct resource *res;
+		res = platform_get_resource(dev, IORESOURCE_MEM, 0);
+		release_mem_region(res->start, res->end - res->start);
+	} while (0);
+
+	list_for_each_entry_safe(chan, _chan, &device->common.channels,
+				device_node) {
+		iop_chan = to_iop_adma_chan(chan);
+		list_del(&chan->device_node);
+		kfree(iop_chan);
+	}
+	kfree(device);
+
+	return 0;
+}
+
+static dma_addr_t iop_adma_map_page(struct dma_chan *chan, struct page *page,
+					unsigned long offset, size_t size,
+					int direction)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	return dma_map_page(&iop_chan->device->pdev->dev, page, offset, size,
+			direction);
+}
+
+static dma_addr_t iop_adma_map_single(struct dma_chan *chan, void *cpu_addr,
+					size_t size, int direction)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	return dma_map_single(&iop_chan->device->pdev->dev, cpu_addr, size,
+			direction);
+}
+
+static void iop_adma_unmap_page(struct dma_chan *chan, dma_addr_t handle,
+				size_t size, int direction)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	dma_unmap_page(&iop_chan->device->pdev->dev, handle, size, direction);
+}
+
+static void iop_adma_unmap_single(struct dma_chan *chan, dma_addr_t handle,
+				size_t size, int direction)
+{
+	struct iop_adma_chan *iop_chan = to_iop_adma_chan(chan);
+	dma_unmap_single(&iop_chan->device->pdev->dev, handle, size, direction);
+}
+
+extern dma_cookie_t dma_async_do_memcpy_err(struct dma_chan *chan,
+		union dmaengine_addr dest, unsigned int dest_off,
+		union dmaengine_addr src, unsigned int src_off,
+                size_t len, unsigned long flags);
+
+extern dma_cookie_t dma_async_do_xor_err(struct dma_chan *chan,
+		union dmaengine_addr dest, unsigned int dest_off,
+		union dmaengine_addr src, unsigned int src_cnt,
+		unsigned int src_off, size_t len, unsigned long flags);
+
+extern dma_cookie_t dma_async_do_zero_sum_err(struct dma_chan *chan,
+		union dmaengine_addr src, unsigned int src_cnt,
+		unsigned int src_off, size_t len, u32 *result,
+		unsigned long flags);
+
+extern dma_cookie_t dma_async_do_memset_err(struct dma_chan *chan,
+                union dmaengine_addr dest, unsigned int dest_off,
+                int val, size_t len, unsigned long flags);
+
+static int __devinit iop_adma_probe(struct platform_device *pdev)
+{
+	struct resource *res;
+	int ret=0, irq_eot=0, irq_eoc=0, irq_err=0, irq, i;
+	struct iop_adma_device *adev;
+	struct iop_adma_chan *iop_chan;
+	struct iop_adma_platform_data *plat_data = pdev->dev.platform_data;
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -ENODEV;
+
+	if (!request_mem_region(res->start, res->end - res->start, pdev->name))
+		return -EBUSY;
+
+	if ((adev = kzalloc(sizeof(*adev), GFP_KERNEL)) == NULL) {
+		ret = -ENOMEM;
+		goto err_adev_alloc;
+	}
+
+	if ((adev->dma_desc_pool_virt = dma_alloc_writecombine(&pdev->dev,
+					plat_data->pool_size,
+					&adev->dma_desc_pool,
+					GFP_KERNEL)) == NULL) {
+		ret = -ENOMEM;
+		goto err_dma_alloc;
+	}
+
+	PRINTK("%s: allocted descriptor pool virt %p phys %p\n",
+	__FUNCTION__, adev->dma_desc_pool_virt, (void *) adev->dma_desc_pool);
+
+	adev->id = plat_data->hw_id;
+	adev->common.capabilities = plat_data->capabilities;
+
+	for (i = 0; i < 3; i++) {
+		irq = platform_get_irq(pdev, i);
+		if (irq < 0)
+			ret = -ENXIO;
+		else {
+			switch (i) {
+			case 0:
+				irq_eot = irq;
+				ret = request_irq(irq, iop_adma_eot_handler,
+					 0, pdev->name, adev);
+				if (ret) {
+					ret = -EIO;
+					goto err_irq0;
+				}
+				break;
+			case 1:
+				irq_eoc = irq;
+				ret = request_irq(irq, iop_adma_eoc_handler,
+					0, pdev->name, adev);
+				if (ret) {
+					ret = -EIO;
+					goto err_irq1;
+				}
+				break;
+			case 2:
+				irq_err = irq;
+				ret = request_irq(irq, iop_adma_err_handler,
+					0, pdev->name, adev);
+				if (ret) {
+					ret = -EIO;
+					goto err_irq2;
+				}
+				break;
+			}
+		}
+	}
+
+	adev->pdev = pdev;
+	platform_set_drvdata(pdev, adev);
+
+	INIT_LIST_HEAD(&adev->common.channels);
+	adev->common.device_alloc_chan_resources = iop_adma_alloc_chan_resources;
+	adev->common.device_free_chan_resources = iop_adma_free_chan_resources;
+	adev->common.device_operation_complete = iop_adma_is_complete;
+	adev->common.device_issue_pending = iop_adma_issue_pending;
+	adev->common.map_page = iop_adma_map_page;
+	adev->common.map_single = iop_adma_map_single;
+	adev->common.unmap_page = iop_adma_unmap_page;
+	adev->common.unmap_single = iop_adma_unmap_single;
+
+	if (adev->common.capabilities & DMA_MEMCPY)
+		adev->common.device_do_dma_memcpy = do_iop_adma_memcpy;
+	else
+		adev->common.device_do_dma_memcpy = dma_async_do_memcpy_err;
+
+	if (adev->common.capabilities & DMA_MEMSET)
+		adev->common.device_do_dma_memset = do_iop_adma_memset;
+	else
+		adev->common.device_do_dma_memset = dma_async_do_memset_err;
+
+	if (adev->common.capabilities & DMA_XOR)
+		adev->common.device_do_dma_xor = do_iop_adma_xor;
+	else
+		adev->common.device_do_dma_xor = dma_async_do_xor_err;
+
+	if (adev->common.capabilities & DMA_ZERO_SUM)
+		adev->common.device_do_dma_zero_sum = do_iop_adma_zero_sum;
+	else
+		adev->common.device_do_dma_zero_sum = dma_async_do_zero_sum_err;
+
+	if ((iop_chan = kzalloc(sizeof(*iop_chan), GFP_KERNEL)) == NULL) {
+		ret = -ENOMEM;
+		goto err_chan_alloc;
+	}
+
+	spin_lock_init(&iop_chan->lock);
+	iop_chan->device = adev;
+	INIT_LIST_HEAD(&iop_chan->chain);
+	INIT_LIST_HEAD(&iop_chan->all_slots);
+	iop_chan->last_used = NULL;
+	dma_async_chan_init(&iop_chan->common, &adev->common);
+
+	if (adev->common.capabilities & DMA_MEMCPY) {
+		ret = iop_adma_memcpy_self_test(adev);
+		PRINTK("iop adma%d: memcpy self test returned %d\n", adev->id, ret);
+		if (ret)
+			goto err_self_test;
+	}
+
+	if (adev->common.capabilities & (DMA_XOR + DMA_ZERO_SUM + DMA_MEMSET)) {
+		ret = iop_adma_xor_zero_sum_self_test(adev);
+		PRINTK("iop adma%d: xor self test returned %d\n", adev->id, ret);
+		if (ret)
+			goto err_self_test;
+	}
+
+	printk(KERN_INFO "Intel(R) IOP ADMA Engine found [%d]: "
+		"( %s%s%s%s%s%s%s%s%s)\n",
+		adev->id,
+		adev->common.capabilities & DMA_PQ_XOR	      ? "pq_xor " : "",
+		adev->common.capabilities & DMA_PQ_UPDATE     ? "pq_update " : "",
+		adev->common.capabilities & DMA_PQ_ZERO_SUM   ? "pq_zero_sum " : "",
+		adev->common.capabilities & DMA_XOR	      ? "xor " : "",
+		adev->common.capabilities & DMA_DUAL_XOR      ? "dual_xor " : "",
+		adev->common.capabilities & DMA_ZERO_SUM      ? "xor_zero_sum " : "",
+		adev->common.capabilities & DMA_MEMSET	      ? "memset " : "",
+		adev->common.capabilities & DMA_MEMCPY_CRC32C ? "memcpy+crc " : "",
+		adev->common.capabilities & DMA_MEMCPY	      ? "memcpy " : "");
+
+	dma_async_device_register(&adev->common);
+	goto out;
+
+err_self_test:
+	kfree(iop_chan);
+err_chan_alloc:
+err_irq2:
+	free_irq(irq_eoc, adev);
+err_irq1:
+	free_irq(irq_eot, adev);
+err_irq0:
+	dma_free_coherent(&adev->pdev->dev, plat_data->pool_size,
+			adev->dma_desc_pool_virt, adev->dma_desc_pool);
+err_dma_alloc:
+	kfree(adev);
+err_adev_alloc:
+	release_mem_region(res->start, res->end - res->start);
+out:
+	return ret;
+}
+
+static void iop_chan_start_null_memcpy(struct iop_adma_chan *iop_chan)
+{
+	struct iop_adma_desc_slot *sw_desc;
+	dma_cookie_t cookie;
+	int slot_cnt, slots_per_op;
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_memcpy_slot_count(0, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		iop_desc_init_memcpy(sw_desc);
+		iop_desc_set_byte_count(sw_desc, iop_chan, 0);
+		iop_desc_set_dest_addr(sw_desc, iop_chan, 0);
+		iop_desc_set_memcpy_src_addr(sw_desc, 0, slot_cnt, slots_per_op);
+
+		cookie = iop_chan->common.cookie;
+		cookie++;
+		if (cookie <= 1)
+			cookie = 2;
+
+		/* initialize the completed cookie to be less than
+		 * the most recently used cookie
+		 */
+		iop_chan->completed_cookie = cookie - 1;
+		iop_chan->common.cookie = sw_desc->cookie = cookie;
+
+		/* channel should not be busy */
+		BUG_ON(iop_chan_is_busy(iop_chan));
+
+		/* clear any prior error-status bits */
+		iop_chan_clear_status(iop_chan);
+
+		/* disable operation */
+		iop_chan_disable(iop_chan);
+
+		/* set the descriptor address */
+		iop_chan_set_next_descriptor(iop_chan, sw_desc->phys);
+
+		/* run the descriptor */
+		iop_chan_enable(iop_chan);
+	} else
+		printk(KERN_ERR "iop adma%d failed to allocate null descriptor\n",
+			iop_chan->device->id);
+	spin_unlock_bh(&iop_chan->lock);
+}
+
+static void iop_chan_start_null_xor(struct iop_adma_chan *iop_chan)
+{
+	struct iop_adma_desc_slot *sw_desc;
+	dma_cookie_t cookie;
+	int slot_cnt, slots_per_op;
+
+	spin_lock_bh(&iop_chan->lock);
+	slot_cnt = iop_chan_xor_slot_count(0, 2, &slots_per_op);
+	sw_desc = iop_adma_alloc_slots(iop_chan, slot_cnt, slots_per_op);
+	if (sw_desc) {
+		iop_desc_init_null_xor(sw_desc, 2);
+		iop_desc_set_byte_count(sw_desc, iop_chan, 0);
+		iop_desc_set_dest_addr(sw_desc, iop_chan, 0);
+		iop_desc_set_xor_src_addr(sw_desc, 0, 0, slot_cnt, slots_per_op);
+		iop_desc_set_xor_src_addr(sw_desc, 1, 0, slot_cnt, slots_per_op);
+
+		cookie = iop_chan->common.cookie;
+		cookie++;
+		if (cookie <= 1)
+			cookie = 2;
+
+		/* initialize the completed cookie to be less than
+		 * the most recently used cookie
+		 */
+		iop_chan->completed_cookie = cookie - 1;
+		iop_chan->common.cookie = sw_desc->cookie = cookie;
+
+		/* channel should not be busy */
+		BUG_ON(iop_chan_is_busy(iop_chan));
+
+		/* clear any prior error-status bits */
+		iop_chan_clear_status(iop_chan);
+
+		/* disable operation */
+		iop_chan_disable(iop_chan);
+
+		/* set the descriptor address */
+		iop_chan_set_next_descriptor(iop_chan, sw_desc->phys);
+
+		/* run the descriptor */
+		iop_chan_enable(iop_chan);
+	} else
+		printk(KERN_ERR "iop adma%d failed to allocate null descriptor\n",
+			iop_chan->device->id);
+	spin_unlock_bh(&iop_chan->lock);
+}
+
+static struct platform_driver iop_adma_driver = {
+	.probe		= iop_adma_probe,
+	.remove		= iop_adma_remove,
+	.driver		= {
+		.owner	= THIS_MODULE,
+		.name	= "IOP-ADMA",
+	},
+};
+
+static int __init iop_adma_init (void)
+{
+	return platform_driver_register(&iop_adma_driver);
+}
+
+static void __exit iop_adma_exit (void)
+{
+	platform_driver_unregister(&iop_adma_driver);
+	return;
+}
+
+void __arch_raid5_dma_chan_request(struct dma_client *client)
+{
+	iop_raid5_dma_chan_request(client);
+}
+
+struct dma_chan *__arch_raid5_dma_next_channel(struct dma_client *client)
+{
+	return iop_raid5_dma_next_channel(client);
+}
+
+struct dma_chan *__arch_raid5_dma_check_channel(struct dma_chan *chan,
+						dma_cookie_t cookie,
+						struct dma_client *client,
+						unsigned long capabilities)
+{
+	return iop_raid5_dma_check_channel(chan, cookie, client, capabilities);
+}
+
+EXPORT_SYMBOL_GPL(__arch_raid5_dma_chan_request);
+EXPORT_SYMBOL_GPL(__arch_raid5_dma_next_channel);
+EXPORT_SYMBOL_GPL(__arch_raid5_dma_check_channel);
+
+module_init(iop_adma_init);
+module_exit(iop_adma_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_DESCRIPTION("IOP ADMA Engine Driver");
+MODULE_LICENSE("GPL");
diff --git a/include/asm-arm/hardware/iop_adma.h b/include/asm-arm/hardware/iop_adma.h
new file mode 100644
index 0000000..62bbbdf
--- /dev/null
+++ b/include/asm-arm/hardware/iop_adma.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright(c) 2006 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License as published by the Free
+ * Software Foundation; either version 2 of the License, or (at your option)
+ * any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59
+ * Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+ *
+ * The full GNU General Public License is included in this distribution in the
+ * file called COPYING.
+ */
+#ifndef IOP_ADMA_H
+#define IOP_ADMA_H
+#include <linux/types.h>
+#include <linux/dmaengine.h>
+
+#define IOP_ADMA_SLOT_SIZE 32
+#define IOP_ADMA_THRESHOLD 20
+
+/**
+ * struct iop_adma_device - internal representation of an ADMA device
+ * @pdev: Platform device
+ * @id: HW ADMA Device selector
+ * @dma_desc_pool: base of DMA descriptor region (DMA address)
+ * @dma_desc_pool_virt: base of DMA descriptor region (CPU address)
+ * @common: embedded struct dma_device
+ */
+struct iop_adma_device {
+	struct platform_device *pdev;
+	int id;
+	dma_addr_t dma_desc_pool;
+	void *dma_desc_pool_virt;
+	struct dma_device common;
+};
+
+/**
+ * struct iop_adma_device - internal representation of an ADMA device
+ * @lock: serializes enqueue/dequeue operations to the slot pool
+ * @device: parent device
+ * @chain: device chain view of the descriptors
+ * @common: common dmaengine channel object members
+ * @all_slots: complete domain of slots usable by the channel
+ * @pending: allows batching of hardware operations
+ * @result_accumulator: allows zero result sums of buffers > the hw maximum
+ * @zero_sum_group: flag to the clean up routine to collect zero sum results
+ * @completed_cookie: identifier for the most recently completed operation
+ * @slots_allocated: records the actual size of the descriptor slot pool
+ */
+struct iop_adma_chan {
+	spinlock_t lock;
+	struct iop_adma_device *device;
+	struct list_head chain;
+	struct dma_chan common;
+	struct list_head all_slots;
+	struct iop_adma_desc_slot *last_used;
+	int pending;
+	u8 result_accumulator;
+	u8 zero_sum_group;
+	dma_cookie_t completed_cookie;
+	int slots_allocated;
+};
+
+struct iop_adma_desc_slot {
+	void *hw_desc;
+	struct list_head slot_node;
+	struct list_head chain_node;
+	dma_cookie_t cookie;
+	dma_addr_t phys;
+	u16 stride;
+	u16 idx;
+	u16 slot_cnt;
+	u8 src_cnt;
+	u8 slots_per_op;
+	unsigned long flags;
+	union {
+		u32 *xor_check_result;
+		u32 *crc32_result;
+	};
+};
+
+struct iop_adma_platform_data {
+        int hw_id;
+        unsigned long capabilities;
+        size_t pool_size;
+};
+
+#define to_iop_sw_desc(addr_hw_desc) container_of(addr_hw_desc, struct iop_adma_desc_slot, hw_desc)
+#define iop_hw_desc_slot_idx(hw_desc, idx) ( (void *) (((unsigned long) hw_desc) + ((idx) << 5)) )
+#endif

  parent reply	other threads:[~2006-09-11 23:19 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-09-11 23:00 [PATCH 00/19] Hardware Accelerated MD RAID5: Introduction Dan Williams
2006-09-11 23:17 ` [PATCH 01/19] raid5: raid5_do_soft_block_ops Dan Williams
2006-09-11 23:34   ` Jeff Garzik
2006-09-11 23:17 ` [PATCH 02/19] raid5: move write operations to a workqueue Dan Williams
2006-09-11 23:36   ` Jeff Garzik
2006-09-11 23:17 ` [PATCH 03/19] raid5: move check parity " Dan Williams
2006-09-11 23:17 ` [PATCH 04/19] raid5: move compute block " Dan Williams
2006-09-11 23:18 ` [PATCH 05/19] raid5: move read completion copies " Dan Williams
2006-09-11 23:18 ` [PATCH 06/19] raid5: move the reconstruct write expansion operation " Dan Williams
2006-09-11 23:18 ` [PATCH 07/19] raid5: remove compute_block and compute_parity5 Dan Williams
2006-09-11 23:18 ` [PATCH 08/19] dmaengine: enable multiple clients and operations Dan Williams
2006-09-11 23:44   ` Jeff Garzik
2006-09-12  0:14     ` Dan Williams
2006-09-12  0:52       ` Roland Dreier
2006-09-12  6:18         ` Dan Williams
2006-09-12  9:15           ` Evgeniy Polyakov
2006-09-13  4:04           ` Jeff Garzik
2006-09-15 16:38     ` Olof Johansson
2006-09-15 19:44       ` [PATCH] dmaengine: clean up and abstract function types (was Re: [PATCH 08/19] dmaengine: enable multiple clients and operations) Olof Johansson
2006-09-15 20:02         ` [PATCH] [v2] " Olof Johansson
2006-09-18 22:56         ` [PATCH] " Dan Williams
2006-09-19  1:05           ` Olof Johansson
2006-09-19 11:20             ` Alan Cox
2006-09-19 16:32               ` Olof Johansson
2006-09-11 23:18 ` [PATCH 09/19] dmaengine: reduce backend address permutations Dan Williams
2006-09-15 14:46   ` Olof Johansson
2006-09-11 23:18 ` [PATCH 10/19] dmaengine: expose per channel dma mapping characteristics to clients Dan Williams
2006-09-11 23:18 ` [PATCH 11/19] dmaengine: add memset as an asynchronous dma operation Dan Williams
2006-09-11 23:50   ` Jeff Garzik
2006-09-11 23:18 ` [PATCH 12/19] dmaengine: dma_async_memcpy_err for DMA engines that do not support memcpy Dan Williams
2006-09-11 23:51   ` Jeff Garzik
2006-09-11 23:18 ` [PATCH 13/19] dmaengine: add support for dma xor zero sum operations Dan Williams
2006-09-11 23:18 ` [PATCH 14/19] dmaengine: add dma_sync_wait Dan Williams
2006-09-11 23:52   ` Jeff Garzik
2006-09-11 23:18 ` [PATCH 15/19] dmaengine: raid5 dma client Dan Williams
2006-09-11 23:54   ` Jeff Garzik
2006-09-11 23:19 ` Dan Williams [this message]
2006-09-15 14:57   ` [PATCH 16/19] dmaengine: Driver for the Intel IOP 32x, 33x, and 13xx RAID engines Olof Johansson
2006-09-11 23:19 ` [PATCH 17/19] iop3xx: define IOP3XX_REG_ADDR[32|16|8] and clean up DMA/AAU defs Dan Williams
2006-09-11 23:55   ` Jeff Garzik
2006-09-11 23:19 ` [PATCH 18/19] iop3xx: Give Linux control over PCI (ATU) initialization Dan Williams
2006-09-11 23:56   ` Jeff Garzik
2006-09-11 23:19 ` [PATCH 19/19] iop3xx: IOP 32x and 33x support for the iop-adma driver Dan Williams
2006-09-11 23:38 ` [PATCH 00/19] Hardware Accelerated MD RAID5: Introduction Jeff Garzik
2006-09-11 23:53   ` Dan Williams
2006-09-12  2:41     ` Jeff Garzik
2006-09-12  5:47       ` Dan Williams
2006-09-13  4:05         ` Jeff Garzik
2006-09-13  7:15 ` Jakob Oestergaard
2006-09-13 19:17   ` Dan Williams
2006-09-14  7:42     ` Jakob Oestergaard
2006-10-11  1:46       ` Dan Williams
2006-10-08 22:18 ` Neil Brown
2006-10-10 18:23   ` Dan Williams
2006-10-11  2:44     ` Neil Brown

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060911231859.4737.46229.stgit@dwillia2-linux.ch.intel.com \
    --to=dan.j.williams@intel.com \
    --cc=akpm@osdl.org \
    --cc=christopher.leech@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-raid@vger.kernel.org \
    --cc=neilb@suse.de \
    --subject='Re: [PATCH 16/19] dmaengine: Driver for the Intel IOP 32x, 33x, and 13xx RAID engines' \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).