From dd3dd384df7f9f77fba6875a606e5a663510cd1d Mon Sep 17 00:00:00 2001
From: Mike Frysinger <vapier.adi@gmail.com>
Date: Wed, 7 Jan 2009 23:14:39 +0800
Subject: Blackfin arch: rewrite dma_memcpy() and dma in/out functions

- unify all dma in/out functions (takes ~35 lines of code now)
- unify dma_memcpy with dma in/out functions (1 place that touches MDMA0
  registers)
- add support for 32-bit transfers
- clean up dma_memcpy code to be much more readable
- irqs are disabled only while programming MDMA registers rather than
  for the entire transaction

Signed-off-by: Mike Frysinger <vapier.adi@gmail.com>
Signed-off-by: Bryan Wu <cooloney@kernel.org>
---
 arch/blackfin/include/asm/dma.h     |   1 +
 arch/blackfin/kernel/bfin_dma_5xx.c | 519 +++++++++++-------------------------
 arch/blackfin/kernel/setup.c        |   2 +
 3 files changed, 156 insertions(+), 366 deletions(-)

diff --git a/arch/blackfin/include/asm/dma.h b/arch/blackfin/include/asm/dma.h
index d059b2d..ca8252a 100644
--- a/arch/blackfin/include/asm/dma.h
+++ b/arch/blackfin/include/asm/dma.h
@@ -178,6 +178,7 @@ void dma_enable_irq(unsigned int channel);
 void clear_dma_irqstat(unsigned int channel);
 void *dma_memcpy(void *dest, const void *src, size_t count);
 void *safe_dma_memcpy(void *dest, const void *src, size_t count);
+void blackfin_dma_early_init(void);
 
 extern int channel2irq(unsigned int channel);
 extern struct dma_register *dma_io_base_addr[MAX_DMA_CHANNELS];
diff --git a/arch/blackfin/kernel/bfin_dma_5xx.c b/arch/blackfin/kernel/bfin_dma_5xx.c
index bafb6ae..dff979b 100644
--- a/arch/blackfin/kernel/bfin_dma_5xx.c
+++ b/arch/blackfin/kernel/bfin_dma_5xx.c
@@ -1,44 +1,24 @@
 /*
- * File:         arch/blackfin/kernel/bfin_dma_5xx.c
- * Based on:
- * Author:
+ * bfin_dma_5xx.c - Blackfin DMA implementation
  *
- * Created:
- * Description:  This file contains the simple DMA Implementation for Blackfin
- *
- * Modified:
- *               Copyright 2004-2006 Analog Devices Inc.
- *
- * Bugs:         Enter bugs at http://blackfin.uclinux.org/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, see the file COPYING, or write
- * to the Free Software Foundation, Inc.,
- * 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+ * Copyright 2004-2006 Analog Devices Inc.
+ * Licensed under the GPL-2 or later.
  */
 
 #include <linux/errno.h>
+#include <linux/interrupt.h>
+#include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/param.h>
 #include <linux/proc_fs.h>
 #include <linux/sched.h>
 #include <linux/seq_file.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/param.h>
+#include <linux/spinlock.h>
 
 #include <asm/blackfin.h>
-#include <asm/dma.h>
 #include <asm/cacheflush.h>
+#include <asm/dma.h>
+#include <asm/uaccess.h>
 
 /**************************************************************************
  * Global Variables
@@ -82,12 +62,11 @@ static int __init blackfin_dma_init(void)
 arch_initcall(blackfin_dma_init);
 
 #ifdef CONFIG_PROC_FS
-
 static int proc_dma_show(struct seq_file *m, void *v)
 {
 	int i;
 
-	for (i = 0 ; i < MAX_DMA_CHANNELS; ++i)
+	for (i = 0; i < MAX_DMA_CHANNELS; ++i)
 		if (dma_ch[i].chan_status != DMA_CHANNEL_FREE)
 			seq_printf(m, "%2d: %s\n", i, dma_ch[i].device_id);
 
@@ -438,385 +417,193 @@ void blackfin_dma_resume(void)
 }
 #endif
 
-static void *__dma_memcpy(void *dest, const void *src, size_t size)
+/**
+ *	blackfin_dma_early_init - minimal DMA init
+ *
+ * Clear the MDMA0 source config so __dma_memcpy() does not wait on a stale
+ * transfer; this lets dma_memcpy() be used early in the kernel boot process.
+ */
+void __init blackfin_dma_early_init(void)
 {
-	int direction;	/* 1 - address decrease, 0 - address increase */
-	int flag_align;	/* 1 - address aligned,  0 - address unaligned */
-	int flag_2D;	/* 1 - 2D DMA needed,	 0 - 1D DMA needed */
-	unsigned long flags;
-
-	if (size <= 0)
-		return NULL;
-
-	local_irq_save(flags);
-
-	if ((unsigned long)src < memory_end)
-		blackfin_dcache_flush_range((unsigned int)src,
-					    (unsigned int)(src + size));
-
-	if ((unsigned long)dest < memory_end)
-		blackfin_dcache_invalidate_range((unsigned int)dest,
-						 (unsigned int)(dest + size));
-
-	bfin_write_MDMA_D0_IRQ_STATUS(DMA_DONE | DMA_ERR);
-
-	if ((unsigned long)src < (unsigned long)dest)
-		direction = 1;
-	else
-		direction = 0;
-
-	if ((((unsigned long)dest % 2) == 0) && (((unsigned long)src % 2) == 0)
-	    && ((size % 2) == 0))
-		flag_align = 1;
-	else
-		flag_align = 0;
-
-	if (size > 0x10000)	/* size > 64K */
-		flag_2D = 1;
-	else
-		flag_2D = 0;
-
-	/* Setup destination and source start address */
-	if (direction) {
-		if (flag_align) {
-			bfin_write_MDMA_D0_START_ADDR(dest + size - 2);
-			bfin_write_MDMA_S0_START_ADDR(src + size - 2);
-		} else {
-			bfin_write_MDMA_D0_START_ADDR(dest + size - 1);
-			bfin_write_MDMA_S0_START_ADDR(src + size - 1);
-		}
-	} else {
-		bfin_write_MDMA_D0_START_ADDR(dest);
-		bfin_write_MDMA_S0_START_ADDR(src);
-	}
-
-	/* Setup destination and source xcount */
-	if (flag_2D) {
-		if (flag_align) {
-			bfin_write_MDMA_D0_X_COUNT(1024 / 2);
-			bfin_write_MDMA_S0_X_COUNT(1024 / 2);
-		} else {
-			bfin_write_MDMA_D0_X_COUNT(1024);
-			bfin_write_MDMA_S0_X_COUNT(1024);
-		}
-		bfin_write_MDMA_D0_Y_COUNT(size >> 10);
-		bfin_write_MDMA_S0_Y_COUNT(size >> 10);
-	} else {
-		if (flag_align) {
-			bfin_write_MDMA_D0_X_COUNT(size / 2);
-			bfin_write_MDMA_S0_X_COUNT(size / 2);
-		} else {
-			bfin_write_MDMA_D0_X_COUNT(size);
-			bfin_write_MDMA_S0_X_COUNT(size);
-		}
-	}
-
-	/* Setup destination and source xmodify and ymodify */
-	if (direction) {
-		if (flag_align) {
-			bfin_write_MDMA_D0_X_MODIFY(-2);
-			bfin_write_MDMA_S0_X_MODIFY(-2);
-			if (flag_2D) {
-				bfin_write_MDMA_D0_Y_MODIFY(-2);
-				bfin_write_MDMA_S0_Y_MODIFY(-2);
-			}
-		} else {
-			bfin_write_MDMA_D0_X_MODIFY(-1);
-			bfin_write_MDMA_S0_X_MODIFY(-1);
-			if (flag_2D) {
-				bfin_write_MDMA_D0_Y_MODIFY(-1);
-				bfin_write_MDMA_S0_Y_MODIFY(-1);
-			}
-		}
-	} else {
-		if (flag_align) {
-			bfin_write_MDMA_D0_X_MODIFY(2);
-			bfin_write_MDMA_S0_X_MODIFY(2);
-			if (flag_2D) {
-				bfin_write_MDMA_D0_Y_MODIFY(2);
-				bfin_write_MDMA_S0_Y_MODIFY(2);
-			}
-		} else {
-			bfin_write_MDMA_D0_X_MODIFY(1);
-			bfin_write_MDMA_S0_X_MODIFY(1);
-			if (flag_2D) {
-				bfin_write_MDMA_D0_Y_MODIFY(1);
-				bfin_write_MDMA_S0_Y_MODIFY(1);
-			}
-		}
-	}
-
-	/* Enable source DMA */
-	if (flag_2D) {
-		if (flag_align) {
-			bfin_write_MDMA_S0_CONFIG(DMAEN | DMA2D | WDSIZE_16);
-			bfin_write_MDMA_D0_CONFIG(WNR | DI_EN | DMAEN | DMA2D | WDSIZE_16);
-		} else {
-			bfin_write_MDMA_S0_CONFIG(DMAEN | DMA2D);
-			bfin_write_MDMA_D0_CONFIG(WNR | DI_EN | DMAEN | DMA2D);
-		}
-	} else {
-		if (flag_align) {
-			bfin_write_MDMA_S0_CONFIG(DMAEN | WDSIZE_16);
-			bfin_write_MDMA_D0_CONFIG(WNR | DI_EN | DMAEN | WDSIZE_16);
-		} else {
-			bfin_write_MDMA_S0_CONFIG(DMAEN);
-			bfin_write_MDMA_D0_CONFIG(WNR | DI_EN | DMAEN);
-		}
-	}
-
-	SSYNC();
-
-	while (!(bfin_read_MDMA_D0_IRQ_STATUS() & DMA_DONE))
-		;
-
-	bfin_write_MDMA_D0_IRQ_STATUS(bfin_read_MDMA_D0_IRQ_STATUS() |
-				      (DMA_DONE | DMA_ERR));
-
 	bfin_write_MDMA_S0_CONFIG(0);
-	bfin_write_MDMA_D0_CONFIG(0);
-
-	local_irq_restore(flags);
-
-	return dest;
 }
 
-void *dma_memcpy(void *dest, const void *src, size_t size)
-{
-	size_t bulk;
-	size_t rest;
-	void * addr;
-
-	bulk = (size >> 16) << 16;
-	rest = size - bulk;
-	if (bulk)
-		__dma_memcpy(dest, src, bulk);
-	__dma_memcpy(dest+bulk, src+bulk, rest);
-	return dest;
-}
-EXPORT_SYMBOL(dma_memcpy);
-
 /**
- *	safe_dma_memcpy - DMA memcpy w/argument checking
+ *	__dma_memcpy - program the MDMA registers
  *
- * Verify arguments are safe before heading to dma_memcpy().
+ * Actually program MDMA0 and wait for the transfer to finish.  Disable IRQs
+ * while programming registers so that everything is fully configured.  Wait
+ * for DMA to finish with IRQs enabled.  If interrupted, the initial DMA_DONE
+ * check will make sure we don't clobber any existing transfer.
  */
-void *safe_dma_memcpy(void *dest, const void *src, size_t size)
-{
-	if (!access_ok(VERIFY_WRITE, dst, size))
-		return NULL;
-	if (!access_ok(VERIFY_READ, src, size))
-		return NULL;
-	return dma_memcpy(dst, src, size);
-}
-EXPORT_SYMBOL(safe_dma_memcpy);
-
-void dma_outsb(unsigned long addr, const void *buf, unsigned short len)
+static void __dma_memcpy(u32 daddr, s16 dmod, u32 saddr, s16 smod, size_t cnt, u32 conf)
 {
+	static DEFINE_SPINLOCK(mdma_lock);
 	unsigned long flags;
 
-	local_irq_save(flags);
-
-	blackfin_dcache_flush_range((unsigned int)buf,
-			 (unsigned int)(buf) + len);
+	spin_lock_irqsave(&mdma_lock, flags);
+
+	if (bfin_read_MDMA_S0_CONFIG())
+		while (!(bfin_read_MDMA_D0_IRQ_STATUS() & DMA_DONE))
+			continue;
+
+	if (conf & DMA2D) {
+		/* For larger bit sizes, we've already divided down cnt so it
+		 * is no longer a multiple of 64k.  So we have to break down
+		 * the limit here so it is a multiple of the incoming size.
+		 * There is no limitation here in terms of total size other
+		 * than the hardware though as the bits lost in the shift are
+		 * made up by MODIFY (== we can hit the whole address space).
+		 * X: (2^(16 - 0)) * 1 == (2^(16 - 1)) * 2 == (2^(16 - 2)) * 4
+		 */
+		u32 shift = abs(dmod) >> 1;
+		size_t ycnt = cnt >> (16 - shift);
+		cnt = 1 << (16 - shift);
+		bfin_write_MDMA_D0_Y_COUNT(ycnt);
+		bfin_write_MDMA_S0_Y_COUNT(ycnt);
+		bfin_write_MDMA_D0_Y_MODIFY(dmod);
+		bfin_write_MDMA_S0_Y_MODIFY(smod);
+	}
 
-	bfin_write_MDMA_D0_START_ADDR(addr);
-	bfin_write_MDMA_D0_X_COUNT(len);
-	bfin_write_MDMA_D0_X_MODIFY(0);
+	bfin_write_MDMA_D0_START_ADDR(daddr);
+	bfin_write_MDMA_D0_X_COUNT(cnt);
+	bfin_write_MDMA_D0_X_MODIFY(dmod);
 	bfin_write_MDMA_D0_IRQ_STATUS(DMA_DONE | DMA_ERR);
 
-	bfin_write_MDMA_S0_START_ADDR(buf);
-	bfin_write_MDMA_S0_X_COUNT(len);
-	bfin_write_MDMA_S0_X_MODIFY(1);
+	bfin_write_MDMA_S0_START_ADDR(saddr);
+	bfin_write_MDMA_S0_X_COUNT(cnt);
+	bfin_write_MDMA_S0_X_MODIFY(smod);
 	bfin_write_MDMA_S0_IRQ_STATUS(DMA_DONE | DMA_ERR);
 
-	bfin_write_MDMA_S0_CONFIG(DMAEN | WDSIZE_8);
-	bfin_write_MDMA_D0_CONFIG(WNR | DI_EN | DMAEN | WDSIZE_8);
+	bfin_write_MDMA_S0_CONFIG(DMAEN | conf);
+	bfin_write_MDMA_D0_CONFIG(WNR | DI_EN | DMAEN | conf);
+
+	spin_unlock_irqrestore(&mdma_lock, flags);
 
 	SSYNC();
 
-	while (!(bfin_read_MDMA_D0_IRQ_STATUS() & DMA_DONE));
+	while (!(bfin_read_MDMA_D0_IRQ_STATUS() & DMA_DONE))
+		if (bfin_read_MDMA_S0_CONFIG())
+			continue;
+		else
+			return;
 
 	bfin_write_MDMA_D0_IRQ_STATUS(DMA_DONE | DMA_ERR);
 
 	bfin_write_MDMA_S0_CONFIG(0);
 	bfin_write_MDMA_D0_CONFIG(0);
-	local_irq_restore(flags);
-
 }
-EXPORT_SYMBOL(dma_outsb);
 
-
-void dma_insb(unsigned long addr, void *buf, unsigned short len)
+/**
+ *	_dma_memcpy - translate C memcpy settings into MDMA settings
+ *
+ * Handle all the high-level steps before we touch the MDMA registers:
+ * cache maintenance, choosing the transfer width, and adjusting addresses.
+ */
+static void *_dma_memcpy(void *pdst, const void *psrc, size_t size)
 {
-	unsigned long flags;
+	u32 conf, shift;
+	s16 mod;
+	unsigned long dst = (unsigned long)pdst;
+	unsigned long src = (unsigned long)psrc;
 
-	blackfin_dcache_invalidate_range((unsigned int)buf,
-			 (unsigned int)(buf) + len);
+	if (size == 0)
+		return NULL;
 
-	local_irq_save(flags);
-	bfin_write_MDMA_D0_START_ADDR(buf);
-	bfin_write_MDMA_D0_X_COUNT(len);
-	bfin_write_MDMA_D0_X_MODIFY(1);
-	bfin_write_MDMA_D0_IRQ_STATUS(DMA_DONE | DMA_ERR);
+	if (bfin_addr_dcachable(src))
+		blackfin_dcache_flush_range(src, src + size);
 
-	bfin_write_MDMA_S0_START_ADDR(addr);
-	bfin_write_MDMA_S0_X_COUNT(len);
-	bfin_write_MDMA_S0_X_MODIFY(0);
-	bfin_write_MDMA_S0_IRQ_STATUS(DMA_DONE | DMA_ERR);
+	if (bfin_addr_dcachable(dst))
+		blackfin_dcache_invalidate_range(dst, dst + size);
 
-	bfin_write_MDMA_S0_CONFIG(DMAEN | WDSIZE_8);
-	bfin_write_MDMA_D0_CONFIG(WNR | DI_EN | DMAEN | WDSIZE_8);
-
-	SSYNC();
+	if (dst % 4 == 0 && src % 4 == 0 && size % 4 == 0) {
+		conf = WDSIZE_32;
+		shift = 2;
+	} else if (dst % 2 == 0 && src % 2 == 0 && size % 2 == 0) {
+		conf = WDSIZE_16;
+		shift = 1;
+	} else {
+		conf = WDSIZE_8;
+		shift = 0;
+	}
 
-	while (!(bfin_read_MDMA_D0_IRQ_STATUS() & DMA_DONE));
+	/* If the two memory regions have a chance of overlapping, make
+	 * sure the memcpy still works as expected.  Do this by having the
+	 * copy run backwards instead.
+	 */
+	mod = 1 << shift;
+	if (src < dst) {
+		mod *= -1;
+		dst += size + mod;
+		src += size + mod;
+	}
+	size >>= shift;
 
-	bfin_write_MDMA_D0_IRQ_STATUS(DMA_DONE | DMA_ERR);
+	if (size > 0x10000)
+		conf |= DMA2D;
 
-	bfin_write_MDMA_S0_CONFIG(0);
-	bfin_write_MDMA_D0_CONFIG(0);
-	local_irq_restore(flags);
+	__dma_memcpy(dst, mod, src, mod, size, conf);
 
+	return pdst;
 }
-EXPORT_SYMBOL(dma_insb);
 
-void dma_outsw(unsigned long addr, const void  *buf, unsigned short len)
+/**
+ *	dma_memcpy - DMA memcpy (MDMA setup is serialized by a spinlock)
+ *
+ * Do not check arguments before starting the DMA memcpy.  Break the transfer
+ * up into two pieces.  The first transfer is in multiples of 64k and the
+ * second transfer is the piece smaller than 64k.
+ */
+void *dma_memcpy(void *dst, const void *src, size_t size)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
-
-	blackfin_dcache_flush_range((unsigned int)buf,
-			 (unsigned int)(buf) + len * sizeof(short));
-
-	bfin_write_MDMA_D0_START_ADDR(addr);
-	bfin_write_MDMA_D0_X_COUNT(len);
-	bfin_write_MDMA_D0_X_MODIFY(0);
-	bfin_write_MDMA_D0_IRQ_STATUS(DMA_DONE | DMA_ERR);
-
-	bfin_write_MDMA_S0_START_ADDR(buf);
-	bfin_write_MDMA_S0_X_COUNT(len);
-	bfin_write_MDMA_S0_X_MODIFY(2);
-	bfin_write_MDMA_S0_IRQ_STATUS(DMA_DONE | DMA_ERR);
-
-	bfin_write_MDMA_S0_CONFIG(DMAEN | WDSIZE_16);
-	bfin_write_MDMA_D0_CONFIG(WNR | DI_EN | DMAEN | WDSIZE_16);
-
-	SSYNC();
-
-	while (!(bfin_read_MDMA_D0_IRQ_STATUS() & DMA_DONE));
-
-	bfin_write_MDMA_D0_IRQ_STATUS(DMA_DONE | DMA_ERR);
-
-	bfin_write_MDMA_S0_CONFIG(0);
-	bfin_write_MDMA_D0_CONFIG(0);
-	local_irq_restore(flags);
-
+	size_t bulk, rest;
+	bulk = size & ~0xffff;
+	rest = size - bulk;
+	if (bulk)
+		_dma_memcpy(dst, src, bulk);
+	_dma_memcpy(dst + bulk, src + bulk, rest);
+	return dst;
 }
-EXPORT_SYMBOL(dma_outsw);
+EXPORT_SYMBOL(dma_memcpy);
 
-void dma_insw(unsigned long addr, void *buf, unsigned short len)
+/**
+ *	safe_dma_memcpy - DMA memcpy w/argument checking
+ *
+ * Verify arguments are safe before heading to dma_memcpy().
+ */
+void *safe_dma_memcpy(void *dst, const void *src, size_t size)
 {
-	unsigned long flags;
-
-	blackfin_dcache_invalidate_range((unsigned int)buf,
-			 (unsigned int)(buf) + len * sizeof(short));
-
-	local_irq_save(flags);
-
-	bfin_write_MDMA_D0_START_ADDR(buf);
-	bfin_write_MDMA_D0_X_COUNT(len);
-	bfin_write_MDMA_D0_X_MODIFY(2);
-	bfin_write_MDMA_D0_IRQ_STATUS(DMA_DONE | DMA_ERR);
-
-	bfin_write_MDMA_S0_START_ADDR(addr);
-	bfin_write_MDMA_S0_X_COUNT(len);
-	bfin_write_MDMA_S0_X_MODIFY(0);
-	bfin_write_MDMA_S0_IRQ_STATUS(DMA_DONE | DMA_ERR);
-
-	bfin_write_MDMA_S0_CONFIG(DMAEN | WDSIZE_16);
-	bfin_write_MDMA_D0_CONFIG(WNR | DI_EN | DMAEN | WDSIZE_16);
-
-	SSYNC();
-
-	while (!(bfin_read_MDMA_D0_IRQ_STATUS() & DMA_DONE));
-
-	bfin_write_MDMA_D0_IRQ_STATUS(DMA_DONE | DMA_ERR);
-
-	bfin_write_MDMA_S0_CONFIG(0);
-	bfin_write_MDMA_D0_CONFIG(0);
-	local_irq_restore(flags);
-
+	if (!access_ok(VERIFY_WRITE, dst, size))
+		return NULL;
+	if (!access_ok(VERIFY_READ, src, size))
+		return NULL;
+	return dma_memcpy(dst, src, size);
 }
-EXPORT_SYMBOL(dma_insw);
+EXPORT_SYMBOL(safe_dma_memcpy);
 
-void dma_outsl(unsigned long addr, const void *buf, unsigned short len)
+static void _dma_out(unsigned long addr, unsigned long buf, unsigned short len,
+                     u16 size, u16 dma_size)
 {
-	unsigned long flags;
-
-	local_irq_save(flags);
-
-	blackfin_dcache_flush_range((unsigned int)buf,
-			 (unsigned int)(buf) + len * sizeof(long));
-
-	bfin_write_MDMA_D0_START_ADDR(addr);
-	bfin_write_MDMA_D0_X_COUNT(len);
-	bfin_write_MDMA_D0_X_MODIFY(0);
-	bfin_write_MDMA_D0_IRQ_STATUS(DMA_DONE | DMA_ERR);
-
-	bfin_write_MDMA_S0_START_ADDR(buf);
-	bfin_write_MDMA_S0_X_COUNT(len);
-	bfin_write_MDMA_S0_X_MODIFY(4);
-	bfin_write_MDMA_S0_IRQ_STATUS(DMA_DONE | DMA_ERR);
-
-	bfin_write_MDMA_S0_CONFIG(DMAEN | WDSIZE_32);
-	bfin_write_MDMA_D0_CONFIG(WNR | DI_EN | DMAEN | WDSIZE_32);
-
-	SSYNC();
-
-	while (!(bfin_read_MDMA_D0_IRQ_STATUS() & DMA_DONE));
-
-	bfin_write_MDMA_D0_IRQ_STATUS(DMA_DONE | DMA_ERR);
-
-	bfin_write_MDMA_S0_CONFIG(0);
-	bfin_write_MDMA_D0_CONFIG(0);
-	local_irq_restore(flags);
-
+	blackfin_dcache_flush_range(buf, buf + len * size);
+	__dma_memcpy(addr, 0, buf, size, len, dma_size);
 }
-EXPORT_SYMBOL(dma_outsl);
 
-void dma_insl(unsigned long addr, void *buf, unsigned short len)
+static void _dma_in(unsigned long addr, unsigned long buf, unsigned short len,
+                    u16 size, u16 dma_size)
 {
-	unsigned long flags;
-
-	blackfin_dcache_invalidate_range((unsigned int)buf,
-			 (unsigned int)(buf) + len * sizeof(long));
-
-	local_irq_save(flags);
-
-	bfin_write_MDMA_D0_START_ADDR(buf);
-	bfin_write_MDMA_D0_X_COUNT(len);
-	bfin_write_MDMA_D0_X_MODIFY(4);
-	bfin_write_MDMA_D0_IRQ_STATUS(DMA_DONE | DMA_ERR);
-
-	bfin_write_MDMA_S0_START_ADDR(addr);
-	bfin_write_MDMA_S0_X_COUNT(len);
-	bfin_write_MDMA_S0_X_MODIFY(0);
-	bfin_write_MDMA_S0_IRQ_STATUS(DMA_DONE | DMA_ERR);
-
-	bfin_write_MDMA_S0_CONFIG(DMAEN | WDSIZE_32);
-	bfin_write_MDMA_D0_CONFIG(WNR | DI_EN | DMAEN | WDSIZE_32);
-
-	SSYNC();
-
-	while (!(bfin_read_MDMA_D0_IRQ_STATUS() & DMA_DONE));
-
-	bfin_write_MDMA_D0_IRQ_STATUS(DMA_DONE | DMA_ERR);
-
-	bfin_write_MDMA_S0_CONFIG(0);
-	bfin_write_MDMA_D0_CONFIG(0);
-	local_irq_restore(flags);
-
+	blackfin_dcache_invalidate_range(buf, buf + len * size);
+	__dma_memcpy(buf, size, addr, 0, len, dma_size);
 }
-EXPORT_SYMBOL(dma_insl);
+
+#define MAKE_DMA_IO(io, bwl, isize, dmasize, cnst) \
+void dma_##io##s##bwl(unsigned long addr, cnst void *buf, unsigned short len) \
+{ \
+	_dma_##io(addr, (unsigned long)buf, len, isize, WDSIZE_##dmasize); \
+} \
+EXPORT_SYMBOL(dma_##io##s##bwl)
+MAKE_DMA_IO(out, b, 1,  8, const);
+MAKE_DMA_IO(in,  b, 1,  8, );
+MAKE_DMA_IO(out, w, 2, 16, const);
+MAKE_DMA_IO(in,  w, 2, 16, );
+MAKE_DMA_IO(out, l, 4, 32, const);
+MAKE_DMA_IO(in,  l, 4, 32, );
diff --git a/arch/blackfin/kernel/setup.c b/arch/blackfin/kernel/setup.c
index b147ed9..56b8b4c 100644
--- a/arch/blackfin/kernel/setup.c
+++ b/arch/blackfin/kernel/setup.c
@@ -154,6 +154,8 @@ void __init bfin_relocate_l1_mem(void)
 	unsigned long l1_data_b_length;
 	unsigned long l2_length;
 
+	blackfin_dma_early_init();
+
 	l1_code_length = _etext_l1 - _stext_l1;
 	if (l1_code_length > L1_CODE_LENGTH)
 		panic("L1 Instruction SRAM Overflow\n");
-- 
cgit v1.1