summaryrefslogtreecommitdiffstats
path: root/lib/libc/mips/string/bcopy.S
diff options
context:
space:
mode:
Diffstat (limited to 'lib/libc/mips/string/bcopy.S')
-rw-r--r--lib/libc/mips/string/bcopy.S300
1 files changed, 300 insertions, 0 deletions
diff --git a/lib/libc/mips/string/bcopy.S b/lib/libc/mips/string/bcopy.S
new file mode 100644
index 0000000..6287d06
--- /dev/null
+++ b/lib/libc/mips/string/bcopy.S
@@ -0,0 +1,300 @@
+/* $NetBSD: bcopy.S,v 1.2 2005/12/27 11:23:53 tsutsui Exp $ */
+
+/*
+ * Mach Operating System
+ * Copyright (c) 1993 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+/*
+ * File: mips_bcopy.s
+ * Author: Chris Maeda
+ * Date: June 1993
+ *
+ * Fast copy routine. Derived from aligned_block_copy.
+ */
+
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+#if defined(LIBC_SCCS) && !defined(lint)
+ ASMSTR("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93")
+ ASMSTR("$NetBSD: bcopy.S,v 1.2 2005/12/27 11:23:53 tsutsui Exp $")
+#endif /* LIBC_SCCS and not lint */
+
+#ifdef __ABICALLS__
+ .abicalls
+#endif
+
+/*
+ * bcopy(caddr_t src, caddr_t dst, unsigned int len)
+ *
+ * a0 src address
+ * a1 dst address
+ * a2 length
+ */
+
+#if defined(MEMCOPY) || defined(MEMMOVE)
+#ifdef MEMCOPY
+#define FUNCTION memcpy
+#else
+#define FUNCTION memmove
+#endif
+#define SRCREG a1
+#define DSTREG a0
+#else
+#define FUNCTION bcopy
+#define SRCREG a0
+#define DSTREG a1
+#endif
+
+#define SIZEREG a2
+
+LEAF(FUNCTION)
+ .set noat
+ .set noreorder
+
+#if defined(MEMCOPY) || defined(MEMMOVE)
+ /* set up return value, while we still can */
+ move v0,DSTREG
+#endif
+ /*
+ * Make sure we can copy forwards.
+ */
+ sltu t0,SRCREG,DSTREG # t0 == SRCREG < DSTREG
+ bne t0,zero,6f # copy backwards
+
+ /*
+ * There are four alignment cases (with frequency)
+ * (Based on measurements taken with a DECstation 5000/200
+ * inside a Mach kernel.)
+ *
+ * aligned -> aligned (mostly)
+ * unaligned -> aligned (sometimes)
+ * aligned,unaligned -> unaligned (almost never)
+ *
+ * Note that we could add another case that checks if
+ * the destination and source are unaligned but the
+ * copy is alignable. eg if src and dest are both
+ * on a halfword boundary.
+ */
+ andi t1,DSTREG,3 # get last 3 bits of dest
+ bne t1,zero,3f
+ andi t0,SRCREG,3 # get last 3 bits of src
+ bne t0,zero,5f
+
+ /*
+ * Forward aligned->aligned copy, 8*4 bytes at a time.
+ */
+ li AT,-32
+ and t0,SIZEREG,AT # count truncated to multiple of 32 */
+ addu a3,SRCREG,t0 # run fast loop up to this address
+ sltu AT,SRCREG,a3 # any work to do?
+ beq AT,zero,2f
+ subu SIZEREG,t0
+
+ /*
+ * loop body
+ */
+1: # cp
+ lw t3,0(SRCREG)
+ lw v1,4(SRCREG)
+ lw t0,8(SRCREG)
+ lw t1,12(SRCREG)
+ addu SRCREG,32
+ sw t3,0(DSTREG)
+ sw v1,4(DSTREG)
+ sw t0,8(DSTREG)
+ sw t1,12(DSTREG)
+ lw t1,-4(SRCREG)
+ lw t0,-8(SRCREG)
+ lw v1,-12(SRCREG)
+ lw t3,-16(SRCREG)
+ addu DSTREG,32
+ sw t1,-4(DSTREG)
+ sw t0,-8(DSTREG)
+ sw v1,-12(DSTREG)
+ bne SRCREG,a3,1b
+ sw t3,-16(DSTREG)
+
+ /*
+ * Copy a word at a time, no loop unrolling.
+ */
+2: # wordcopy
+ andi t2,SIZEREG,3 # get byte count / 4
+ subu t2,SIZEREG,t2 # t2 = number of words to copy * 4
+ beq t2,zero,3f
+ addu t0,SRCREG,t2 # stop at t0
+ subu SIZEREG,SIZEREG,t2
+1:
+ lw t3,0(SRCREG)
+ addu SRCREG,4
+ sw t3,0(DSTREG)
+ bne SRCREG,t0,1b
+ addu DSTREG,4
+
+3: # bytecopy
+ beq SIZEREG,zero,4f # nothing left to do?
+ nop
+1:
+ lb t3,0(SRCREG)
+ addu SRCREG,1
+ sb t3,0(DSTREG)
+ subu SIZEREG,1
+ bgtz SIZEREG,1b
+ addu DSTREG,1
+
+4: # copydone
+ j ra
+ nop
+
+ /*
+ * Copy from unaligned source to aligned dest.
+ */
+5: # destaligned
+ andi t0,SIZEREG,3 # t0 = bytecount mod 4
+ subu a3,SIZEREG,t0 # number of words to transfer
+ beq a3,zero,3b
+ nop
+ move SIZEREG,t0 # this many to do after we are done
+ addu a3,SRCREG,a3 # stop point
+
+1:
+#ifdef __MIPSEB__
+ lwl t3,0(SRCREG)
+ lwr t3,3(SRCREG)
+#else
+ lwr t3,0(SRCREG)
+ lwl t3,3(SRCREG)
+#endif
+ addi SRCREG,4
+ sw t3,0(DSTREG)
+ bne SRCREG,a3,1b
+ addi DSTREG,4
+
+ j 3b
+ nop
+
+6: # backcopy -- based on above
+ addu SRCREG,SIZEREG
+ addu DSTREG,SIZEREG
+ andi t1,DSTREG,3 # get last 3 bits of dest
+ bne t1,zero,3f
+ andi t0,SRCREG,3 # get last 3 bits of src
+ bne t0,zero,5f
+
+ /*
+ * Forward aligned->aligned copy, 8*4 bytes at a time.
+ */
+ li AT,-32
+ and t0,SIZEREG,AT # count truncated to multiple of 32
+ beq t0,zero,2f # any work to do?
+ subu SIZEREG,t0
+ subu a3,SRCREG,t0
+
+ /*
+ * loop body
+ */
+1: # cp
+ lw t3,-16(SRCREG)
+ lw v1,-12(SRCREG)
+ lw t0,-8(SRCREG)
+ lw t1,-4(SRCREG)
+ subu SRCREG,32
+ sw t3,-16(DSTREG)
+ sw v1,-12(DSTREG)
+ sw t0,-8(DSTREG)
+ sw t1,-4(DSTREG)
+ lw t1,12(SRCREG)
+ lw t0,8(SRCREG)
+ lw v1,4(SRCREG)
+ lw t3,0(SRCREG)
+ subu DSTREG,32
+ sw t1,12(DSTREG)
+ sw t0,8(DSTREG)
+ sw v1,4(DSTREG)
+ bne SRCREG,a3,1b
+ sw t3,0(DSTREG)
+
+ /*
+ * Copy a word at a time, no loop unrolling.
+ */
+2: # wordcopy
+ andi t2,SIZEREG,3 # get byte count / 4
+ subu t2,SIZEREG,t2 # t2 = number of words to copy * 4
+ beq t2,zero,3f
+ subu t0,SRCREG,t2 # stop at t0
+ subu SIZEREG,SIZEREG,t2
+1:
+ lw t3,-4(SRCREG)
+ subu SRCREG,4
+ sw t3,-4(DSTREG)
+ bne SRCREG,t0,1b
+ subu DSTREG,4
+
+3: # bytecopy
+ beq SIZEREG,zero,4f # nothing left to do?
+ nop
+1:
+ lb t3,-1(SRCREG)
+ subu SRCREG,1
+ sb t3,-1(DSTREG)
+ subu SIZEREG,1
+ bgtz SIZEREG,1b
+ subu DSTREG,1
+
+4: # copydone
+ j ra
+ nop
+
+ /*
+ * Copy from unaligned source to aligned dest.
+ */
+5: # destaligned
+ andi t0,SIZEREG,3 # t0 = bytecount mod 4
+ subu a3,SIZEREG,t0 # number of words to transfer
+ beq a3,zero,3b
+ nop
+ move SIZEREG,t0 # this many to do after we are done
+ subu a3,SRCREG,a3 # stop point
+
+1:
+#ifdef __MIPSEB__
+ lwl t3,-4(SRCREG)
+ lwr t3,-1(SRCREG)
+#else
+ lwr t3,-4(SRCREG)
+ lwl t3,-1(SRCREG)
+#endif
+ subu SRCREG,4
+ sw t3,-4(DSTREG)
+ bne SRCREG,a3,1b
+ subu DSTREG,4
+
+ j 3b
+ nop
+
+ .set reorder
+ .set at
+ END(FUNCTION)
OpenPOWER on IntegriCloud