diff options
Diffstat (limited to 'lib/libc/mips/string/bcopy.S')
-rw-r--r-- | lib/libc/mips/string/bcopy.S | 300 |
1 files changed, 300 insertions, 0 deletions
diff --git a/lib/libc/mips/string/bcopy.S b/lib/libc/mips/string/bcopy.S new file mode 100644 index 0000000..6287d06 --- /dev/null +++ b/lib/libc/mips/string/bcopy.S @@ -0,0 +1,300 @@ +/* $NetBSD: bcopy.S,v 1.2 2005/12/27 11:23:53 tsutsui Exp $ */ + +/* + * Mach Operating System + * Copyright (c) 1993 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +/* + * File: mips_bcopy.s + * Author: Chris Maeda + * Date: June 1993 + * + * Fast copy routine. Derived from aligned_block_copy. 
+ */
+
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+#if defined(LIBC_SCCS) && !defined(lint)
+	ASMSTR("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93")
+	ASMSTR("$NetBSD: bcopy.S,v 1.2 2005/12/27 11:23:53 tsutsui Exp $")
+#endif /* LIBC_SCCS and not lint */
+
+#ifdef __ABICALLS__
+	.abicalls
+#endif
+
+/*
+ *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
+ *
+ *	a0	src address
+ *	a1	dst address
+ *	a2	length
+ *
+ * The same body is assembled as memcpy or memmove when MEMCOPY or
+ * MEMMOVE is defined.  In that case the argument registers are swapped
+ * (a0 is the destination, a1 the source) and the original destination
+ * pointer is returned in v0.
+ *
+ * The copy direction is chosen at run time: if src < dst (unsigned) the
+ * copy runs from the end of the buffers towards the start, otherwise
+ * from the start towards the end.
+ *
+ * Registers written: v0 (memcpy/memmove only), v1, a0-a3, t0-t3, AT.
+ *
+ * The code is assembled with ".set noreorder": the instruction that
+ * follows every branch or jump is its delay slot and is executed whether
+ * or not the branch is taken.
+ */
+
+#if defined(MEMCOPY) || defined(MEMMOVE)
+#ifdef MEMCOPY
+#define	FUNCTION	memcpy
+#else
+#define	FUNCTION	memmove
+#endif
+#define	SRCREG		a1
+#define	DSTREG		a0
+#else
+#define	FUNCTION	bcopy
+#define	SRCREG		a0
+#define	DSTREG		a1
+#endif
+
+#define	SIZEREG		a2
+
+LEAF(FUNCTION)
+	.set	noat
+	.set	noreorder
+
+#if defined(MEMCOPY) || defined(MEMMOVE)
+	/* set up return value, while we still can */
+	move	v0,DSTREG
+#endif
+	/*
+	 *	Make sure we can copy forwards.
+	 */
+	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
+	bne	t0,zero,6f		# copy backwards
+
+	/*
+	 * 	There are four alignment cases (with frequency)
+	 *	(Based on measurements taken with a DECstation 5000/200
+	 *	 inside a Mach kernel.)
+	 *
+	 * 	aligned   -> aligned		(mostly)
+	 * 	unaligned -> aligned		(sometimes)
+	 * 	aligned,unaligned -> unaligned	(almost never)
+	 *
+	 *	Note that we could add another case that checks if
+	 *	the destination and source are unaligned but the
+	 *	copy is alignable.  eg if src and dest are both
+	 *	on a halfword boundary.
+	 */
+	andi	t1,DSTREG,3		# (delay slot) low 2 bits of dest
+	bne	t1,zero,3f		# dest unaligned: byte copy everything
+	andi	t0,SRCREG,3		# (delay slot) low 2 bits of src
+	bne	t0,zero,5f		# src unaligned, dest aligned
+
+	/*
+	 *	Forward aligned->aligned copy, 8*4 bytes at a time.
+	 */
+	li	AT,-32			# (delay slot) mask for multiples of 32
+	and	t0,SIZEREG,AT		# count truncated to multiple of 32
+	addu	a3,SRCREG,t0		# run fast loop up to this address
+	sltu	AT,SRCREG,a3		# any work to do?
+	beq	AT,zero,2f
+	subu	SIZEREG,t0		# (delay slot) bytes left after fast loop
+
+	/*
+	 *	loop body
+	 *
+	 *	Each iteration moves 32 bytes as two groups of four words;
+	 *	the second group is addressed with negative offsets from
+	 *	the already advanced pointers.
+	 */
+1:	# cp
+	lw	t3,0(SRCREG)
+	lw	v1,4(SRCREG)
+	lw	t0,8(SRCREG)
+	lw	t1,12(SRCREG)
+	addu	SRCREG,32
+	sw	t3,0(DSTREG)
+	sw	v1,4(DSTREG)
+	sw	t0,8(DSTREG)
+	sw	t1,12(DSTREG)
+	lw	t1,-4(SRCREG)
+	lw	t0,-8(SRCREG)
+	lw	v1,-12(SRCREG)
+	lw	t3,-16(SRCREG)
+	addu	DSTREG,32
+	sw	t1,-4(DSTREG)
+	sw	t0,-8(DSTREG)
+	sw	v1,-12(DSTREG)
+	bne	SRCREG,a3,1b
+	sw	t3,-16(DSTREG)		# (delay slot) last store of the group
+
+	/*
+	 *	Copy a word at a time, no loop unrolling.
+	 */
+2:	# wordcopy
+	andi	t2,SIZEREG,3		# byte count mod 4
+	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
+	beq	t2,zero,3f
+	addu	t0,SRCREG,t2		# (delay slot) stop at t0
+	subu	SIZEREG,SIZEREG,t2	# bytes left for the byte loop
+1:
+	lw	t3,0(SRCREG)
+	addu	SRCREG,4
+	sw	t3,0(DSTREG)
+	bne	SRCREG,t0,1b
+	addu	DSTREG,4		# (delay slot)
+
+	/*
+	 *	Byte-at-a-time tail, also used for the whole copy when the
+	 *	destination is not word aligned.
+	 *	NOTE(review): bgtz is a signed test, so a remaining count
+	 *	with the top bit set ends the loop after one byte.
+	 */
+3:	# bytecopy
+	beq	SIZEREG,zero,4f		# nothing left to do?
+	nop
+1:
+	lb	t3,0(SRCREG)
+	addu	SRCREG,1
+	sb	t3,0(DSTREG)
+	subu	SIZEREG,1
+	bgtz	SIZEREG,1b
+	addu	DSTREG,1		# (delay slot)
+
+4:	# copydone
+	j	ra
+	nop
+
+	/*
+	 *	Copy from unaligned source to aligned dest.
+	 *
+	 *	Each source word is assembled with an lwl/lwr pair (order
+	 *	depends on endianness) and stored with a plain sw.
+	 */
+5:	# destaligned
+	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
+	subu	a3,SIZEREG,t0		# number of words to transfer
+	beq	a3,zero,3b
+	nop
+	move	SIZEREG,t0		# this many to do after we are done
+	addu	a3,SRCREG,a3		# stop point
+
+1:
+#ifdef __MIPSEB__
+	lwl	t3,0(SRCREG)
+	lwr	t3,3(SRCREG)
+#else
+	lwr	t3,0(SRCREG)
+	lwl	t3,3(SRCREG)
+#endif
+	/*
+	 * Pointer updates use addu, not addi: addi raises an integer
+	 * overflow exception on signed wrap-around, which must never
+	 * happen for address arithmetic.  This also matches the subu
+	 * used by the backward variant of this loop.
+	 */
+	addu	SRCREG,4
+	sw	t3,0(DSTREG)
+	bne	SRCREG,a3,1b
+	addu	DSTREG,4		# (delay slot)
+
+	j	3b			# finish the 0-3 trailing bytes
+	nop
+
+	/*
+	 *	Backward copy.  Both pointers are first moved one past the
+	 *	end of their buffers; every access below therefore uses a
+	 *	negative offset or decrements the pointer first.  The
+	 *	numeric labels 1-5 are reused; "f"/"b" references resolve
+	 *	to the nearest definition, i.e. to the backward versions.
+	 */
+6:	# backcopy -- based on above
+	addu	SRCREG,SIZEREG
+	addu	DSTREG,SIZEREG
+	andi	t1,DSTREG,3		# low 2 bits of dest (end address)
+	bne	t1,zero,3f		# dest end unaligned: byte copy everything
+	andi	t0,SRCREG,3		# (delay slot) low 2 bits of src
+	bne	t0,zero,5f		# src unaligned, dest aligned
+
+	/*
+	 *	Backward aligned->aligned copy, 8*4 bytes at a time.
+	 */
+	li	AT,-32			# (delay slot) mask for multiples of 32
+	and	t0,SIZEREG,AT		# count truncated to multiple of 32
+	beq	t0,zero,2f		# any work to do?
+	subu	SIZEREG,t0		# (delay slot) bytes left after fast loop
+	subu	a3,SRCREG,t0		# run fast loop down to this address
+
+	/*
+	 *	loop body
+	 */
+1:	# cp
+	lw	t3,-16(SRCREG)
+	lw	v1,-12(SRCREG)
+	lw	t0,-8(SRCREG)
+	lw	t1,-4(SRCREG)
+	subu	SRCREG,32
+	sw	t3,-16(DSTREG)
+	sw	v1,-12(DSTREG)
+	sw	t0,-8(DSTREG)
+	sw	t1,-4(DSTREG)
+	lw	t1,12(SRCREG)
+	lw	t0,8(SRCREG)
+	lw	v1,4(SRCREG)
+	lw	t3,0(SRCREG)
+	subu	DSTREG,32
+	sw	t1,12(DSTREG)
+	sw	t0,8(DSTREG)
+	sw	v1,4(DSTREG)
+	bne	SRCREG,a3,1b
+	sw	t3,0(DSTREG)		# (delay slot) last store of the group
+
+	/*
+	 *	Copy a word at a time, no loop unrolling.
+	 */
+2:	# wordcopy
+	andi	t2,SIZEREG,3		# byte count mod 4
+	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
+	beq	t2,zero,3f
+	subu	t0,SRCREG,t2		# (delay slot) stop at t0
+	subu	SIZEREG,SIZEREG,t2	# bytes left for the byte loop
+1:
+	lw	t3,-4(SRCREG)
+	subu	SRCREG,4
+	sw	t3,-4(DSTREG)
+	bne	SRCREG,t0,1b
+	subu	DSTREG,4		# (delay slot)
+
+	/*
+	 *	Byte-at-a-time tail (see the NOTE on the forward byte loop
+	 *	regarding the signed bgtz test).
+	 */
+3:	# bytecopy
+	beq	SIZEREG,zero,4f		# nothing left to do?
+	nop
+1:
+	lb	t3,-1(SRCREG)
+	subu	SRCREG,1
+	sb	t3,-1(DSTREG)
+	subu	SIZEREG,1
+	bgtz	SIZEREG,1b
+	subu	DSTREG,1		# (delay slot)
+
+4:	# copydone
+	j	ra
+	nop
+
+	/*
+	 *	Copy from unaligned source to aligned dest.
+	 */
+5:	# destaligned
+	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
+	subu	a3,SIZEREG,t0		# number of words to transfer
+	beq	a3,zero,3b
+	nop
+	move	SIZEREG,t0		# this many to do after we are done
+	subu	a3,SRCREG,a3		# stop point
+
+1:
+#ifdef __MIPSEB__
+	lwl	t3,-4(SRCREG)
+	lwr	t3,-1(SRCREG)
+#else
+	lwr	t3,-4(SRCREG)
+	lwl	t3,-1(SRCREG)
+#endif
+	subu	SRCREG,4
+	sw	t3,-4(DSTREG)
+	bne	SRCREG,a3,1b
+	subu	DSTREG,4		# (delay slot)
+
+	j	3b			# finish the 0-3 leading bytes
+	nop
+
+	.set	reorder
+	.set	at
+	END(FUNCTION)