diff options
Diffstat (limited to 'lib/libc/mips/string/bcopy.S')
-rw-r--r-- | lib/libc/mips/string/bcopy.S | 297 |
1 files changed, 297 insertions, 0 deletions
diff --git a/lib/libc/mips/string/bcopy.S b/lib/libc/mips/string/bcopy.S new file mode 100644 index 0000000..bc227e0 --- /dev/null +++ b/lib/libc/mips/string/bcopy.S @@ -0,0 +1,297 @@ +/* $NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $ */ + +/* + * Mach Operating System + * Copyright (c) 1993 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +/* + * File: mips_bcopy.s + * Author: Chris Maeda + * Date: June 1993 + * + * Fast copy routine. Derived from aligned_block_copy. + */ + + +#include <machine/asm.h> +__FBSDID("$FreeBSD$"); + +#define _LOCORE /* XXX not really, just assembly-code source */ +#include <machine/endian.h> + +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 + ASMSTR("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93") +#else + ASMSTR("$NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $") +#endif +#endif /* LIBC_SCCS and not lint */ + +#ifdef __ABICALLS__ + .abicalls +#endif + +/* + * bcopy(caddr_t src, caddr_t dst, unsigned int len) + * + * a0 src address + * a1 dst address + * a2 length + */ + +#if defined(MEMCOPY) || defined(MEMMOVE) +#ifdef MEMCOPY +#define FUNCTION memcpy +#else +#define FUNCTION memmove +#endif +#define SRCREG a1 +#define DSTREG a0 +#else +#define FUNCTION bcopy +#define SRCREG a0 +#define DSTREG a1 +#endif + +#define SIZEREG a2 + +LEAF(FUNCTION) + .set noat + .set noreorder + +#if defined(MEMCOPY) || defined(MEMMOVE) + /* set up return value, while we still can */ + move v0,DSTREG +#endif + /* + * Make sure we can copy forwards. + */ + sltu t0,SRCREG,DSTREG # t0 == SRCREG < DSTREG + bne t0,zero,6f # copy backwards + + /* + * There are four alignment cases (with frequency) + * (Based on measurements taken with a DECstation 5000/200 + * inside a Mach kernel.) + * + * aligned -> aligned (mostly) + * unaligned -> aligned (sometimes) + * aligned,unaligned -> unaligned (almost never) + * + * Note that we could add another case that checks if + * the destination and source are unaligned but the + * copy is alignable. eg if src and dest are both + * on a halfword boundary. + */ + andi t1,DSTREG,(SZREG-1) # get last bits of dest + bne t1,zero,3f # dest unaligned + andi t0,SRCREG,(SZREG-1) # get last bits of src + bne t0,zero,5f + + /* + * Forward aligned->aligned copy, 8 words at a time. + */ +98: + li AT,-(SZREG*8) + and t0,SIZEREG,AT # count truncated to multiples + PTR_ADDU a3,SRCREG,t0 # run fast loop up to this addr + sltu AT,SRCREG,a3 # any work to do? + beq AT,zero,2f + PTR_SUBU SIZEREG,t0 + + /* + * loop body + */ +1: # cp + REG_L t3,(0*SZREG)(SRCREG) + REG_L v1,(1*SZREG)(SRCREG) + REG_L t0,(2*SZREG)(SRCREG) + REG_L t1,(3*SZREG)(SRCREG) + PTR_ADDU SRCREG,SZREG*8 + REG_S t3,(0*SZREG)(DSTREG) + REG_S v1,(1*SZREG)(DSTREG) + REG_S t0,(2*SZREG)(DSTREG) + REG_S t1,(3*SZREG)(DSTREG) + REG_L t1,(-1*SZREG)(SRCREG) + REG_L t0,(-2*SZREG)(SRCREG) + REG_L v1,(-3*SZREG)(SRCREG) + REG_L t3,(-4*SZREG)(SRCREG) + PTR_ADDU DSTREG,SZREG*8 + REG_S t1,(-1*SZREG)(DSTREG) + REG_S t0,(-2*SZREG)(DSTREG) + REG_S v1,(-3*SZREG)(DSTREG) + bne SRCREG,a3,1b + REG_S t3,(-4*SZREG)(DSTREG) + + /* + * Copy a word at a time, no loop unrolling. + */ +2: # wordcopy + andi t2,SIZEREG,(SZREG-1) # get byte count / SZREG + PTR_SUBU t2,SIZEREG,t2 # t2 = words to copy * SZREG + beq t2,zero,3f + PTR_ADDU t0,SRCREG,t2 # stop at t0 + PTR_SUBU SIZEREG,SIZEREG,t2 +1: + REG_L t3,0(SRCREG) + PTR_ADDU SRCREG,SZREG + REG_S t3,0(DSTREG) + bne SRCREG,t0,1b + PTR_ADDU DSTREG,SZREG + +3: # bytecopy + beq SIZEREG,zero,4f # nothing left to do? + nop +1: + lb t3,0(SRCREG) + PTR_ADDU SRCREG,1 + sb t3,0(DSTREG) + PTR_SUBU SIZEREG,1 + bgtz SIZEREG,1b + PTR_ADDU DSTREG,1 + +4: # copydone + j ra + nop + + /* + * Copy from unaligned source to aligned dest. + */ +5: # destaligned + andi t0,SIZEREG,(SZREG-1) # t0 = bytecount mod SZREG + PTR_SUBU a3,SIZEREG,t0 # number of words to transfer + beq a3,zero,3b + nop + move SIZEREG,t0 # this many to do after we are done + PTR_ADDU a3,SRCREG,a3 # stop point + +1: + REG_LHI t3,0(SRCREG) + REG_LLO t3,SZREG-1(SRCREG) + PTR_ADDI SRCREG,SZREG + REG_S t3,0(DSTREG) + bne SRCREG,a3,1b + PTR_ADDI DSTREG,SZREG + + b 3b + nop + +6: # backcopy -- based on above + PTR_ADDU SRCREG,SIZEREG + PTR_ADDU DSTREG,SIZEREG + andi t1,DSTREG,SZREG-1 # get last 3 bits of dest + bne t1,zero,3f + andi t0,SRCREG,SZREG-1 # get last 3 bits of src + bne t0,zero,5f + + /* + * Forward aligned->aligned copy, 8*4 bytes at a time. + */ + li AT,(-8*SZREG) + and t0,SIZEREG,AT # count truncated to multiple of 32 + beq t0,zero,2f # any work to do? + PTR_SUBU SIZEREG,t0 + PTR_SUBU a3,SRCREG,t0 + + /* + * loop body + */ +1: # cp + REG_L t3,(-4*SZREG)(SRCREG) + REG_L v1,(-3*SZREG)(SRCREG) + REG_L t0,(-2*SZREG)(SRCREG) + REG_L t1,(-1*SZREG)(SRCREG) + PTR_SUBU SRCREG,8*SZREG + REG_S t3,(-4*SZREG)(DSTREG) + REG_S v1,(-3*SZREG)(DSTREG) + REG_S t0,(-2*SZREG)(DSTREG) + REG_S t1,(-1*SZREG)(DSTREG) + REG_L t1,(3*SZREG)(SRCREG) + REG_L t0,(2*SZREG)(SRCREG) + REG_L v1,(1*SZREG)(SRCREG) + REG_L t3,(0*SZREG)(SRCREG) + PTR_SUBU DSTREG,8*SZREG + REG_S t1,(3*SZREG)(DSTREG) + REG_S t0,(2*SZREG)(DSTREG) + REG_S v1,(1*SZREG)(DSTREG) + bne SRCREG,a3,1b + REG_S t3,(0*SZREG)(DSTREG) + + /* + * Copy a word at a time, no loop unrolling. + */ +2: # wordcopy + andi t2,SIZEREG,SZREG-1 # get byte count / 4 + PTR_SUBU t2,SIZEREG,t2 # t2 = number of words to copy + beq t2,zero,3f + PTR_SUBU t0,SRCREG,t2 # stop at t0 + PTR_SUBU SIZEREG,SIZEREG,t2 +1: + REG_L t3,-SZREG(SRCREG) + PTR_SUBU SRCREG,SZREG + REG_S t3,-SZREG(DSTREG) + bne SRCREG,t0,1b + PTR_SUBU DSTREG,SZREG + +3: # bytecopy + beq SIZEREG,zero,4f # nothing left to do? + nop +1: + lb t3,-1(SRCREG) + PTR_SUBU SRCREG,1 + sb t3,-1(DSTREG) + PTR_SUBU SIZEREG,1 + bgtz SIZEREG,1b + PTR_SUBU DSTREG,1 + +4: # copydone + j ra + nop + + /* + * Copy from unaligned source to aligned dest. + */ +5: # destaligned + andi t0,SIZEREG,SZREG-1 # t0 = bytecount mod 4 + PTR_SUBU a3,SIZEREG,t0 # number of words to transfer + beq a3,zero,3b + nop + move SIZEREG,t0 # this many to do after we are done + PTR_SUBU a3,SRCREG,a3 # stop point + +1: + REG_LHI t3,-SZREG(SRCREG) + REG_LLO t3,-1(SRCREG) + PTR_SUBU SRCREG,SZREG + REG_S t3,-SZREG(DSTREG) + bne SRCREG,a3,1b + PTR_SUBU DSTREG,SZREG + + b 3b + nop + + .set reorder + .set at + END(FUNCTION) |