Diffstat (limited to 'lib/libc/alpha/string/bcopy.S')
-rw-r--r-- | lib/libc/alpha/string/bcopy.S | 289 |
1 file changed, 289 insertions, 0 deletions
diff --git a/lib/libc/alpha/string/bcopy.S b/lib/libc/alpha/string/bcopy.S
new file mode 100644
index 0000000..dc23063
--- /dev/null
+++ b/lib/libc/alpha/string/bcopy.S
@@ -0,0 +1,289 @@
+/* $NetBSD: bcopy.S,v 1.3 1996/10/17 03:08:11 cgd Exp $ */
+
+/*
+ * Copyright (c) 1995 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Author: Trevor Blackwell.  Support for use as memcpy() and memmove()
+ * added by Chris Demetriou.
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
+ *  School of Computer Science
+ *  Carnegie Mellon University
+ *  Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+#if defined(MEMCOPY) || defined(MEMMOVE)
+#ifdef MEMCOPY
+#define	FUNCTION	memcpy
+#else
+#define	FUNCTION	memmove
+#endif
+#define	SRCREG		a1
+#define	DSTREG		a0
+#else /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
+#define	FUNCTION	bcopy
+#define	SRCREG		a0
+#define	DSTREG		a1
+#endif /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
+
+#define	SIZEREG		a2
+
+/*
+ * Copy bytes.
+ *
+ * void bcopy(char *from, char *to, size_t len);
+ * char *memcpy(void *to, const void *from, size_t len);
+ * char *memmove(void *to, const void *from, size_t len);
+ *
+ * No matter how invoked, the source and destination registers are used
+ * directly for calculation.  There's no point in copying them to "working"
+ * registers, since the code uses their values "in place," and copying them
+ * would be slower.
+ */
+
+LEAF(FUNCTION,3)
+
+#if defined(MEMCOPY) || defined(MEMMOVE)
+	/* set up return value, while we still can */
+	mov	DSTREG,v0
+#endif
+
+	/* Check for negative length */
+	ble	SIZEREG,bcopy_done
+
+	/* Check for overlap */
+	subq	DSTREG,SRCREG,t5
+	cmpult	t5,SIZEREG,t5
+	bne	t5,bcopy_overlap
+
+	/* a3 = end address */
+	addq	SRCREG,SIZEREG,a3
+
+	/* Get the first word */
+	ldq_u	t2,0(SRCREG)
+
+	/* Do they have the same alignment? */
+	xor	SRCREG,DSTREG,t0
+	and	t0,7,t0
+	and	DSTREG,7,t1
+	bne	t0,bcopy_different_alignment
+
+	/* src & dst have same alignment */
+	beq	t1,bcopy_all_aligned
+
+	ldq_u	t3,0(DSTREG)
+	addq	SIZEREG,t1,SIZEREG
+	mskqh	t2,SRCREG,t2
+	mskql	t3,SRCREG,t3
+	or	t2,t3,t2
+
+	/* Dst is 8-byte aligned */
+
+bcopy_all_aligned:
+	/* If less than 8 bytes, skip loop */
+	subq	SIZEREG,1,t0
+	and	SIZEREG,7,SIZEREG
+	bic	t0,7,t0
+	beq	t0,bcopy_samealign_lp_end
+
+bcopy_samealign_lp:
+	stq_u	t2,0(DSTREG)
+	addq	DSTREG,8,DSTREG
+	ldq_u	t2,8(SRCREG)
+	subq	t0,8,t0
+	addq	SRCREG,8,SRCREG
+	bne	t0,bcopy_samealign_lp
+
+bcopy_samealign_lp_end:
+	/* If we're done, exit */
+	bne	SIZEREG,bcopy_small_left
+	stq_u	t2,0(DSTREG)
+	RET
+
+bcopy_small_left:
+	mskql	t2,SIZEREG,t4
+	ldq_u	t3,0(DSTREG)
+	mskqh	t3,SIZEREG,t3
+	or	t4,t3,t4
+	stq_u	t4,0(DSTREG)
+	RET
+
+bcopy_different_alignment:
+	/*
+	 * this is the fun part
+	 */
+	addq	SRCREG,SIZEREG,a3
+	cmpule	SIZEREG,8,t0
+	bne	t0,bcopy_da_finish
+
+	beq	t1,bcopy_da_noentry
+
+	/* Do the initial partial word */
+	subq	zero,DSTREG,t0
+	and	t0,7,t0
+	ldq_u	t3,7(SRCREG)
+	extql	t2,SRCREG,t2
+	extqh	t3,SRCREG,t3
+	or	t2,t3,t5
+	insql	t5,DSTREG,t5
+	ldq_u	t6,0(DSTREG)
+	mskql	t6,DSTREG,t6
+	or	t5,t6,t5
+	stq_u	t5,0(DSTREG)
+	addq	SRCREG,t0,SRCREG
+	addq	DSTREG,t0,DSTREG
+	subq	SIZEREG,t0,SIZEREG
+	ldq_u	t2,0(SRCREG)
+
+bcopy_da_noentry:
+	subq	SIZEREG,1,t0
+	bic	t0,7,t0
+	and	SIZEREG,7,SIZEREG
+	beq	t0,bcopy_da_finish2
+
+bcopy_da_lp:
+	ldq_u	t3,7(SRCREG)
+	addq	SRCREG,8,SRCREG
+	extql	t2,SRCREG,t4
+	extqh	t3,SRCREG,t5
+	subq	t0,8,t0
+	or	t4,t5,t5
+	stq	t5,0(DSTREG)
+	addq	DSTREG,8,DSTREG
+	beq	t0,bcopy_da_finish1
+	ldq_u	t2,7(SRCREG)
+	addq	SRCREG,8,SRCREG
+	extql	t3,SRCREG,t4
+	extqh	t2,SRCREG,t5
+	subq	t0,8,t0
+	or	t4,t5,t5
+	stq	t5,0(DSTREG)
+	addq	DSTREG,8,DSTREG
+	bne	t0,bcopy_da_lp
+
+bcopy_da_finish2:
+	/* Do the last new word */
+	mov	t2,t3
+
+bcopy_da_finish1:
+	/* Do the last partial word */
+	ldq_u	t2,-1(a3)
+	extql	t3,SRCREG,t3
+	extqh	t2,SRCREG,t2
+	or	t2,t3,t2
+	br	zero,bcopy_samealign_lp_end
+
+bcopy_da_finish:
+	/* Do the last word in the next source word */
+	ldq_u	t3,-1(a3)
+	extql	t2,SRCREG,t2
+	extqh	t3,SRCREG,t3
+	or	t2,t3,t2
+	insqh	t2,DSTREG,t3
+	insql	t2,DSTREG,t2
+	lda	t4,-1(zero)
+	mskql	t4,SIZEREG,t5
+	cmovne	t5,t5,t4
+	insqh	t4,DSTREG,t5
+	insql	t4,DSTREG,t4
+	addq	DSTREG,SIZEREG,a4
+	ldq_u	t6,0(DSTREG)
+	ldq_u	t7,-1(a4)
+	bic	t6,t4,t6
+	bic	t7,t5,t7
+	and	t2,t4,t2
+	and	t3,t5,t3
+	or	t2,t6,t2
+	or	t3,t7,t3
+	stq_u	t3,-1(a4)
+	stq_u	t2,0(DSTREG)
+	RET
+
+bcopy_overlap:
+	/*
+	 * Basically equivalent to previous case, only backwards.
+	 * Not quite as highly optimized.
+	 */
+	addq	SRCREG,SIZEREG,a3
+	addq	DSTREG,SIZEREG,a4
+
+	/* less than 8 bytes - don't worry about overlap */
+	cmpule	SIZEREG,8,t0
+	bne	t0,bcopy_ov_short
+
+	/* Possibly do a partial first word */
+	and	a4,7,t4
+	beq	t4,bcopy_ov_nostart2
+	subq	a3,t4,a3
+	subq	a4,t4,a4
+	ldq_u	t1,0(a3)
+	subq	SIZEREG,t4,SIZEREG
+	ldq_u	t2,7(a3)
+	ldq	t3,0(a4)
+	extql	t1,a3,t1
+	extqh	t2,a3,t2
+	or	t1,t2,t1
+	mskqh	t3,t4,t3
+	mskql	t1,t4,t1
+	or	t1,t3,t1
+	stq	t1,0(a4)
+
+bcopy_ov_nostart2:
+	bic	SIZEREG,7,t4
+	and	SIZEREG,7,SIZEREG
+	beq	t4,bcopy_ov_lp_end
+
+bcopy_ov_lp:
+	/* This could be more pipelined, but it doesn't seem worth it */
+	ldq_u	t0,-8(a3)
+	subq	a4,8,a4
+	ldq_u	t1,-1(a3)
+	subq	a3,8,a3
+	extql	t0,a3,t0
+	extqh	t1,a3,t1
+	subq	t4,8,t4
+	or	t0,t1,t0
+	stq	t0,0(a4)
+	bne	t4,bcopy_ov_lp
+
+bcopy_ov_lp_end:
+	beq	SIZEREG,bcopy_done
+
+	ldq_u	t0,0(SRCREG)
+	ldq_u	t1,7(SRCREG)
+	ldq_u	t2,0(DSTREG)
+	extql	t0,SRCREG,t0
+	extqh	t1,SRCREG,t1
+	or	t0,t1,t0
+	insql	t0,DSTREG,t0
+	mskql	t2,DSTREG,t2
+	or	t2,t0,t2
+	stq_u	t2,0(DSTREG)
+
+bcopy_done:
+	RET
+
+bcopy_ov_short:
+	ldq_u	t2,0(SRCREG)
+	br	zero,bcopy_da_finish
+
+	END(FUNCTION)
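Note on the MEMCOPY/MEMMOVE conditionals: they let this single source file provide bcopy(), memcpy(), and memmove(). The SRCREG/DSTREG defines absorb the swapped argument order between bcopy(from, to, len) and memcpy(to, from, len), and the "mov DSTREG,v0" at entry sets up the destination-pointer return value that the mem* interfaces require. Presumably the companion files in this directory follow the usual BSD wrapper pattern, roughly (assumed here for illustration, not shown in this diff):

	/* memcpy.S */
	#define	MEMCOPY
	#include "bcopy.S"

	/* memmove.S */
	#define	MEMMOVE
	#include "bcopy.S"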
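The forward/backward dispatch near the top (subq DSTREG,SRCREG,t5 followed by cmpult t5,SIZEREG,t5) folds the whole overlap test into one unsigned comparison: a forward copy is unsafe exactly when the destination starts inside the source range. A minimal C sketch of the same test; the function name is illustrative, not part of this source:

	#include <stddef.h>
	#include <stdint.h>

	/* Mirrors the subq/cmpult pair: (dst - src) mod 2^64 < len
	 * iff dst lands inside [src, src + len), in which case the
	 * routine must copy backward (bcopy_overlap). */
	static int
	must_copy_backward(const void *src, const void *dst, size_t len)
	{
		return ((uintptr_t)dst - (uintptr_t)src) < (uintptr_t)len;
	}

When dst precedes src or the ranges are disjoint, the subtraction wraps to a large unsigned value, the comparison fails, and the cheaper forward path is taken.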
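The bcopy_different_alignment paths rely on the Alpha unaligned-access idiom: ldq_u loads the aligned quadword containing a byte address, and extql/extqh shift two such quadwords so their bytes can be merged with or. A rough C model of one unaligned 64-bit load, assuming little-endian byte numbering as on Alpha; the name is illustrative:

	#include <stdint.h>

	static uint64_t
	load_quad_unaligned(const uint8_t *p)
	{
		uintptr_t a = (uintptr_t)p;
		const uint64_t *lo = (const uint64_t *)(a & ~(uintptr_t)7);       /* ldq_u 0(p) */
		const uint64_t *hi = (const uint64_t *)((a + 7) & ~(uintptr_t)7); /* ldq_u 7(p) */
		unsigned s = (unsigned)(a & 7) * 8;

		if (s == 0)		/* aligned: extqh would contribute nothing */
			return *lo;
		return (*lo >> s) | (*hi << (64 - s));	/* extql ... or extqh ... */
	}

On the store side, insql/insqh position a value for an unaligned destination and mskql/mskqh preserve the neighboring byte lanes, which is how the partial first and last words are written without touching bytes outside the buffer.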