summaryrefslogtreecommitdiffstats
path: root/lib/libc/alpha/string/bcopy.S
diff options
context:
space:
mode:
Diffstat (limited to 'lib/libc/alpha/string/bcopy.S')
-rw-r--r--lib/libc/alpha/string/bcopy.S288
1 files changed, 288 insertions, 0 deletions
diff --git a/lib/libc/alpha/string/bcopy.S b/lib/libc/alpha/string/bcopy.S
new file mode 100644
index 0000000..6a45ad6
--- /dev/null
+++ b/lib/libc/alpha/string/bcopy.S
@@ -0,0 +1,288 @@
+/* $NetBSD: bcopy.S,v 1.3 1996/10/17 03:08:11 cgd Exp $ */
+
+/*
+ * Copyright (c) 1995 Carnegie-Mellon University.
+ * All rights reserved.
+ *
+ * Author: Trevor Blackwell. Support for use as memcpy() and memmove()
+ * added by Chris Demetriou.
+ *
+ * Permission to use, copy, modify and distribute this software and
+ * its documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
+ * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie the
+ * rights to redistribute these changes.
+ */
+
+#include <machine/asm.h>
+
+#if defined(MEMCOPY) || defined(MEMMOVE)
+#ifdef MEMCOPY
+#define FUNCTION memcpy
+#else
+#define FUNCTION memmove
+#endif
+#define SRCREG a1
+#define DSTREG a0
+#else /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
+#define FUNCTION bcopy
+#define SRCREG a0
+#define DSTREG a1
+#endif /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
+
+#define SIZEREG a2
+
+/*
+ * Copy bytes.
+ *
+ * void bcopy(char *from, char *to, size_t len);
+ * char *memcpy(void *to, const void *from, size_t len);
+ * char *memmove(void *to, const void *from, size_t len);
+ *
+ * No matter how invoked, the source and destination registers
+ * for calculation. There's no point in copying them to "working"
+ * registers, since the code uses their values "in place," and
+ * copying them would be slower.
+ */
+
+LEAF(FUNCTION,3)
+
+#if defined(MEMCOPY) || defined(MEMMOVE)
+ /* set up return value, while we still can */
+ mov DSTREG,v0
+#endif
+
+ /* Check for negative length */
+ ble SIZEREG,bcopy_done
+
+ /* Check for overlap */
+ subq DSTREG,SRCREG,t5
+ cmpult t5,SIZEREG,t5
+ bne t5,bcopy_overlap
+
+ /* a3 = end address */
+ addq SRCREG,SIZEREG,a3
+
+ /* Get the first word */
+ ldq_u t2,0(SRCREG)
+
+ /* Do they have the same alignment? */
+ xor SRCREG,DSTREG,t0
+ and t0,7,t0
+ and DSTREG,7,t1
+ bne t0,bcopy_different_alignment
+
+ /* src & dst have same alignment */
+ beq t1,bcopy_all_aligned
+
+ ldq_u t3,0(DSTREG)
+ addq SIZEREG,t1,SIZEREG
+ mskqh t2,SRCREG,t2
+ mskql t3,SRCREG,t3
+ or t2,t3,t2
+
+ /* Dst is 8-byte aligned */
+
+bcopy_all_aligned:
+ /* If less than 8 bytes,skip loop */
+ subq SIZEREG,1,t0
+ and SIZEREG,7,SIZEREG
+ bic t0,7,t0
+ beq t0,bcopy_samealign_lp_end
+
+bcopy_samealign_lp:
+ stq_u t2,0(DSTREG)
+ addq DSTREG,8,DSTREG
+ ldq_u t2,8(SRCREG)
+ subq t0,8,t0
+ addq SRCREG,8,SRCREG
+ bne t0,bcopy_samealign_lp
+
+bcopy_samealign_lp_end:
+ /* If we're done, exit */
+ bne SIZEREG,bcopy_small_left
+ stq_u t2,0(DSTREG)
+ RET
+
+bcopy_small_left:
+ mskql t2,SIZEREG,t4
+ ldq_u t3,0(DSTREG)
+ mskqh t3,SIZEREG,t3
+ or t4,t3,t4
+ stq_u t4,0(DSTREG)
+ RET
+
+bcopy_different_alignment:
+ /*
+ * this is the fun part
+ */
+ addq SRCREG,SIZEREG,a3
+ cmpule SIZEREG,8,t0
+ bne t0,bcopy_da_finish
+
+ beq t1,bcopy_da_noentry
+
+ /* Do the initial partial word */
+ subq zero,DSTREG,t0
+ and t0,7,t0
+ ldq_u t3,7(SRCREG)
+ extql t2,SRCREG,t2
+ extqh t3,SRCREG,t3
+ or t2,t3,t5
+ insql t5,DSTREG,t5
+ ldq_u t6,0(DSTREG)
+ mskql t6,DSTREG,t6
+ or t5,t6,t5
+ stq_u t5,0(DSTREG)
+ addq SRCREG,t0,SRCREG
+ addq DSTREG,t0,DSTREG
+ subq SIZEREG,t0,SIZEREG
+ ldq_u t2,0(SRCREG)
+
+bcopy_da_noentry:
+ subq SIZEREG,1,t0
+ bic t0,7,t0
+ and SIZEREG,7,SIZEREG
+ beq t0,bcopy_da_finish2
+
+bcopy_da_lp:
+ ldq_u t3,7(SRCREG)
+ addq SRCREG,8,SRCREG
+ extql t2,SRCREG,t4
+ extqh t3,SRCREG,t5
+ subq t0,8,t0
+ or t4,t5,t5
+ stq t5,0(DSTREG)
+ addq DSTREG,8,DSTREG
+ beq t0,bcopy_da_finish1
+ ldq_u t2,7(SRCREG)
+ addq SRCREG,8,SRCREG
+ extql t3,SRCREG,t4
+ extqh t2,SRCREG,t5
+ subq t0,8,t0
+ or t4,t5,t5
+ stq t5,0(DSTREG)
+ addq DSTREG,8,DSTREG
+ bne t0,bcopy_da_lp
+
+bcopy_da_finish2:
+ /* Do the last new word */
+ mov t2,t3
+
+bcopy_da_finish1:
+ /* Do the last partial word */
+ ldq_u t2,-1(a3)
+ extql t3,SRCREG,t3
+ extqh t2,SRCREG,t2
+ or t2,t3,t2
+ br zero,bcopy_samealign_lp_end
+
+bcopy_da_finish:
+ /* Do the last word in the next source word */
+ ldq_u t3,-1(a3)
+ extql t2,SRCREG,t2
+ extqh t3,SRCREG,t3
+ or t2,t3,t2
+ insqh t2,DSTREG,t3
+ insql t2,DSTREG,t2
+ lda t4,-1(zero)
+ mskql t4,SIZEREG,t5
+ cmovne t5,t5,t4
+ insqh t4,DSTREG,t5
+ insql t4,DSTREG,t4
+ addq DSTREG,SIZEREG,a4
+ ldq_u t6,0(DSTREG)
+ ldq_u t7,-1(a4)
+ bic t6,t4,t6
+ bic t7,t5,t7
+ and t2,t4,t2
+ and t3,t5,t3
+ or t2,t6,t2
+ or t3,t7,t3
+ stq_u t3,-1(a4)
+ stq_u t2,0(DSTREG)
+ RET
+
+bcopy_overlap:
+ /*
+ * Basically equivalent to previous case, only backwards.
+ * Not quite as highly optimized
+ */
+ addq SRCREG,SIZEREG,a3
+ addq DSTREG,SIZEREG,a4
+
+ /* less than 8 bytes - don't worry about overlap */
+ cmpule SIZEREG,8,t0
+ bne t0,bcopy_ov_short
+
+ /* Possibly do a partial first word */
+ and a4,7,t4
+ beq t4,bcopy_ov_nostart2
+ subq a3,t4,a3
+ subq a4,t4,a4
+ ldq_u t1,0(a3)
+ subq SIZEREG,t4,SIZEREG
+ ldq_u t2,7(a3)
+ ldq t3,0(a4)
+ extql t1,a3,t1
+ extqh t2,a3,t2
+ or t1,t2,t1
+ mskqh t3,t4,t3
+ mskql t1,t4,t1
+ or t1,t3,t1
+ stq t1,0(a4)
+
+bcopy_ov_nostart2:
+ bic SIZEREG,7,t4
+ and SIZEREG,7,SIZEREG
+ beq t4,bcopy_ov_lp_end
+
+bcopy_ov_lp:
+ /* This could be more pipelined, but it doesn't seem worth it */
+ ldq_u t0,-8(a3)
+ subq a4,8,a4
+ ldq_u t1,-1(a3)
+ subq a3,8,a3
+ extql t0,a3,t0
+ extqh t1,a3,t1
+ subq t4,8,t4
+ or t0,t1,t0
+ stq t0,0(a4)
+ bne t4,bcopy_ov_lp
+
+bcopy_ov_lp_end:
+ beq SIZEREG,bcopy_done
+
+ ldq_u t0,0(SRCREG)
+ ldq_u t1,7(SRCREG)
+ ldq_u t2,0(DSTREG)
+ extql t0,SRCREG,t0
+ extqh t1,SRCREG,t1
+ or t0,t1,t0
+ insql t0,DSTREG,t0
+ mskql t2,DSTREG,t2
+ or t2,t0,t2
+ stq_u t2,0(DSTREG)
+
+bcopy_done:
+ RET
+
+bcopy_ov_short:
+ ldq_u t2,0(SRCREG)
+ br zero,bcopy_da_finish
+
+ END(FUNCTION)
OpenPOWER on IntegriCloud