summaryrefslogtreecommitdiffstats
path: root/sys/mips/mips
diff options
context:
space:
mode:
authorjchandra <jchandra@FreeBSD.org>2013-09-07 16:31:30 +0000
committerjchandra <jchandra@FreeBSD.org>2013-09-07 16:31:30 +0000
commit7618cc8395426b79f319af2310c0d0fd3b4bed02 (patch)
tree0deb85f5bb7feb7eebf16866191088d03b20fc72 /sys/mips/mips
parente372a1314d9ca593e933d34f7f33e4ac9ded62e1 (diff)
downloadFreeBSD-src-7618cc8395426b79f319af2310c0d0fd3b4bed02.zip
FreeBSD-src-7618cc8395426b79f319af2310c0d0fd3b4bed02.tar.gz
Use a better version of memcpy/bcopy for mips kernel.
Use a variant of mips libc memcpy for kernel. This implementation uses 64-bit operations when compiled for 64-bit, and is significantly faster in that case. Submitted by: Tanmay Jagdale <tanmayj@broadcom.com>
Diffstat (limited to 'sys/mips/mips')
-rw-r--r--sys/mips/mips/bcopy.S286
-rw-r--r--sys/mips/mips/support.S92
2 files changed, 286 insertions, 92 deletions
diff --git a/sys/mips/mips/bcopy.S b/sys/mips/mips/bcopy.S
new file mode 100644
index 0000000..a7ac1f2
--- /dev/null
+++ b/sys/mips/mips/bcopy.S
@@ -0,0 +1,286 @@
+/* $NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $ */
+
+/*
+ * Mach Operating System
+ * Copyright (c) 1993 Carnegie Mellon University
+ * All Rights Reserved.
+ *
+ * Permission to use, copy, modify and distribute this software and its
+ * documentation is hereby granted, provided that both the copyright
+ * notice and this permission notice appear in all copies of the
+ * software, derivative works or modified versions, and any portions
+ * thereof, and that both notices appear in supporting documentation.
+ *
+ * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
+ * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
+ * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
+ *
+ * Carnegie Mellon requests users of this software to return to
+ *
+ * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
+ * School of Computer Science
+ * Carnegie Mellon University
+ * Pittsburgh PA 15213-3890
+ *
+ * any improvements or extensions that they make and grant Carnegie Mellon
+ * the rights to redistribute these changes.
+ */
+
+/*
+ * File: mips_bcopy.s
+ * Author: Chris Maeda
+ * Date: June 1993
+ *
+ * Fast copy routine. Derived from aligned_block_copy.
+ */
+
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+#include <machine/endian.h>
+
+#if defined(LIBC_SCCS) && !defined(lint)
+#if 0
+ ASMSTR("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93")
+#else
+ ASMSTR("$NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $")
+#endif
+#endif /* LIBC_SCCS and not lint */
+
+#ifdef __ABICALLS__
+ .abicalls
+#endif
+
+/*
+ * bcopy(caddr_t src, caddr_t dst, unsigned int len)
+ *
+ * a0 src address
+ * a1 dst address
+ * a2 length
+ */
+
+#define SRCREG a0 /* source pointer; moved along as data is consumed */
+#define DSTREG a1 /* destination pointer; moved in step with SRCREG */
+#define SIZEREG a2 /* number of bytes still to be copied */
+
+LEAF(memcpy) /* memcpy(dst, src, len): swaps a0/a1, then falls into bcopy */
+ .set noat # AT is used explicitly as a scratch register below
+ .set noreorder # branch delay slots are filled by hand
+
+ move v0, a0 # v0 = dst; not written again before the return
+ move a0, a1 # a0 = src
+ move a1, v0 # a1 = dst, i.e. the bcopy argument order
+
+ALEAF(bcopy) /* bcopy(src, dst, len) enters here */
+ALEAF(ovbcopy) /* ovbcopy shares the same entry point */
+ /*
+ * Copy forwards only when src >= dst; otherwise copy backwards.
+ */
+ sltu t0,SRCREG,DSTREG # t0 == SRCREG < DSTREG
+ bne t0,zero,6f # copy backwards
+
+ /*
+ * There are four alignment cases (with frequency)
+ * (Based on measurements taken with a DECstation 5000/200
+ * inside a Mach kernel.)
+ *
+ * aligned -> aligned (mostly)
+ * unaligned -> aligned (sometimes)
+ * aligned,unaligned -> unaligned (almost never)
+ *
+ * Note that we could add another case that checks if
+ * the destination and source are unaligned but the
+ * copy is alignable. eg if src and dest are both
+ * on a halfword boundary.
+ */
+ andi t1,DSTREG,(SZREG-1) # low bits of dest; also the delay slot of the bne above
+ bne t1,zero,3f # dest unaligned: copy everything bytewise
+ andi t0,SRCREG,(SZREG-1) # (delay slot) low bits of src
+ bne t0,zero,5f # src unaligned, dest aligned
+
+ /*
+ * Forward aligned->aligned copy, 8 registers (8*SZREG bytes) at a time.
+ */
+98: # no branch in this routine targets this label
+ li AT,-(SZREG*8) # mask for rounding down to 8*SZREG; also the delay slot of the bne above
+ and t0,SIZEREG,AT # t0 = count rounded down to a multiple of 8*SZREG
+ PTR_ADDU a3,SRCREG,t0 # run fast loop up to this addr
+ sltu AT,SRCREG,a3 # any work to do?
+ beq AT,zero,2f # no full 8-register chunk
+ PTR_SUBU SIZEREG,t0 # (delay slot) bytes left after the fast loop
+
+ /*
+ * loop body: two groups of four loads followed by four stores
+ */
+1: # cp
+ REG_L t3,(0*SZREG)(SRCREG)
+ REG_L v1,(1*SZREG)(SRCREG)
+ REG_L t0,(2*SZREG)(SRCREG)
+ REG_L t1,(3*SZREG)(SRCREG)
+ PTR_ADDU SRCREG,SZREG*8 # advance src; the next loads use negative offsets
+ REG_S t3,(0*SZREG)(DSTREG)
+ REG_S v1,(1*SZREG)(DSTREG)
+ REG_S t0,(2*SZREG)(DSTREG)
+ REG_S t1,(3*SZREG)(DSTREG)
+ REG_L t1,(-1*SZREG)(SRCREG)
+ REG_L t0,(-2*SZREG)(SRCREG)
+ REG_L v1,(-3*SZREG)(SRCREG)
+ REG_L t3,(-4*SZREG)(SRCREG)
+ PTR_ADDU DSTREG,SZREG*8 # advance dst; the next stores use negative offsets
+ REG_S t1,(-1*SZREG)(DSTREG)
+ REG_S t0,(-2*SZREG)(DSTREG)
+ REG_S v1,(-3*SZREG)(DSTREG)
+ bne SRCREG,a3,1b # loop until src reaches the stop address
+ REG_S t3,(-4*SZREG)(DSTREG) # (delay slot) last store of the iteration
+
+ /*
+ * Copy one register (SZREG bytes) at a time, no loop unrolling.
+ */
+2: # wordcopy
+ andi t2,SIZEREG,(SZREG-1) # t2 = byte count mod SZREG
+ PTR_SUBU t2,SIZEREG,t2 # t2 = words to copy * SZREG
+ beq t2,zero,3f # less than one full register left
+ PTR_ADDU t0,SRCREG,t2 # (delay slot) stop at t0
+ PTR_SUBU SIZEREG,SIZEREG,t2 # bytes left for the byte loop
+1:
+ REG_L t3,0(SRCREG)
+ PTR_ADDU SRCREG,SZREG
+ REG_S t3,0(DSTREG)
+ bne SRCREG,t0,1b
+ PTR_ADDU DSTREG,SZREG # (delay slot)
+
+3: # bytecopy
+ beq SIZEREG,zero,4f # nothing left to do?
+ nop
+1:
+ lb t3,0(SRCREG) # sign extension is irrelevant: sb stores only the low byte
+ PTR_ADDU SRCREG,1
+ sb t3,0(DSTREG)
+ PTR_SUBU SIZEREG,1
+ bgtz SIZEREG,1b # signed test: loop while the count is positive
+ PTR_ADDU DSTREG,1 # (delay slot)
+
+4: # copydone
+ j ra # v0 is unchanged since the memcpy entry
+ nop
+
+ /*
+ * Copy from unaligned source to aligned dest.
+ */
+5: # destaligned
+ andi t0,SIZEREG,(SZREG-1) # t0 = bytecount mod SZREG
+ PTR_SUBU a3,SIZEREG,t0 # number of words to transfer
+ beq a3,zero,3b # less than one register: byte loop only
+ nop
+ move SIZEREG,t0 # this many to do after we are done
+ PTR_ADDU a3,SRCREG,a3 # stop point
+
+1:
+ REG_LHI t3,0(SRCREG) # unaligned load, first part (macro defined outside this file; presumably lwl/lwr or ldl/ldr)
+ REG_LLO t3,SZREG-1(SRCREG) # unaligned load, second part of the same register
+ PTR_ADDI SRCREG,SZREG # NOTE(review): PTR_ADDI here, PTR_ADDU elsewhere; confirm it cannot trap on overflow
+ REG_S t3,0(DSTREG)
+ bne SRCREG,a3,1b
+ PTR_ADDI DSTREG,SZREG # (delay slot) same PTR_ADDI question as above
+
+ b 3b # finish the leftover bytes in the byte loop
+ nop
+
+6: # backcopy -- based on above
+ PTR_ADDU SRCREG,SIZEREG # src now points one past the last source byte
+ PTR_ADDU DSTREG,SIZEREG # dst now points one past the last destination byte
+ andi t1,DSTREG,SZREG-1 # low bits of the dest end address
+ bne t1,zero,3f # dest end unaligned: copy everything bytewise
+ andi t0,SRCREG,SZREG-1 # (delay slot) low bits of the src end address
+ bne t0,zero,5f # src end unaligned, dest end aligned
+
+ /*
+ * Backward aligned->aligned copy, 8*SZREG bytes at a time.
+ */
+ li AT,(-8*SZREG) # mask for rounding down; also the delay slot of the bne above
+ and t0,SIZEREG,AT # count truncated to a multiple of 8*SZREG
+ beq t0,zero,2f # any work to do?
+ PTR_SUBU SIZEREG,t0 # (delay slot) bytes left after the fast loop
+ PTR_SUBU a3,SRCREG,t0 # a3 = address at which the fast loop stops
+
+ /*
+ * loop body: mirror image of the forward loop, moving downwards
+ */
+1: # cp
+ REG_L t3,(-4*SZREG)(SRCREG)
+ REG_L v1,(-3*SZREG)(SRCREG)
+ REG_L t0,(-2*SZREG)(SRCREG)
+ REG_L t1,(-1*SZREG)(SRCREG)
+ PTR_SUBU SRCREG,8*SZREG # step src down; the next loads use positive offsets
+ REG_S t3,(-4*SZREG)(DSTREG)
+ REG_S v1,(-3*SZREG)(DSTREG)
+ REG_S t0,(-2*SZREG)(DSTREG)
+ REG_S t1,(-1*SZREG)(DSTREG)
+ REG_L t1,(3*SZREG)(SRCREG)
+ REG_L t0,(2*SZREG)(SRCREG)
+ REG_L v1,(1*SZREG)(SRCREG)
+ REG_L t3,(0*SZREG)(SRCREG)
+ PTR_SUBU DSTREG,8*SZREG # step dst down; the next stores use positive offsets
+ REG_S t1,(3*SZREG)(DSTREG)
+ REG_S t0,(2*SZREG)(DSTREG)
+ REG_S v1,(1*SZREG)(DSTREG)
+ bne SRCREG,a3,1b # loop until src reaches the stop address
+ REG_S t3,(0*SZREG)(DSTREG) # (delay slot) last store of the iteration
+
+ /*
+ * Copy one register (SZREG bytes) at a time, no loop unrolling.
+ */
+2: # wordcopy
+ andi t2,SIZEREG,SZREG-1 # t2 = byte count mod SZREG
+ PTR_SUBU t2,SIZEREG,t2 # t2 = bytes to copy as whole registers
+ beq t2,zero,3f # less than one full register left
+ PTR_SUBU t0,SRCREG,t2 # (delay slot) stop at t0
+ PTR_SUBU SIZEREG,SIZEREG,t2 # bytes left for the byte loop
+1:
+ REG_L t3,-SZREG(SRCREG)
+ PTR_SUBU SRCREG,SZREG
+ REG_S t3,-SZREG(DSTREG)
+ bne SRCREG,t0,1b
+ PTR_SUBU DSTREG,SZREG # (delay slot)
+
+3: # bytecopy
+ beq SIZEREG,zero,4f # nothing left to do?
+ nop
+1:
+ lb t3,-1(SRCREG) # byte just below the current src position
+ PTR_SUBU SRCREG,1
+ sb t3,-1(DSTREG)
+ PTR_SUBU SIZEREG,1
+ bgtz SIZEREG,1b # signed test: loop while the count is positive
+ PTR_SUBU DSTREG,1 # (delay slot)
+
+4: # copydone
+ j ra # v0 is unchanged since the memcpy entry
+ nop
+
+ /*
+ * Copy backwards from unaligned source to aligned dest.
+ */
+5: # destaligned
+ andi t0,SIZEREG,SZREG-1 # t0 = bytecount mod SZREG
+ PTR_SUBU a3,SIZEREG,t0 # number of words to transfer
+ beq a3,zero,3b # less than one register: byte loop only
+ nop
+ move SIZEREG,t0 # this many to do after we are done
+ PTR_SUBU a3,SRCREG,a3 # stop point
+
+1:
+ REG_LHI t3,-SZREG(SRCREG) # unaligned load of the register that ends at src
+ REG_LLO t3,-1(SRCREG) # second part of the same unaligned load
+ PTR_SUBU SRCREG,SZREG
+ REG_S t3,-SZREG(DSTREG)
+ bne SRCREG,a3,1b
+ PTR_SUBU DSTREG,SZREG # (delay slot)
+
+ b 3b # finish the leftover bytes in the backward byte loop
+ nop
+
+ .set reorder
+ .set at
+END(memcpy)
diff --git a/sys/mips/mips/support.S b/sys/mips/mips/support.S
index 7acebf0..4280000 100644
--- a/sys/mips/mips/support.S
+++ b/sys/mips/mips/support.S
@@ -507,98 +507,6 @@ LEAF(fswintrberr)
END(fswintrberr)
/*
- * memcpy(to, from, len)
- * {ov}bcopy(from, to, len)
- */
-LEAF(memcpy)
- .set noreorder # delay slots below are filled by hand
- move v0, a0 # swap from and to; v0 keeps the original a0
- move a0, a1
- move a1, v0
-ALEAF(bcopy)
-ALEAF(ovbcopy)
- .set noreorder
- PTR_ADDU t0, a0, a2 # t0 = end of from region (a0 + a2)
- sltu t1, a1, t0 # t1 = (to < from + len)
- sltu t2, a0, a1 # t2 = (from < to)
- and t1, t1, t2 # t1 = true if from < to < (from+len)
- beq t1, zero, forward # non overlapping, do forward copy
- slt t2, a2, 12 # check for small copy (delay slot: t2 = len < 12)
-
- ble a2, zero, 2f # nothing to copy
- PTR_ADDU t1, a1, a2 # t1 = end of to region
-1:
- lb v1, -1(t0) # copy bytes backwards,
- PTR_SUBU t0, t0, 1 # doesnt happen often so do slow way
- PTR_SUBU t1, t1, 1
- bne t0, a0, 1b # until the start of the from region is reached
- sb v1, 0(t1) # (delay slot) store the byte just loaded
-2:
- j ra
- nop
-forward:
- bne t2, zero, smallcpy # do a small bcopy
- xor v1, a0, a1 # compare low two bits of addresses
- and v1, v1, 3
- PTR_SUBU a3, zero, a1 # compute # bytes to word align address
- beq v1, zero, aligned # addresses can be word aligned
- and a3, a3, 3 # (delay slot) a3 = (-to) & 3
-
- beq a3, zero, 1f # destination already on a 4-byte boundary
- PTR_SUBU a2, a2, a3 # subtract from remaining count
- LWHI v1, 0(a0) # get next 4 bytes (unaligned)
- LWLO v1, 3(a0)
- PTR_ADDU a0, a0, a3
- SWHI v1, 0(a1) # store 1, 2, or 3 bytes to align a1
- PTR_ADDU a1, a1, a3
-1:
- and v1, a2, 3 # compute number of words left
- PTR_SUBU a3, a2, v1 # a3 = byte count of the whole 4-byte words
- move a2, v1 # a2 = trailing bytes left for smallcpy
- PTR_ADDU a3, a3, a0 # compute ending address
-2:
- LWHI v1, 0(a0) # copy words a0 unaligned, a1 aligned
- LWLO v1, 3(a0)
- PTR_ADDU a0, a0, 4
- sw v1, 0(a1)
- PTR_ADDU a1, a1, 4
- bne a0, a3, 2b
- nop # We have to do this mmu-bug.
- b smallcpy
- nop
-aligned:
- beq a3, zero, 1f # both pointers already on a 4-byte boundary
- PTR_SUBU a2, a2, a3 # subtract from remaining count
- LWHI v1, 0(a0) # copy 1, 2, or 3 bytes to align
- PTR_ADDU a0, a0, a3
- SWHI v1, 0(a1)
- PTR_ADDU a1, a1, a3
-1:
- and v1, a2, 3 # compute number of whole words left
- PTR_SUBU a3, a2, v1 # a3 = byte count of the whole 4-byte words
- move a2, v1 # a2 = trailing bytes left for smallcpy
- PTR_ADDU a3, a3, a0 # compute ending address
-2:
- lw v1, 0(a0) # copy words
- PTR_ADDU a0, a0, 4
- sw v1, 0(a1)
- bne a0, a3, 2b
- PTR_ADDU a1, a1, 4 # (delay slot)
-smallcpy:
- ble a2, zero, 2f # no bytes left
- PTR_ADDU a3, a2, a0 # compute ending address
-1:
- lbu v1, 0(a0) # copy bytes
- PTR_ADDU a0, a0, 1
- sb v1, 0(a1)
- bne a0, a3, 1b
- PTR_ADDU a1, a1, 1 # MMU BUG ? can not do -1(a1) at 0x80000000!!
-2:
- j ra
- nop
-END(memcpy)
-
-/*
* memset(void *s1, int c, int len)
* NetBSD: memset.S,v 1.3 2001/10/16 15:40:53 uch Exp
*/
OpenPOWER on IntegriCloud