diff options
author | jchandra <jchandra@FreeBSD.org> | 2013-09-07 16:31:30 +0000 |
---|---|---|
committer | jchandra <jchandra@FreeBSD.org> | 2013-09-07 16:31:30 +0000 |
commit | 7618cc8395426b79f319af2310c0d0fd3b4bed02 (patch) | |
tree | 0deb85f5bb7feb7eebf16866191088d03b20fc72 /sys/mips | |
parent | e372a1314d9ca593e933d34f7f33e4ac9ded62e1 (diff) | |
download | FreeBSD-src-7618cc8395426b79f319af2310c0d0fd3b4bed02.zip FreeBSD-src-7618cc8395426b79f319af2310c0d0fd3b4bed02.tar.gz |
Use a better version of memcpy/bcopy for mips kernel.
Use a variant of mips libc memcpy for kernel. This implementation uses
64-bit operations when compiled for 64-bit, and is significantly faster
in that case.
Submitted by: Tanmay Jagdale <tanmayj@broadcom.com>
Diffstat (limited to 'sys/mips')
-rw-r--r-- | sys/mips/mips/bcopy.S | 286 | ||||
-rw-r--r-- | sys/mips/mips/support.S | 92 |
2 files changed, 286 insertions, 92 deletions
diff --git a/sys/mips/mips/bcopy.S b/sys/mips/mips/bcopy.S new file mode 100644 index 0000000..a7ac1f2 --- /dev/null +++ b/sys/mips/mips/bcopy.S @@ -0,0 +1,286 @@ +/* $NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $ */ + +/* + * Mach Operating System + * Copyright (c) 1993 Carnegie Mellon University + * All Rights Reserved. + * + * Permission to use, copy, modify and distribute this software and its + * documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR + * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie Mellon + * the rights to redistribute these changes. + */ + +/* + * File: mips_bcopy.s + * Author: Chris Maeda + * Date: June 1993 + * + * Fast copy routine. Derived from aligned_block_copy. + */ + + +#include <machine/asm.h> +__FBSDID("$FreeBSD$"); + +#include <machine/endian.h> + +#if defined(LIBC_SCCS) && !defined(lint) +#if 0 + ASMSTR("from: @(#)mips_bcopy.s 2.2 CMU 18/06/93") +#else + ASMSTR("$NetBSD: bcopy.S,v 1.3 2009/12/14 00:39:00 matt Exp $") +#endif +#endif /* LIBC_SCCS and not lint */ + +#ifdef __ABICALLS__ + .abicalls +#endif + +/* + * bcopy(caddr_t src, caddr_t dst, unsigned int len) + * + * a0 src address + * a1 dst address + * a2 length + */ + +#define SRCREG a0 +#define DSTREG a1 +#define SIZEREG a2 + +LEAF(memcpy) + .set noat + .set noreorder + + move v0, a0 + move a0, a1 + move a1, v0 + +ALEAF(bcopy) +ALEAF(ovbcopy) + /* + * Make sure we can copy forwards. + */ + sltu t0,SRCREG,DSTREG # t0 == SRCREG < DSTREG + bne t0,zero,6f # copy backwards + + /* + * There are four alignment cases (with frequency) + * (Based on measurements taken with a DECstation 5000/200 + * inside a Mach kernel.) + * + * aligned -> aligned (mostly) + * unaligned -> aligned (sometimes) + * aligned,unaligned -> unaligned (almost never) + * + * Note that we could add another case that checks if + * the destination and source are unaligned but the + * copy is alignable. eg if src and dest are both + * on a halfword boundary. + */ + andi t1,DSTREG,(SZREG-1) # get last bits of dest + bne t1,zero,3f # dest unaligned + andi t0,SRCREG,(SZREG-1) # get last bits of src + bne t0,zero,5f + + /* + * Forward aligned->aligned copy, 8 words at a time. + */ +98: + li AT,-(SZREG*8) + and t0,SIZEREG,AT # count truncated to multiples + PTR_ADDU a3,SRCREG,t0 # run fast loop up to this addr + sltu AT,SRCREG,a3 # any work to do? + beq AT,zero,2f + PTR_SUBU SIZEREG,t0 + + /* + * loop body + */ +1: # cp + REG_L t3,(0*SZREG)(SRCREG) + REG_L v1,(1*SZREG)(SRCREG) + REG_L t0,(2*SZREG)(SRCREG) + REG_L t1,(3*SZREG)(SRCREG) + PTR_ADDU SRCREG,SZREG*8 + REG_S t3,(0*SZREG)(DSTREG) + REG_S v1,(1*SZREG)(DSTREG) + REG_S t0,(2*SZREG)(DSTREG) + REG_S t1,(3*SZREG)(DSTREG) + REG_L t1,(-1*SZREG)(SRCREG) + REG_L t0,(-2*SZREG)(SRCREG) + REG_L v1,(-3*SZREG)(SRCREG) + REG_L t3,(-4*SZREG)(SRCREG) + PTR_ADDU DSTREG,SZREG*8 + REG_S t1,(-1*SZREG)(DSTREG) + REG_S t0,(-2*SZREG)(DSTREG) + REG_S v1,(-3*SZREG)(DSTREG) + bne SRCREG,a3,1b + REG_S t3,(-4*SZREG)(DSTREG) + + /* + * Copy a word at a time, no loop unrolling. + */ +2: # wordcopy + andi t2,SIZEREG,(SZREG-1) # get byte count / SZREG + PTR_SUBU t2,SIZEREG,t2 # t2 = words to copy * SZREG + beq t2,zero,3f + PTR_ADDU t0,SRCREG,t2 # stop at t0 + PTR_SUBU SIZEREG,SIZEREG,t2 +1: + REG_L t3,0(SRCREG) + PTR_ADDU SRCREG,SZREG + REG_S t3,0(DSTREG) + bne SRCREG,t0,1b + PTR_ADDU DSTREG,SZREG + +3: # bytecopy + beq SIZEREG,zero,4f # nothing left to do? + nop +1: + lb t3,0(SRCREG) + PTR_ADDU SRCREG,1 + sb t3,0(DSTREG) + PTR_SUBU SIZEREG,1 + bgtz SIZEREG,1b + PTR_ADDU DSTREG,1 + +4: # copydone + j ra + nop + + /* + * Copy from unaligned source to aligned dest. + */ +5: # destaligned + andi t0,SIZEREG,(SZREG-1) # t0 = bytecount mod SZREG + PTR_SUBU a3,SIZEREG,t0 # number of words to transfer + beq a3,zero,3b + nop + move SIZEREG,t0 # this many to do after we are done + PTR_ADDU a3,SRCREG,a3 # stop point + +1: + REG_LHI t3,0(SRCREG) + REG_LLO t3,SZREG-1(SRCREG) + PTR_ADDI SRCREG,SZREG + REG_S t3,0(DSTREG) + bne SRCREG,a3,1b + PTR_ADDI DSTREG,SZREG + + b 3b + nop + +6: # backcopy -- based on above + PTR_ADDU SRCREG,SIZEREG + PTR_ADDU DSTREG,SIZEREG + andi t1,DSTREG,SZREG-1 # get last 3 bits of dest + bne t1,zero,3f + andi t0,SRCREG,SZREG-1 # get last 3 bits of src + bne t0,zero,5f + + /* + * Forward aligned->aligned copy, 8*4 bytes at a time. + */ + li AT,(-8*SZREG) + and t0,SIZEREG,AT # count truncated to multiple of 32 + beq t0,zero,2f # any work to do? + PTR_SUBU SIZEREG,t0 + PTR_SUBU a3,SRCREG,t0 + + /* + * loop body + */ +1: # cp + REG_L t3,(-4*SZREG)(SRCREG) + REG_L v1,(-3*SZREG)(SRCREG) + REG_L t0,(-2*SZREG)(SRCREG) + REG_L t1,(-1*SZREG)(SRCREG) + PTR_SUBU SRCREG,8*SZREG + REG_S t3,(-4*SZREG)(DSTREG) + REG_S v1,(-3*SZREG)(DSTREG) + REG_S t0,(-2*SZREG)(DSTREG) + REG_S t1,(-1*SZREG)(DSTREG) + REG_L t1,(3*SZREG)(SRCREG) + REG_L t0,(2*SZREG)(SRCREG) + REG_L v1,(1*SZREG)(SRCREG) + REG_L t3,(0*SZREG)(SRCREG) + PTR_SUBU DSTREG,8*SZREG + REG_S t1,(3*SZREG)(DSTREG) + REG_S t0,(2*SZREG)(DSTREG) + REG_S v1,(1*SZREG)(DSTREG) + bne SRCREG,a3,1b + REG_S t3,(0*SZREG)(DSTREG) + + /* + * Copy a word at a time, no loop unrolling. + */ +2: # wordcopy + andi t2,SIZEREG,SZREG-1 # get byte count / 4 + PTR_SUBU t2,SIZEREG,t2 # t2 = number of words to copy + beq t2,zero,3f + PTR_SUBU t0,SRCREG,t2 # stop at t0 + PTR_SUBU SIZEREG,SIZEREG,t2 +1: + REG_L t3,-SZREG(SRCREG) + PTR_SUBU SRCREG,SZREG + REG_S t3,-SZREG(DSTREG) + bne SRCREG,t0,1b + PTR_SUBU DSTREG,SZREG + +3: # bytecopy + beq SIZEREG,zero,4f # nothing left to do? + nop +1: + lb t3,-1(SRCREG) + PTR_SUBU SRCREG,1 + sb t3,-1(DSTREG) + PTR_SUBU SIZEREG,1 + bgtz SIZEREG,1b + PTR_SUBU DSTREG,1 + +4: # copydone + j ra + nop + + /* + * Copy from unaligned source to aligned dest. + */ +5: # destaligned + andi t0,SIZEREG,SZREG-1 # t0 = bytecount mod 4 + PTR_SUBU a3,SIZEREG,t0 # number of words to transfer + beq a3,zero,3b + nop + move SIZEREG,t0 # this many to do after we are done + PTR_SUBU a3,SRCREG,a3 # stop point + +1: + REG_LHI t3,-SZREG(SRCREG) + REG_LLO t3,-1(SRCREG) + PTR_SUBU SRCREG,SZREG + REG_S t3,-SZREG(DSTREG) + bne SRCREG,a3,1b + PTR_SUBU DSTREG,SZREG + + b 3b + nop + + .set reorder + .set at +END(memcpy) diff --git a/sys/mips/mips/support.S b/sys/mips/mips/support.S index 7acebf0..4280000 100644 --- a/sys/mips/mips/support.S +++ b/sys/mips/mips/support.S @@ -507,98 +507,6 @@ LEAF(fswintrberr) END(fswintrberr) /* - * memcpy(to, from, len) - * {ov}bcopy(from, to, len) - */ -LEAF(memcpy) - .set noreorder - move v0, a0 # swap from and to - move a0, a1 - move a1, v0 -ALEAF(bcopy) -ALEAF(ovbcopy) - .set noreorder - PTR_ADDU t0, a0, a2 # t0 = end of s1 region - sltu t1, a1, t0 - sltu t2, a0, a1 - and t1, t1, t2 # t1 = true if from < to < (from+len) - beq t1, zero, forward # non overlapping, do forward copy - slt t2, a2, 12 # check for small copy - - ble a2, zero, 2f - PTR_ADDU t1, a1, a2 # t1 = end of to region -1: - lb v1, -1(t0) # copy bytes backwards, - PTR_SUBU t0, t0, 1 # doesnt happen often so do slow way - PTR_SUBU t1, t1, 1 - bne t0, a0, 1b - sb v1, 0(t1) -2: - j ra - nop -forward: - bne t2, zero, smallcpy # do a small bcopy - xor v1, a0, a1 # compare low two bits of addresses - and v1, v1, 3 - PTR_SUBU a3, zero, a1 # compute # bytes to word align address - beq v1, zero, aligned # addresses can be word aligned - and a3, a3, 3 - - beq a3, zero, 1f - PTR_SUBU a2, a2, a3 # subtract from remaining count - LWHI v1, 0(a0) # get next 4 bytes (unaligned) - LWLO v1, 3(a0) - PTR_ADDU a0, a0, a3 - SWHI v1, 0(a1) # store 1, 2, or 3 bytes to align a1 - PTR_ADDU a1, a1, a3 -1: - and v1, a2, 3 # compute number of words left - PTR_SUBU a3, a2, v1 - move a2, v1 - PTR_ADDU a3, a3, a0 # compute ending address -2: - LWHI v1, 0(a0) # copy words a0 unaligned, a1 aligned - LWLO v1, 3(a0) - PTR_ADDU a0, a0, 4 - sw v1, 0(a1) - PTR_ADDU a1, a1, 4 - bne a0, a3, 2b - nop # We have to do this mmu-bug. - b smallcpy - nop -aligned: - beq a3, zero, 1f - PTR_SUBU a2, a2, a3 # subtract from remaining count - LWHI v1, 0(a0) # copy 1, 2, or 3 bytes to align - PTR_ADDU a0, a0, a3 - SWHI v1, 0(a1) - PTR_ADDU a1, a1, a3 -1: - and v1, a2, 3 # compute number of whole words left - PTR_SUBU a3, a2, v1 - move a2, v1 - PTR_ADDU a3, a3, a0 # compute ending address -2: - lw v1, 0(a0) # copy words - PTR_ADDU a0, a0, 4 - sw v1, 0(a1) - bne a0, a3, 2b - PTR_ADDU a1, a1, 4 -smallcpy: - ble a2, zero, 2f - PTR_ADDU a3, a2, a0 # compute ending address -1: - lbu v1, 0(a0) # copy bytes - PTR_ADDU a0, a0, 1 - sb v1, 0(a1) - bne a0, a3, 1b - PTR_ADDU a1, a1, 1 # MMU BUG ? can not do -1(a1) at 0x80000000!! -2: - j ra - nop -END(memcpy) - -/* * memset(void *s1, int c, int len) * NetBSD: memset.S,v 1.3 2001/10/16 15:40:53 uch Exp */ |