diff options
Diffstat (limited to 'arch/mn10300/lib/memcpy.S')
-rw-r--r-- | arch/mn10300/lib/memcpy.S | 135 |
1 files changed, 135 insertions, 0 deletions
diff --git a/arch/mn10300/lib/memcpy.S b/arch/mn10300/lib/memcpy.S new file mode 100644 index 0000000..25fb9bb --- /dev/null +++ b/arch/mn10300/lib/memcpy.S @@ -0,0 +1,135 @@ +/* MN10300 Optimised simple memory to memory copy + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +#include <asm/cache.h> + + .section .text + .balign L1_CACHE_BYTES + +############################################################################### +# +# void *memcpy(void *dst, const void *src, size_t n) +# +############################################################################### + .globl memcpy + .type memcpy,@function +memcpy: + movm [d2,d3],(sp) + mov d0,(12,sp) + mov d1,(16,sp) + mov (20,sp),d2 # count + mov d0,a0 # dst + mov d1,a1 # src + mov d0,e3 # the return value + + cmp +0,d2 + beq memcpy_done # return if zero-length copy + + # see if the three parameters are all four-byte aligned + or d0,d1,d3 + or d2,d3 + and +3,d3 + bne memcpy_1 # jump if not + + # we want to transfer as much as we can in chunks of 32 bytes + cmp +31,d2 + bls memcpy_4_remainder # 4-byte aligned remainder + + movm [exreg1],(sp) + add -32,d2 + mov +32,d3 + +memcpy_4_loop: + mov (a1+),d0 + mov (a1+),d1 + mov (a1+),e0 + mov (a1+),e1 + mov (a1+),e4 + mov (a1+),e5 + mov (a1+),e6 + mov (a1+),e7 + mov d0,(a0+) + mov d1,(a0+) + mov e0,(a0+) + mov e1,(a0+) + mov e4,(a0+) + mov e5,(a0+) + mov e6,(a0+) + mov e7,(a0+) + + sub d3,d2 + bcc memcpy_4_loop + + movm (sp),[exreg1] + add d3,d2 + beq memcpy_4_no_remainder + +memcpy_4_remainder: + # cut 4-7 words down to 0-3 + cmp +16,d2 + bcs memcpy_4_three_or_fewer_words + mov (a1+),d0 + mov (a1+),d1 + mov (a1+),e0 + mov (a1+),e1 + mov d0,(a0+) + mov d1,(a0+) + mov e0,(a0+) + mov e1,(a0+) + add -16,d2 + beq memcpy_4_no_remainder + + # copy the remaining 1, 2 or 3 words +memcpy_4_three_or_fewer_words: + cmp +8,d2 + bcs memcpy_4_one_word + beq memcpy_4_two_words + mov (a1+),d0 + mov d0,(a0+) +memcpy_4_two_words: + mov (a1+),d0 + mov d0,(a0+) +memcpy_4_one_word: + mov (a1+),d0 + mov d0,(a0+) + +memcpy_4_no_remainder: + # check we copied the correct amount + # TODO: REMOVE CHECK + sub e3,a0,d2 + mov (20,sp),d1 + cmp d2,d1 + beq memcpy_done + break + break + break + +memcpy_done: + mov e3,a0 + ret [d2,d3],8 + + # handle misaligned copying +memcpy_1: + add -1,d2 + mov +1,d3 + setlb # setlb requires the next insns + # to occupy exactly 4 bytes + + sub d3,d2 + movbu (a1),d0 + movbu d0,(a0) + add_add d3,a1,d3,a0 + lcc + + mov e3,a0 + ret [d2,d3],8 + +memcpy_end: + .size memcpy, memcpy_end-memcpy |