From c50aa1163b69fcc4a3e96fc50fc21650d7007875 Mon Sep 17 00:00:00 2001 From: gnn Date: Thu, 21 Jul 2011 16:32:13 +0000 Subject: Make both stpcpy and strcpy be assembly language implementations on amd64. Submitted by: Guillaume Morin (guillaume at morinfr.org) Reviewed by: kib, jhb Approved by: re (bz) MFC after: 1 month --- lib/libc/amd64/string/Makefile.inc | 2 +- lib/libc/amd64/string/stpcpy.S | 116 +++++++++++++++++++++++++++++++++++++ lib/libc/amd64/string/strcpy.S | 114 ------------------------------------ lib/libc/amd64/string/strcpy.c | 38 ++++++++++++ 4 files changed, 155 insertions(+), 115 deletions(-) create mode 100644 lib/libc/amd64/string/stpcpy.S delete mode 100644 lib/libc/amd64/string/strcpy.S create mode 100644 lib/libc/amd64/string/strcpy.c (limited to 'lib/libc') diff --git a/lib/libc/amd64/string/Makefile.inc b/lib/libc/amd64/string/Makefile.inc index f5d69d6..46571ab 100644 --- a/lib/libc/amd64/string/Makefile.inc +++ b/lib/libc/amd64/string/Makefile.inc @@ -1,4 +1,4 @@ # $FreeBSD$ MDSRCS+= bcmp.S bcopy.S bzero.S memcmp.S memcpy.S memmove.S memset.S \ - strcat.S strcmp.S strcpy.S + strcat.S strcmp.S stpcpy.S strcpy.c diff --git a/lib/libc/amd64/string/stpcpy.S b/lib/libc/amd64/string/stpcpy.S new file mode 100644 index 0000000..c8772f8 --- /dev/null +++ b/lib/libc/amd64/string/stpcpy.S @@ -0,0 +1,116 @@ +/* + * Adapted by Guillaume Morin from strcpy.S + * written by J.T. Conklin + * Public domain. + */ + +#include <machine/asm.h> +__FBSDID("$FreeBSD$"); + +/* + * This stpcpy implementation copies a byte at a time until the + * source pointer is aligned to a word boundary, it then copies by + * words until it finds a word containing a zero byte, and finally + * copies by bytes until the end of the string is reached.
+ * + * While this may result in unaligned stores if the source and + * destination pointers are unaligned with respect to each other, + * it is still faster than either byte copies or the overhead of + * an implementation suitable for machines with strict alignment + * requirements. + */ + + .globl stpcpy,__stpcpy +ENTRY(stpcpy) +__stpcpy: + movabsq $0x0101010101010101,%r8 + movabsq $0x8080808080808080,%r9 + + /* + * Align source to a word boundary. + * Consider unrolling loop? + */ +.Lalign: + testb $7,%sil + je .Lword_aligned + movb (%rsi),%dl + incq %rsi + movb %dl,(%rdi) + incq %rdi + testb %dl,%dl + jne .Lalign + movq %rdi,%rax + dec %rax + ret + + .p2align 4 +.Lloop: + movq %rdx,(%rdi) + addq $8,%rdi +.Lword_aligned: + movq (%rsi),%rdx + movq %rdx,%rcx + addq $8,%rsi + subq %r8,%rcx + testq %r9,%rcx + je .Lloop + + /* + * In rare cases, the above loop may exit prematurely. We must + * return to the loop if none of the bytes in the word equal 0. + */ + + movb %dl,(%rdi) + testb %dl,%dl /* 1st byte == 0? */ + je .Ldone + incq %rdi + + shrq $8,%rdx + movb %dl,(%rdi) + testb %dl,%dl /* 2nd byte == 0? */ + je .Ldone + incq %rdi + + shrq $8,%rdx + movb %dl,(%rdi) + testb %dl,%dl /* 3rd byte == 0? */ + je .Ldone + incq %rdi + + shrq $8,%rdx + movb %dl,(%rdi) + testb %dl,%dl /* 4th byte == 0? */ + je .Ldone + incq %rdi + + shrq $8,%rdx + movb %dl,(%rdi) + testb %dl,%dl /* 5th byte == 0? */ + je .Ldone + incq %rdi + + shrq $8,%rdx + movb %dl,(%rdi) + testb %dl,%dl /* 6th byte == 0? */ + je .Ldone + incq %rdi + + shrq $8,%rdx + movb %dl,(%rdi) + testb %dl,%dl /* 7th byte == 0? */ + je .Ldone + incq %rdi + + shrq $8,%rdx + movb %dl,(%rdi) + incq %rdi + testb %dl,%dl /* 8th byte == 0? 
*/ + jne .Lword_aligned + decq %rdi + +.Ldone: + movq %rdi,%rax + ret +END(stpcpy) + + .section .note.GNU-stack,"",%progbits diff --git a/lib/libc/amd64/string/strcpy.S b/lib/libc/amd64/string/strcpy.S deleted file mode 100644 index 5feb925..0000000 --- a/lib/libc/amd64/string/strcpy.S +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Written by J.T. Conklin - * Public domain. - */ - -#include <machine/asm.h> -__FBSDID("$FreeBSD$"); - -#if 0 - RCSID("$NetBSD: strcpy.S,v 1.3 2004/07/19 20:04:41 drochner Exp $") -#endif - -/* - * This strcpy implementation copies a byte at a time until the - * source pointer is aligned to a word boundary, it then copies by - * words until it finds a word containing a zero byte, and finally - * copies by bytes until the end of the string is reached. - * - * While this may result in unaligned stores if the source and - * destination pointers are unaligned with respect to each other, - * it is still faster than either byte copies or the overhead of - * an implementation suitable for machines with strict alignment - * requirements. - */ - -ENTRY(strcpy) - movq %rdi,%rax - movabsq $0x0101010101010101,%r8 - movabsq $0x8080808080808080,%r9 - - /* - * Align source to a word boundary. - * Consider unrolling loop? - */ -.Lalign: - testb $7,%sil - je .Lword_aligned - movb (%rsi),%dl - incq %rsi - movb %dl,(%rdi) - incq %rdi - testb %dl,%dl - jne .Lalign - ret - - .p2align 4 -.Lloop: - movq %rdx,(%rdi) - addq $8,%rdi -.Lword_aligned: - movq (%rsi),%rdx - movq %rdx,%rcx - addq $8,%rsi - subq %r8,%rcx - testq %r9,%rcx - je .Lloop - - /* - * In rare cases, the above loop may exit prematurely. We must - * return to the loop if none of the bytes in the word equal 0. - */ - - movb %dl,(%rdi) - incq %rdi - testb %dl,%dl /* 1st byte == 0? */ - je .Ldone - - shrq $8,%rdx - movb %dl,(%rdi) - incq %rdi - testb %dl,%dl /* 2nd byte == 0? */ - je .Ldone - - shrq $8,%rdx - movb %dl,(%rdi) - incq %rdi - testb %dl,%dl /* 3rd byte == 0?
*/ - je .Ldone - - shrq $8,%rdx - movb %dl,(%rdi) - incq %rdi - testb %dl,%dl /* 4th byte == 0? */ - je .Ldone - - shrq $8,%rdx - movb %dl,(%rdi) - incq %rdi - testb %dl,%dl /* 5th byte == 0? */ - je .Ldone - - shrq $8,%rdx - movb %dl,(%rdi) - incq %rdi - testb %dl,%dl /* 6th byte == 0? */ - je .Ldone - - shrq $8,%rdx - movb %dl,(%rdi) - incq %rdi - testb %dl,%dl /* 7th byte == 0? */ - je .Ldone - - shrq $8,%rdx - movb %dl,(%rdi) - incq %rdi - testb %dl,%dl /* 8th byte == 0? */ - jne .Lword_aligned - -.Ldone: - ret -END(strcpy) - - .section .note.GNU-stack,"",%progbits diff --git a/lib/libc/amd64/string/strcpy.c b/lib/libc/amd64/string/strcpy.c new file mode 100644 index 0000000..11a24eb --- /dev/null +++ b/lib/libc/amd64/string/strcpy.c @@ -0,0 +1,38 @@ +/* + * Copyright 2011 George V. Neville-Neil. All rights reserved. + * + * The compilation of software known as FreeBSD is distributed under the + * following terms: + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +char *__stpcpy(char * __restrict, const char * __restrict); + +char * +strcpy(char * __restrict to, const char * __restrict from) +{ + __stpcpy(to, from); + return(to); +} -- cgit v1.1