summaryrefslogtreecommitdiffstats
path: root/lib/libc/amd64/string/strcat.S
blob: 7b5a1dd39cd3063edfd18fafc24232e6016409f8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>
__FBSDID("$FreeBSD$");

#if 0
	RCSID("$NetBSD: strcat.S,v 1.4 2004/07/26 18:51:21 drochner Exp $")
#endif

/*
 * char *strcat(char *dst, const char *src)
 *
 * Append the NUL-terminated string at src to the end of the
 * NUL-terminated string at dst and return dst.
 *
 * In:		%rdi = dst, %rsi = src (System V AMD64 argument registers)
 * Out:		%rax = dst (saved on entry and never written again)
 * Clobbers:	%rcx, %rdx, %rsi, %rdi, %r8, %r9, flags
 * Stack:	not used
 *
 * The routine has two phases:
 *   1. scan: advance %rdi to the terminating NUL of dst;
 *   2. copy: copy src, including its NUL, to that position.
 * Each phase first steps byte by byte until its pointer is 8-byte
 * aligned and then works one 64-bit word at a time.
 *
 * Word-at-a-time NUL filter used by both phases:
 *	(word - 0x0101010101010101) & 0x8080808080808080
 * A zero result proves that no byte of the word is 0.  A non-zero
 * result only means that a byte *may* be 0: it is also produced by
 * bytes in the range 0x81-0xff.  Every non-zero result is therefore
 * followed by an exact byte-by-byte check.
 *
 * The word loads can read bytes that lie after a terminating NUL, but
 * only within the aligned 8-byte word that contains that NUL.
 */
ENTRY(strcat)
	movq	%rdi,%rax	/* return value: original dst */
	/* Constants for the NUL filter, kept in registers for both loops. */
	movabsq	$0x0101010101010101,%r8
	movabsq	$0x8080808080808080,%r9

	/*
	 * Align destination to word boundary.
	 * Consider unrolling loop?
	 *
	 * Step through dst one byte at a time until %rdi is a multiple
	 * of 8.  If the terminating NUL is found first, %rdi already
	 * points at it and the scan phase is finished.
	 */
.Lscan:
.Lscan_align:
	testb	$7,%dil
	je	.Lscan_aligned
	cmpb	$0,(%rdi)
	je	.Lcopy
	incq	%rdi
	jmp	.Lscan_align

	/*
	 * NOTE(review): with GNU as on x86 ELF targets, ".align 4" requests
	 * 4-byte alignment, not 16-byte.  Confirm whether ".p2align 4" was
	 * intended for this loop head.
	 */
	.align	4
.Lscan_aligned:
.Lscan_loop:
	/*
	 * Load the next aligned word of dst and apply the NUL filter.
	 * %rdi is advanced before the test, so on loop exit the word
	 * just examined lies at -8(%rdi) .. -1(%rdi).
	 */
	movq	(%rdi),%rdx
	addq	$8,%rdi
	subq	%r8,%rdx
	testq	%r9,%rdx
	je	.Lscan_loop

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 *
	 * Check the eight bytes in ascending address order.  On the
	 * first zero byte, rewind %rdi so that it points at that byte
	 * and start copying.
	 */

	cmpb	$0,-8(%rdi)	/* 1st byte == 0? */
	jne	1f
	subq	$8,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-7(%rdi)	/* 2nd byte == 0? */
	jne	1f
	subq	$7,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-6(%rdi)	/* 3rd byte == 0? */
	jne	1f
	subq	$6,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-5(%rdi)	/* 4th byte == 0? */
	jne	1f
	subq	$5,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-4(%rdi)	/* 5th byte == 0? */
	jne	1f
	subq	$4,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-3(%rdi)	/* 6th byte == 0? */
	jne	1f
	subq	$3,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-2(%rdi)	/* 7th byte == 0? */
	jne	1f
	subq	$2,%rdi
	jmp	.Lcopy

1:	cmpb	$0,-1(%rdi)	/* 8th byte == 0? */
	jne	.Lscan_loop	/* false positive: no NUL in this word */
	subq	$1,%rdi

	/*
	 * Align source to a word boundary.
	 * Consider unrolling loop?
	 *
	 * Here %rdi points at the terminating NUL of dst.  Copy src one
	 * byte at a time until %rsi is a multiple of 8.  Each byte is
	 * stored before it is tested, so the NUL itself is copied; if it
	 * is reached in this loop the whole string has been appended and
	 * the function returns.
	 */
.Lcopy:
.Lcopy_align:
	testb	$7,%sil
	je	.Lcopy_aligned
	movb	(%rsi),%dl
	incq	%rsi
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl
	jne	.Lcopy_align
	ret

	/*
	 * NOTE(review): same remark as for the scan loop; ".align 4" is
	 * 4-byte alignment with GNU as on x86 ELF.  Confirm the intent.
	 */
	.align	4
	/*
	 * Word copy loop, entered at .Lcopy_aligned.  A word is loaded and
	 * filtered first; it is stored at the top of the loop only after
	 * the filter has shown that it contains no NUL.  Only %rsi is known
	 * to be aligned here: nothing in this function aligns %rdi for the
	 * copy, so the 8-byte stores may be unaligned.
	 */
.Lcopy_loop:
	movq	%rdx,(%rdi)
	addq	$8,%rdi
.Lcopy_aligned:
	movq	(%rsi),%rdx
	movq	%rdx,%rcx	/* filter a copy; %rdx keeps the data */
	addq	$8,%rsi
	subq	%r8,%rcx
	testq	%r9,%rcx
	je	.Lcopy_loop

	/*
	 * In rare cases, the above loop may exit prematurely. We must
	 * return to the loop if none of the bytes in the word equal 0.
	 *
	 * The word in %rdx may contain the terminator.  Store it one
	 * byte at a time, lowest-addressed byte first, shifting %rdx
	 * right by 8 to expose the next byte, and stop right after the
	 * NUL has been written.
	 */

	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 1st byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 2nd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 3rd byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 4th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 5th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 6th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 7th byte == 0? */
	je	.Ldone

	shrq	$8,%rdx
	movb	%dl,(%rdi)
	incq	%rdi
	testb	%dl,%dl		/* 8th byte == 0? */
	/*
	 * False positive: all eight bytes were stored and none was 0.
	 * %rsi was already advanced past this word, so resume with the
	 * next source word.
	 */
	jne	.Lcopy_aligned

.Ldone:
	ret			/* %rax still holds the original dst */
END(strcat)

	.section .note.GNU-stack,"",%progbits
OpenPOWER on IntegriCloud