/* common/lib/libc/arch/i386/string/strcpy.S */

/*
 * Written by J.T. Conklin <jtc@acorntoolworks.com>
 * Public domain.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: strcpy.S,v 1.2 2014/03/22 19:38:46 jakllsch Exp $")
#endif

/*
 * char *strcpy(char *dst, const char *src)
 *
 * Copies the NUL-terminated string at src, terminator included, to
 * dst and returns dst.  Both arguments are read from the stack.
 *
 * This strcpy implementation copies a byte at a time until the
 * source pointer is aligned to a word (4-byte) boundary; it then
 * copies by words until it finds a word that may contain a zero
 * byte, and copies that word a byte at a time, stopping at the
 * terminator or resuming the word loop if the word turns out to
 * contain no zero byte.
 *
 * While this may result in unaligned stores if the source and
 * destination pointers are unaligned with respect to each other,
 * it is still faster than either byte copies or the overhead of
 * an implementation suitable for machines with strict alignment
 * requirements.
 *
 * Register usage:
 *	%eax	source pointer (reloaded with dst as the return value)
 *	%ecx	destination pointer (clobbered)
 *	%ebx	byte/word being copied; saved on entry, restored on exit
 *	%edx	scratch for the zero-byte test (clobbered)
 */

ENTRY(strcpy)
	pushl	%ebx
	/* With %ebx pushed, dst is at 8(%esp) and src at 12(%esp). */
	movl	8(%esp),%ecx
	movl	12(%esp),%eax

	/*
	 * Align source to a word boundary.
	 * Copy one byte per iteration while the low two bits of the
	 * source pointer are non-zero; if the terminator is copied
	 * during this phase we are finished.
	 * Consider unrolling loop?
	 */
	_ALIGN_TEXT
.Lalign:
	testl	$3,%eax
	je	.Lword_aligned
	movb	(%eax),%bl
	incl	%eax
	movb	%bl,(%ecx)
	incl	%ecx
	testb	%bl,%bl
	jne	.Lalign
	jmp	.Ldone

	/*
	 * Word loop.  It is entered at .Lword_aligned; .Lloop stores
	 * the word that the previous pass proved free of zero bytes
	 * and advances the destination before the next word is loaded.
	 */
	_ALIGN_TEXT
.Lloop:
	movl	%ebx,(%ecx)
	addl	$4,%ecx
.Lword_aligned:
	/*
	 * The source pointer is word-aligned here, so this load may
	 * read bytes that follow the terminator, but never beyond the
	 * aligned word that contains it.
	 */
	movl	(%eax),%ebx
	addl	$4,%eax
	/*
	 * %edx = word - 0x01010101.  A zero byte in the word always
	 * leaves bit 7 of some result byte set, so if none of the
	 * 0x80808080 bits are set the word holds no terminator and
	 * can be stored whole.
	 */
	leal	-0x01010101(%ebx),%edx
	testl	$0x80808080,%edx
	je	.Lloop

	/*
	 * The test above is conservative: a byte of 0x81 or above also
	 * sets one of the tested bits, so the loop may exit for a word
	 * that contains no zero byte.  Copy the four bytes of the word
	 * one at a time, lowest address first (%bl, %bh, then the
	 * upper half shifted down), stopping after the first zero byte.
	 * We must return to the loop if none of the bytes in the word
	 * equal 0.
	 */

	/* Byte 0. */
	movb	%bl,(%ecx)
	incl	%ecx
	testb	%bl,%bl
	je	.Ldone

	/* Byte 1. */
	movb	%bh,(%ecx)
	incl	%ecx
	testb	%bh,%bh
	je	.Ldone

	/* Move bytes 2 and 3 of the word down into %bl and %bh. */
	shrl	$16,%ebx
	/* Byte 2. */
	movb	%bl,(%ecx)
	incl	%ecx
	testb	%bl,%bl
	je	.Ldone

	/*
	 * Byte 3.  If it is non-zero the whole word has already been
	 * stored bytewise and %ecx advanced by four, so re-enter the
	 * word loop at the load, skipping the word store at .Lloop.
	 */
	movb	%bh,(%ecx)
	incl	%ecx
	testb	%bh,%bh
	jne	.Lword_aligned

.Ldone:
	/*
	 * Return the original destination pointer; it is reloaded from
	 * the stack because %ecx has been advanced during the copy.
	 */
	movl	8(%esp),%eax
	popl	%ebx
	ret
END(strcpy)