summaryrefslogtreecommitdiff
path: root/common/lib/libc/arch/arm/string/strcpy_arm.S
blob: 8ad6ea079b937059b5feae29c006a3a6e4c66437 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
/*-
 * Copyright (c) 2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

RCSID("$NetBSD: strcpy_arm.S,v 1.3 2013/08/11 04:56:32 matt Exp $")

/*
 * Pick the routine this file assembles into.  Defining STRLCPY builds
 * strlcpy, defining STRNCPY builds strncpy, and defining neither builds
 * strcpy.  FUNCNAME is the symbol name handed to ENTRY()/END() below.
 *
 * NOTE(review): in libc builds (_LIBC) WEAK_ALIAS presumably publishes
 * strlcpy as a weak alias for _strlcpy; the macro is defined outside this
 * file, so confirm against <machine/asm.h>.
 */
#ifdef STRLCPY
#ifdef _LIBC
WEAK_ALIAS(strlcpy, _strlcpy)
#endif
#define	FUNCNAME	strlcpy
#elif defined(STRNCPY)
#define	FUNCNAME	strncpy
#else
#define	FUNCNAME	strcpy
#endif

#ifdef _LIBC
#include "namespace.h"
#endif

/*
 * Endian-neutral helpers used by the copy loops.
 *
 * lslo/lshi name the shift that moves data towards the lower/higher
 * memory address within a loaded word, and BYTEn is the mask selecting
 * the n'th byte counted from the lowest address (BYTE0 is the first
 * byte of the string within the word).  The definitions are swapped
 * between little-endian (__ARMEL__) and big-endian builds so the code
 * below can be written once in terms of "byte n" rather than bit
 * positions.
 */
#ifdef __ARMEL__
#define	lslo	lsr		/* shift to lower address */
#define	lshi	lsl		/* shift to higher address */
#define	BYTE0	0x000000ff
#define	BYTE1	0x0000ff00
#define	BYTE2	0x00ff0000
#define	BYTE3	0xff000000
#else
#define	lslo	lsl		/* shift to lower address */
#define	lshi	lsr		/* shift to higher address */
#define	BYTE0	0xff000000
#define	BYTE1	0x00ff0000
#define	BYTE2	0x0000ff00
#define	BYTE3	0x000000ff
#endif

/*
 * On armv6 and later, to quickly determine if a word contains a NUL (0) byte,
 * we add 254 to each byte using the UQADD8 (unsigned saturating add 8)
 * instruction.  For every non-NUL byte, the result for that byte will become
 * 255.  For NUL, it will be 254.  When we complement the result of all 4 adds,
 * if the result is non-0 then we must have encountered a NUL.
 *
 * For earlier architectures, we just use tst on each of the 4 bytes.  There
 * are other algorithms to detect NULs but they take longer and use more
 * instructions.
 */

/*
 * char *strcpy(char *dst, const char *src);
 * char *strncpy(char *dst, const char *src, size_t len);
 * size_t strlcpy(char *dst, const char *src, size_t len);
 */

	.text
ENTRY(FUNCNAME)
#if defined(STRLCPY)
	cmp	r2, #1			/* is length 1 or less? */
	bhi	1f			/*   no, do normal */
	moveq	r3, #0			/*   = 1? load NUL */
	strbeq	r3, [r0]		/*   = 1? write NUL to dst */
	mov	r0, r1			/* move src to r0 */
	b	PLT_SYM(_C_LABEL(strlen)) /* and tailcall strlen */
1:
	sub	r2, r2, #1		/* leave one byte for NUL */
#endif
#if defined(STRNCPY)
	cmp	r2, #0			/* 0 length? */
	RETc(eq)			/*   yes, just return */
#endif
	push	{r4-r9}			/* save some registers */
#ifdef _ARM_ARCH_6
#ifdef _ARM_ARCH_7
	movw	r7, #0xfefe		/* magic constant; 254 in each byte */
#else
	mov	r7, #0xfe		/* put 254 in low byte */
	orr	r7, r7, r7, lsl #8	/* move to next byte */
#endif
	orr	r7, r7, r7, lsl #16	/* move to next halfword */
#endif

#if defined(STRLCPY)
	add	r6, r1, #1		/* save for return (deal with NUL) */
#else
	mov	r6, r0			/* save for return */
#endif

.Ldst_align:
	tst	r0, #3			/* check for dst alignment */
	beq	.Ldst_aligned		/*   ok, proceed to next check */
	ldrb	r5, [r1], #1		/* load a byte */
#if defined(STRNCPY)
	subs	r2, r2, #1		/* subtract out from count */
	bmi	.Ldst_full		/*   zero? the dst has no more room */
#endif
	strb	r5, [r0], #1		/* store a byte */
	teq	r5, #0			/* was it a NUL? */
	beq	.Lend_of_string		/*   yes, we are done */
#if defined(STRLCPY)
	subs	r2, r2, #1		/* subtract one from count */
	strbeq	r2, [r0], #1		/*    zero? write trailing NUL */
	beq	.Ldst_full		/*    zero? the dst has no more room */
#endif
	b	.Ldst_align		/* loop around for next byte */
.Ldst_aligned:
	tst	r1, #3			/* get the misalignment of src */
	bne	.Lincongruent		/*  !=? incongruent (slower) */

	/*   =?   congruent (faster) */

.Lcongruent:
#if defined(STRLCPY)
	add	r6, r6, #3		/* compensate for word post-inc */
#endif
	b	.Lcongruent_mainloop_load
.Lcongruent_mainloop:
#if defined(STRLCPY) || defined(STRNCPY)
	subs	r2, r2, #4		/* subtract 4 from the count */
	bmi	.Lno_more_room
#endif
	str	r5, [r0], #4		/* store word into dst */
#if defined(STRLCPY)
	beq	.Lno_more_room		/*   count is 0? no room in dst */
#endif
#if defined(STRNCPY)
	beq	.Ldst_full_word_aligned	/*   count is 0? no room in dst */
#endif
.Lcongruent_mainloop_load:
	ldr	r5, [r1], #4		/* load word from source */
#if defined(_ARM_ARCH_6)
	uqadd8	r3, r5, r7		/* magic happens here */
	mvns	r3, r3			/* is the complemented result 0? */
	beq	.Lcongruent_mainloop	/*    yes, no NULs, do it again */
#else
	tst	r5, #BYTE0		/* does byte 0 contain a NUL? */
	tstne	r5, #BYTE1		/*   no, does byte 1 contain a NUL? */
	tstne	r5, #BYTE2		/*   no, does byte 2 contain a NUL? */
	tstne	r5, #BYTE3		/*   no, does byte 3 contain a NUL? */
	bne	.Lcongruent_mainloop	/*    yes, no NULs, do it again */
#endif
#if defined(STRLCPY) && 0
	sub	r1, r1, #3		/* back up src pointer */
#endif
#if defined(_ARM_ARCH_6)
#ifdef __ARMEL__
	rev	r3, r3			/* CLZ needs BE data */
#endif
	clz	r3, r3			/* count leading zeros */
#else
	mov	r3, #0			/* assume NUL is in byte 0 */
	tst	r5, #BYTE0		/* is NUL in byte 2? */
	beq	.Lcongruent_last_bytes	/*   yes, done searching. */
	mov	r3, #8			/* assume NUL is in byte 1 */
	tst	r5, #BYTE1		/* is NUL in byte 2? */
	beq	.Lcongruent_last_bytes	/*   yes, done searching. */
	mov	r3, #16			/* assume NUL is in byte 2 */
	tst	r5, #BYTE2		/* is NUL in byte 2? */
#if !defined(STRLCPY)
	beq	.Lcongruent_last_bytes	/*   yes, done searching. */
	mov	r3, #24			/* NUL must be in byte 3 */
#else
	movne	r3, #24			/*    no, then NUL is in byte 3 */
#endif
#endif /* _ARM_ARCH_6 */
#if defined(STRLCPY)
.Lcongruent_last_bytes:
#endif
#if defined(STRLCPY)
	add	r1, r1, r3, lsr #3	/* position to point at NUL + 4 */
#endif
	b	.Llast_bytes		/* store the last bytes */


.Lincongruent:
	/*
	 * At this point dst is word aligned by src is not.  Read bytes
	 * from src until it is read aligned.
	 */
	and	r3, r1, #3		/* extract misalignment */
	mov	r9, r3, lsl #3		/* calculate discard shift */
	rsb	r8, r9, #32		/* calculate insertion shift */
#if defined(STRLCPY)
	add	r6, r6, #3		/* compensate for word post-inc */	
#endif
	bic	r1, r1, #3		/* word align src */
	ldr	r5, [r1], #4		/* load word frm src */
	mov	r4, r5, lslo r9		/* discard lo bytes from src */
	tst	r4, #BYTE0		/* does byte 0 contain a NUL? */
#if defined(STRNCPY)
	beq	.Lend_of_string		/*   yes, zero fill rest of string */
#else
	moveq	r3, r9			/*   yes, set offset */
	beq	.Lincongruent_end_of_string /*   yes, deal with the last bytes */
#endif
	/*
	 * To make our test for NULs below do not generate false positives,
	 * fill the bytes in the word we don't want to match with all 1s.
	 */
	mvn	r3, #0			/* create a mask */
	mov	r3, r3, lslo r8		/* zero out bytes being kept */
	orr	r5, r5, r3		/* merge src and mask */
#ifdef _ARM_ARCH_6
	uqadd8	r3, r5, r7		/* NUL detection magic happens */
	mvns	r3, r3			/* is the complemented result 0? */
	beq	.Lincongruent_mainloop_load /*   yes, no NUL encountered! */
#ifdef __ARMEL__
	rev	r3, r3			/* CLZ wants BE input */
#endif
	clz	r3, r3			/* count leading zeros */
#else
	/*
	 * We already tested for byte 0 above so we don't need to it again.
	 */
	mov	r3, #24			/* assume NUL is in byte 3 */
	tst	r5, #BYTE1		/* did we find a NUL in byte 1? */
	subeq	r3, r3, #8		/*   yes, decremnt byte position */
	tstne	r5, #BYTE2		/*   no, did we find a NUL in byte 2? */
	subeq	r3, r3, #8		/*   yes, decremnt byte position */
	tstne	r5, #BYTE3		/*   no, did we find a NUL in byte 3? */
	bne	.Lincongruent_mainloop_load /*   no, no NUL encountered! */
#endif
	mov	r5, r4			/* discard already dealt with bytes */
.Lincongruent_end_of_string:
#if defined(STRLCPY)
	add	r1, r1, r3, lsr #3	/* then add offset to NUL */
#endif
	sub	r3, r3, r9		/* adjust NUL offset */
	b	.Llast_bytes		/* NUL encountered! finish up */

#if defined(STRLCPY) || defined(STRNCPY)
.Lincongruent_no_more_room:
	mov	r5, r4			/* move data to be stored to r5 */
	b	.Lno_more_room		/* fill remaining space */
#endif /* STRLCPY || STRNCPY */

	/*
	 * At this point both dst and src are word aligned and r4 contains
	 * partial contents from src.
	 */
.Lincongruent_mainloop:
	orr	r4, r4, r5, lshi r8	/* put new src data into dst word */
#if defined(STRLCPY) || defined(STRNCPY)
	subs	r2, r2, #4		/* subtract 4 from count */
	bmi	.Lincongruent_no_more_room /*   count < 0? dst will be full */
#endif
	str	r4, [r0], #4		/* store word in dst */
#if defined(STRLCPY)
	beq	.Lno_more_room		/*   space left is 0? stop copy */
#endif
#if defined(STRNCPY)
	beq	.Ldst_full_word_aligned	/*   space left is 0? stop copy */
#endif
	mov	r4, r5, lslo r9		/* move rest of src into dst word */
.Lincongruent_mainloop_load:
	ldr	r5, [r1], #4		/* read src */
#ifdef _ARM_ARCH_6
	uqadd8	r3, r5, r7		/* magic happens here */
	mvns	r3, r3			/* is the complemented result 0? */
	beq	.Lincongruent_mainloop	/*   yes, no NUL encountered! */
	/*
	 * fall into this since we encountered a NULL.  At this point we have
	 * from 1-5 bytes (excluding trailing NUL) to write.
	 */
#ifdef __ARMEL__
	rev	r3, r3			/* CLZ works on BE data */
#endif
	clz	r3, r3			/* count leading zeroes */
#else
	tst	r5, #BYTE0		/* does byte 0 contain a NUL? */
	tstne	r5, #BYTE1		/*   no, does byte 1 contain a NUL? */
	tstne	r5, #BYTE2		/*   no, does byte 2 contain a NUL? */
	tstne	r5, #BYTE3		/*   no, does byte 3 contain a NUL? */
	bne	.Lincongruent_mainloop	/*   no, no NUL encountered! */
	/*
	 * fall into this since we encountered a NULL.  At this point we have
	 * from 1-5 bytes (excluding trailing NUL) to write.
	 */
	mov	r3, #0			/* assume a NUL is in byte 0 */
	tst	r5, #BYTE0		/* is there a NUL in byte 0? */
	beq	1f			/*   yes, found a NUL! */
	mov	r3, #8			/* assume a NUL is in byte 1 */
	tst	r5, #BYTE1		/* is there a NUL in byte 0? */
	beq	1f			/*   yes, found a NUL! */
	tst	r5, #BYTE2		/* is there a NUL in byte 2? */
	moveq	r3, #16			/*   yes, mark its position */
	movne	r3, #24			/*   no, it must be in byte 3 */
1:
#endif
	orr	r4, r4, r5, lshi r8	/* merge new and old src words */
#if defined(STRLCPY)
	add	r1, r1, r3, lsr #3	/* adjust src to point to NUL */
#endif
	add	r3, r3, r8		/* add remainder bytes worth */
	cmp	r3, #32			/* do we have at least one word to write? */
	movlt	r5, r4			/*   no, move source bytes to expected reg */
	blt	.Llast_bytes		/*   no, deal with them */
#if defined(STRLCPY)
	subs	r2, r2, #4		/* subtract 4 from count */
	bpl	1f			/*   we have space for at least 4 */
	/*
	 * Since the space just went minus, we don't have enough room to
	 * write all 4 bytes.  In fact, the most we can write is 3 so just
	 * just lie and say we have 3 bytes to write and discard the rest.
	 */
	add	r2, r2, #4		/* add 4 back */
	mov	r3, #24			/* say we have 3 bytes */
	mov	r5, r4			/* discard the bytes we can't store */
	b	.Llast_bytes		/* and treat this as our last word */
1:
#elif defined(STRNCPY)
	subs	r2, r2, #4		/* subtract 4 from count */
	bmi	.Lincongruent_no_more_room /*   count < 0? dst will be full */
#endif
	str	r4, [r0], #4		/* store dst word */
#if defined(STRNCPY)
	beq	.Ldst_full_word_aligned	/*   space left is 0? stop copy */
#endif
#if defined(STRLCPY)
	bne	1f			/* we still have space remaining */
	strb	r2, [r0]		/* write final NUL */
	b	.Lend_of_string		/* we are done */
1:
#endif
	/*
	 * Subtract the 32 bits just written from the number of bits left
	 * to write.  If 0 bits are left and not doing strncpy, just write
	 * the trailing NUL and be done.
	 */
	subs	r3, r3, #32		/* we wrote one word */
#if !defined(STRNCPY)
	bne	1f			/* no more data? */
	strb	r3, [r0]		/* write final NUL */
	b	.Lend_of_string		/* we are done */
1:
#endif
	/*
	 * At this point after writing 4 bytes, we have 0 or 1 bytes left to
	 * write (excluding the trailing NUL).
	 */
	mov	r5, r5, lslo r9		/* get remainder of src */

	/* fall into .Llast_bytes */

#if !defined(STRLCPY)
.Lcongruent_last_bytes:
#endif
.Llast_bytes:
	/*
	 * r5 contains the last word and is in host byte order.
	 * r3 contains number of bits left to copy (0..31).
	 * r1 should point to the NUL + 4.
	 */
	bics	ip, r3, #7		/* truncate bits, is result 0? */
#if !defined(STRNCPY)
	bne	1f			/*   no, have to write some bytes */
	strb	ip, [r0]		/*   yes, write trailing NUL */
	b	.Lend_of_string		/*   yes, and we are the end */
1:
#endif
#if defined(STRLCPY) || defined(STRNCPY)
	cmp	r2, ip, lsr #3		/* is there enough room? */
	movlt	ip, r2, lsl #3		/*   no, only fill remaining space */
#endif
	mvn	r3, #0			/* create a mask */
	mov	r3, r3, lshi ip		/* clear leading bytes */
	bic	r5, r5, r3		/* clear trailing bytes */
#if defined(STRNCPY)
	cmp	r2, #4			/* room for 4 bytes? */
	movge	ip, #32			/*   yes, we will write 4 bytes */
	bge	2f			/*   yes, and go do it */
	mvn	r3, #0			/* create a mask (again) */
	mov	ip, r2, lsl #3		/* remaining space bytes -> bits */
	mov	r3, r3, lshi ip		/* clear remaining bytes */
#elif defined(STRLCPY)
	cmp	r2, #3			/* do we have room for 3 bytes & NUL? */
	bge	2f			/*   yes, just clear out dst */
	mov	r3, r3, lshi #8		/* mask out trailing NUL */
#else
	cmp	ip, #24			/* are we writing 3 bytes & a NUL? */
	bge	2f			/*   yes, just overwrite dst */
	mov	r3, r3, lshi #8		/* mask out trailing NUL */
#endif /* !STRNCPY */
	ldr	r4, [r0]		/* fetch dst word */
	and	r4, r4, r3		/* preserve trailing bytes */
	orr	r5, r5, r4		/* merge dst with src */
2:	str	r5, [r0], #4		/* store last word */
#if defined(STRNCPY)
	subs	r2, r2, ip, lsr #3	/* subtract bytes cleared from count */
	beq	.Ldst_full_word_aligned
#endif
	b	.Lend_of_string

#if defined(STRLCPY) || defined(STRNCPY)
.Lno_more_room:
#if defined(STRLCPY)
	cmp	r2, #-1			/* tried to write 3 bytes? */
	blt	1f			/*   less, partial word write */
	cmp	r2, #0			/* no space left? */
	strbeq	r2, [r0]		/* write the final NUL */
	bicne	r5, r5, #BYTE3		/* clear trailing NUL */
	strne	r5, [r0]		/* write last word */
	b	.Ldst_full_word_aligned	/* the dst buffer is full */
1:
#endif /* STRLCPY */
	add	r2, r2, #4		/* restore remaining space */
	ldr	r4, [r0]		/* load dst */
	mvn	r3, #0			/* create a mask */
	mov	r2, r2, lsl #3		/* bytes -> bits */
	mov	r3, r3, lshi r2		/* clear leading bytes */
	bic	r5, r5, r3		/* clear trailing bytes from src */
#if defined(STRLCPY)
	mov	r3, r3, lshi #8		/* mask out trailing NUL */
#endif /* STRLCPY */
	and	r4, r4, r3		/* preserve trailing bytes in dst */
	orr	r4, r4, r5		/* merge src with dst */
	str	r4, [r0], #4		/* write last word */
	b	.Ldst_full_word_aligned
#endif /* STRLCPY || STRNCPY */

#if defined(STRLCPY)
	/*
	 * Destination was filled (and NUL terminated).
	 * All that's left is count the number of bytes left in src.
	 */
.Ldst_full:
1:	tst	r1, #3			/* dst word aligned? */
	beq	2f			/*   yes, so do it word by word */
	ldrb	r5, [r1], #1		/* load next byte */
	teq	r5, #0			/* is it a NUL? */
	bne	1b			/*   no, check alignment */
	b	.Lend_of_string		/* and return */
2:	add	r6, r6, #3		/* compensate for post-inc */
.Ldst_full_word_aligned:
3:	ldr	r5, [r1], #4		/* load word from src */
#ifdef _ARM_ARCH_6
	uqadd8	r5, r5, r7		/* perform NUL magic */
	mvns	r5, r5			/* complement all 0s? */
	beq	3b			/*   yes, no NUL so get next word */
#else
	tst	r5, #BYTE0		/* does byte 0 contain a NUL? */
	tstne	r5, #BYTE1		/*   no, does byte 1 contain a NUL? */
	tstne	r5, #BYTE2		/*   no, does byte 2 contain a NUL? */
	tstne	r5, #BYTE3		/*   no, does byte 3 contain a NUL? */
	bne	3b			/*   no, no NUL encountered! */
#endif
#ifdef _ARM_ARCH_6
#ifdef __ARMEL__
	rev	r5, r5			/* CLZ needs BE data */
#endif
	clz	r5, r5			/* count leading zeros */
	add	r1, r1, r5, lsr #3	/* add offset to NUL to src pointer */
#else
	tst	r5, #BYTE0		/* is there a NUL in byte 0? */
	beq	4f			/*   yes, don't check any further */
	add	r1, r1, #1		/*   no, advance src pointer by 1 */
	tst	r5, #BYTE1		/* is there a NUL in byte 1? */
	beq	4f			/*   yes, don't check any further */
	add	r1, r1, #1		/*   no, advance src pointer by 1 */
	tst	r5, #BYTE2		/* is there a NUL in byte 2? */
	addne	r1, r1, #1		/*   no, there must be in byte 3 */
4:
#endif /* _ARM_ARCH_6 */
.Lend_of_string:
	sub	r0, r1, r6		/* subtract start from finish */
	pop	{r4-r9}			/* restore registers */
	RET
#elif defined(STRNCPY)
.Lend_of_string:
	teq	r2, #0			/* any bytes left to zero? */
	beq	3f 			/*   no, just return. */
	mov	r1, #0			/*   yes, prepare to zero */
	cmp	r2, #16			/* some, but not a lot? */
	ble	1f
	mov	r4, lr			/* preserve lr */
	bl	PLT_SYM(_C_LABEL(memset)) /*   yes, and let memset do it */
	mov	lr, r4			/* restore lr */
	b	3f			/* return */
1:	add	ip, r0, r2		/* calculate stopping point */
2:	strb	r1, [r0], #1		/* clear a byte */
	cmp	r0, ip			/* done? */
	blt	2b			/*   no, clear next byte */
3:	mov	r0, r6			/* restore dst pointer */
	pop	{r4-r9}			/* restore registers */
	RET
.Ldst_full:
.Ldst_full_word_aligned:
	/*
	 * Destination was filled (but not NUL terminated).
	 * All that's left is return the start of dst
	 */
	mov	r0, r6			/* restore dst pointer */
	pop	{r4-r9}			/* restore registers */
	RET
#else
.Lend_of_string:
	mov	r0, r6			/* restore dst pointer */
	pop	{r4-r9}			/* restore registers */
	RET
#endif
END(FUNCNAME)