1 /* $NetBSD: memmove.S,v 1.4 2003/10/14 07:51:45 scw Exp $ */
4 * Copyright (c) 1997 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Neil A. Carson and Mark Brinicombe
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
32 #include <machine/asm.h>
33 __FBSDID("$FreeBSD$");
/*
 * ARM32 (pre-UAL) memmove()/bcopy(), derived from NetBSD.
 * NOTE(review): this excerpt is heavily elided -- the ENTRY() labels and
 * many interior instructions are missing from this view, so only comments
 * are changed here; no instruction is altered.
 *
 * Register roles (visible from the code below):
 *   r0 = dst, r1 = src, r2 = len; r3/r12/lr used as scratch.
 */
36 /* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
39 /* bcopy = memcpy/memmove with arguments reversed. */
40 /* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
42 /* switch the source and destination registers */
47 /* Do the buffers overlap? */
49 RETeq /* Bail now if src/dst are the same */
/* r3 = |dst - src|, computed either way round depending on the carry
 * from the (elided) compare above */
50 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */
51 subcs r3, r1, r0 /* if (src > dst) r3 = src - dst */
52 cmp r3, r2 /* if (r3 < len) we have an overlap */
/* No overlap: tail-call straight into memcpy (PIC-safe via PLT) */
53 bcc PIC_SYM(_C_LABEL(memcpy), PLT)
55 /* Determine copy direction */
/* Overlapping with dst above src: must copy descending */
57 bcc .Lmemmove_backwards
59 moveq r0, #0 /* Quick abort for len=0 */
/*
 * Forward (ascending) copy path.
 * Saves r0 so the function can return the original dest pointer.
 * NOTE(review): the alignment tests feeding the conditional branches
 * below are elided from this excerpt.
 */
62 stmdb sp!, {r0, lr} /* memmove() returns dest addr */
64 blt .Lmemmove_fl4 /* less than 4 bytes */
66 bne .Lmemmove_fdestul /* oh unaligned destination addr */
68 bne .Lmemmove_fsrcul /* oh unaligned source addr */
71 /* We have aligned source and destination */
73 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
75 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
76 stmdb sp!, {r4} /* borrow r4 */
78 /* blat 32 bytes at a time */
79 /* XXX for really big copies perhaps we should use more registers */
/* Two load/store-multiple pairs move 16 bytes each (4 regs x 4 bytes) */
81 ldmia r1!, {r3, r4, r12, lr}
82 stmia r0!, {r3, r4, r12, lr}
83 ldmia r1!, {r3, r4, r12, lr}
84 stmia r0!, {r3, r4, r12, lr}
/* "ge" condition set by an elided subs of the residual count */
89 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
90 stmgeia r0!, {r3, r4, r12, lr}
92 ldmia sp!, {r4} /* return r4 */
/*
 * Forward-copy tails: drain 12, then 8, then <4 bytes.
 * ldmeqia sp!, {r0, pc} both restores the saved dest pointer into r0
 * and returns (loads pc from the stacked lr) when the count hits zero.
 */
97 /* blat 12 bytes at a time */
99 ldmgeia r1!, {r3, r12, lr}
100 stmgeia r0!, {r3, r12, lr}
102 bge .Lmemmove_floop12
/* remaining 8-byte chunk, if the (elided) count test allows */
111 ldmgeia r1!, {r3, r12}
112 stmgeia r0!, {r3, r12}
116 /* less than 4 bytes to go */
118 ldmeqia sp!, {r0, pc} /* done */
120 /* copy the crud byte at a time */
/*
 * Forward path, misaligned cases.  First byte-copy until dst is
 * word-aligned, then dispatch on src alignment offset (1/2/3 bytes)
 * to the matching shift-and-merge loop below.
 */
130 /* erg - unaligned destination */
135 /* align destination with byte copies */
143 blt .Lmemmove_fl4 /* less than 4 bytes */
146 beq .Lmemmove_ft8 /* we have an aligned source */
148 /* erg - unaligned source */
149 /* This is where it gets nasty ... */
/* flags from an (elided) test of src & 3 select the 1/2/3-byte cases */
154 bgt .Lmemmove_fsrcul3
155 beq .Lmemmove_fsrcul2
157 blt .Lmemmove_fsrcul1loop4
/*
 * Forward copy, src 1 byte off word alignment: load whole words and
 * knit each output word from adjacent pairs with 8/24-bit shifts.
 * NOTE(review): the paired lsr/lsl variants below appear to be the
 * little-/big-endian (__ARMEB__) alternatives whose preprocessor
 * guards are elided from this excerpt -- confirm against full source.
 */
161 .Lmemmove_fsrcul1loop16:
167 ldmia r1!, {r4, r5, r12, lr}
169 orr r3, r3, r4, lsr #24
171 orr r4, r4, r5, lsr #24
173 orr r5, r5, r12, lsr #24
175 orr r12, r12, lr, lsr #24
177 orr r3, r3, r4, lsl #24
179 orr r4, r4, r5, lsl #24
181 orr r5, r5, r12, lsl #24
183 orr r12, r12, lr, lsl #24
185 stmia r0!, {r3-r5, r12}
187 bge .Lmemmove_fsrcul1loop16
190 blt .Lmemmove_fsrcul1l4
/* same merge, one word at a time, for the remaining 4-byte chunks */
192 .Lmemmove_fsrcul1loop4:
200 orr r12, r12, lr, lsr #24
202 orr r12, r12, lr, lsl #24
206 bge .Lmemmove_fsrcul1loop4
/*
 * Forward copy, src 2 bytes (halfword) off word alignment: merge
 * adjacent words with 16-bit shifts.  NOTE(review): as above, the
 * duplicated lsr/lsl forms look like elided endian-conditional
 * variants -- confirm against the full source.
 */
214 blt .Lmemmove_fsrcul2loop4
218 .Lmemmove_fsrcul2loop16:
224 ldmia r1!, {r4, r5, r12, lr}
226 orr r3, r3, r4, lsr #16
228 orr r4, r4, r5, lsr #16
230 orr r5, r5, r12, lsr #16
231 mov r12, r12, lsl #16
232 orr r12, r12, lr, lsr #16
234 orr r3, r3, r4, lsl #16
236 orr r4, r4, r5, lsl #16
238 orr r5, r5, r12, lsl #16
239 mov r12, r12, lsr #16
240 orr r12, r12, lr, lsl #16
242 stmia r0!, {r3-r5, r12}
244 bge .Lmemmove_fsrcul2loop16
247 blt .Lmemmove_fsrcul2l4
/* word-at-a-time residue loop */
249 .Lmemmove_fsrcul2loop4:
257 orr r12, r12, lr, lsr #16
259 orr r12, r12, lr, lsl #16
263 bge .Lmemmove_fsrcul2loop4
/*
 * Forward copy, src 3 bytes off word alignment: merge adjacent words
 * with 8/24-bit shifts (mirror image of the 1-byte-offset case).
 * NOTE(review): duplicated lsr/lsl forms presumed endian-conditional,
 * guards elided from this excerpt.
 */
271 blt .Lmemmove_fsrcul3loop4
275 .Lmemmove_fsrcul3loop16:
281 ldmia r1!, {r4, r5, r12, lr}
283 orr r3, r3, r4, lsr #8
285 orr r4, r4, r5, lsr #8
287 orr r5, r5, r12, lsr #8
288 mov r12, r12, lsl #24
289 orr r12, r12, lr, lsr #8
291 orr r3, r3, r4, lsl #8
293 orr r4, r4, r5, lsl #8
295 orr r5, r5, r12, lsl #8
296 mov r12, r12, lsr #24
297 orr r12, r12, lr, lsl #8
299 stmia r0!, {r3-r5, r12}
301 bge .Lmemmove_fsrcul3loop16
304 blt .Lmemmove_fsrcul3l4
/* word-at-a-time residue loop */
306 .Lmemmove_fsrcul3loop4:
314 orr r12, r12, lr, lsr #8
316 orr r12, r12, lr, lsl #8
320 bge .Lmemmove_fsrcul3loop4
/*
 * Backward (descending) copy path, used when dst overlaps above src.
 * r0/r1 start past the end of the buffers (elided adds) and all
 * load/store-multiples run decrement-before (ldmdb/stmdb), so the
 * buffers are walked from high addresses to low.
 */
330 blt .Lmemmove_bl4 /* less than 4 bytes */
332 bne .Lmemmove_bdestul /* oh unaligned destination addr */
334 bne .Lmemmove_bsrcul /* oh unaligned source addr */
337 /* We have aligned source and destination */
339 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
341 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
344 /* blat 32 bytes at a time */
345 /* XXX for really big copies perhaps we should use more registers */
347 ldmdb r1!, {r3, r4, r12, lr}
348 stmdb r0!, {r3, r4, r12, lr}
349 ldmdb r1!, {r3, r4, r12, lr}
350 stmdb r0!, {r3, r4, r12, lr}
352 bge .Lmemmove_bloop32
/* tails: 16-, then 12-, then 8-byte chunks as the count allows */
356 ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
357 stmgedb r0!, {r3, r4, r12, lr}
360 ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
361 stmgedb r0!, {r3, r12, lr}
371 ldmgedb r1!, {r3, r12}
372 stmgedb r0!, {r3, r12}
376 /* less than 4 bytes to go */
380 /* copy the crud byte at a time */
/* pre-decrement byte copies: walk both pointers downward */
384 ldrgeb r3, [r1, #-1]!
385 strgeb r3, [r0, #-1]!
386 ldrgtb r3, [r1, #-1]!
387 strgtb r3, [r0, #-1]!
/*
 * Backward path, misaligned cases: byte-copy downward until dst is
 * word-aligned, then dispatch on src alignment offset (1/2/3 bytes).
 */
390 /* erg - unaligned destination */
394 /* align destination with byte copies */
397 ldrgeb r3, [r1, #-1]!
398 strgeb r3, [r0, #-1]!
399 ldrgtb r3, [r1, #-1]!
400 strgtb r3, [r0, #-1]!
402 blt .Lmemmove_bl4 /* less than 4 bytes to go */
404 beq .Lmemmove_bt8 /* we have an aligned source */
406 /* erg - unaligned source */
407 /* This is where it gets nasty ... */
/* flags from an (elided) test of src & 3 select the 1/2/3-byte cases */
412 blt .Lmemmove_bsrcul1
413 beq .Lmemmove_bsrcul2
415 blt .Lmemmove_bsrcul3loop4
/*
 * Backward copy, src 3 bytes off word alignment.  Same shift-and-merge
 * scheme as the forward fsrcul loops but with ldmdb/stmdb and the
 * merge direction reversed.  r4/r5/lr are callee-saved here and
 * restored after the 16-byte loop.  NOTE(review): paired lsl/lsr forms
 * presumed endian-conditional, guards elided from this excerpt.
 */
417 stmdb sp!, {r4, r5, lr}
419 .Lmemmove_bsrcul3loop16:
425 ldmdb r1!, {r3-r5, r12}
427 orr lr, lr, r12, lsl #24
429 orr r12, r12, r5, lsl #24
431 orr r5, r5, r4, lsl #24
433 orr r4, r4, r3, lsl #24
435 orr lr, lr, r12, lsr #24
437 orr r12, r12, r5, lsr #24
439 orr r5, r5, r4, lsr #24
441 orr r4, r4, r3, lsr #24
443 stmdb r0!, {r4, r5, r12, lr}
445 bge .Lmemmove_bsrcul3loop16
446 ldmia sp!, {r4, r5, lr}
448 blt .Lmemmove_bsrcul3l4
/* word-at-a-time residue loop */
450 .Lmemmove_bsrcul3loop4:
458 orr r12, r12, r3, lsl #24
460 orr r12, r12, r3, lsr #24
464 bge .Lmemmove_bsrcul3loop4
/*
 * Backward copy, src 2 bytes (halfword) off word alignment: 16-bit
 * shift-and-merge, descending.  NOTE(review): duplicated lsl/lsr forms
 * presumed endian-conditional, guards elided from this excerpt.
 */
472 blt .Lmemmove_bsrcul2loop4
474 stmdb sp!, {r4, r5, lr}
476 .Lmemmove_bsrcul2loop16:
482 ldmdb r1!, {r3-r5, r12}
484 orr lr, lr, r12, lsl #16
485 mov r12, r12, lsr #16
486 orr r12, r12, r5, lsl #16
488 orr r5, r5, r4, lsl #16
490 orr r4, r4, r3, lsl #16
492 orr lr, lr, r12, lsr #16
493 mov r12, r12, lsl #16
494 orr r12, r12, r5, lsr #16
496 orr r5, r5, r4, lsr #16
498 orr r4, r4, r3, lsr #16
500 stmdb r0!, {r4, r5, r12, lr}
502 bge .Lmemmove_bsrcul2loop16
503 ldmia sp!, {r4, r5, lr}
505 blt .Lmemmove_bsrcul2l4
/* word-at-a-time residue loop */
507 .Lmemmove_bsrcul2loop4:
515 orr r12, r12, r3, lsl #16
517 orr r12, r12, r3, lsr #16
521 bge .Lmemmove_bsrcul2loop4
/*
 * Backward copy, src 1 byte off word alignment: 8/24-bit shift-and-
 * merge, descending.  NOTE(review): duplicated lsl/lsr forms presumed
 * endian-conditional, guards elided from this excerpt.
 */
529 blt .Lmemmove_bsrcul1loop4
531 stmdb sp!, {r4, r5, lr}
533 .Lmemmove_bsrcul1loop32:
539 ldmdb r1!, {r3-r5, r12}
541 orr lr, lr, r12, lsl #8
542 mov r12, r12, lsr #24
543 orr r12, r12, r5, lsl #8
545 orr r5, r5, r4, lsl #8
547 orr r4, r4, r3, lsl #8
549 orr lr, lr, r12, lsr #8
550 mov r12, r12, lsl #24
551 orr r12, r12, r5, lsr #8
553 orr r5, r5, r4, lsr #8
555 orr r4, r4, r3, lsr #8
557 stmdb r0!, {r4, r5, r12, lr}
559 bge .Lmemmove_bsrcul1loop32
560 ldmia sp!, {r4, r5, lr}
562 blt .Lmemmove_bsrcul1l4
/* word-at-a-time residue loop */
564 .Lmemmove_bsrcul1loop4:
572 orr r12, r12, r3, lsl #8
574 orr r12, r12, r3, lsr #8
578 bge .Lmemmove_bsrcul1loop4