1 /* $NetBSD: memmove.S,v 1.4 2003/10/14 07:51:45 scw Exp $ */
4 * Copyright (c) 1997 The NetBSD Foundation, Inc.
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Neil A. Carson and Mark Brinicombe
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the NetBSD
21 * Foundation, Inc. and its contributors.
22 * 4. Neither the name of The NetBSD Foundation nor the names of its
23 * contributors may be used to endorse or promote products derived
24 * from this software without specific prior written permission.
26 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
27 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
39 #include <machine/asm.h>
40 __FBSDID("$FreeBSD$");
43 /* LINTSTUB: Func: void *memmove(void *, const void *, size_t) */
46 /* bcopy = memcpy/memmove with arguments reversed. */
47 /* LINTSTUB: Func: void bcopy(void *, void *, size_t) */
49 /* switch the source and destination registers */
/*
 * Overlap test performed near the top of memmove.
 * NOTE(review): this excerpt omits lines, including the compare that sets
 * the flags consumed by RETeq/subcc/subcs.  The notes below assume it is
 * `cmp r0, r1` with r0 = dst and r1 = src -- confirm against the full file.
 */
54 /* Do the buffers overlap? */
56 RETeq /* Bail now if src/dst are the same */
/*
 * subcc executes only with carry clear and subcs only with carry set, so
 * exactly one of the two writes r3; neither updates the flags.
 * NOTE(review): after `cmp r0, r1`, carry clear means r0 < r1 (unsigned),
 * so subcc would compute dst - src when dst < src -- the opposite of the
 * condition stated in its comment, and a value that wraps modulo 2^32.
 * The same applies to subcs.  Confirm whether hs/lo were intended.
 */
57 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */
58 subcs r3, r1, r0 /* if (src > dst) r3 = src - dst */
59 cmp r3, r2 /* if (r3 < len) we have an overlap */
/*
 * bcc branches (without link) to memcpy when r3 < r2 unsigned, i.e. in the
 * very case the comment above calls an overlap.
 * NOTE(review): handing the overlapping case to memcpy looks inverted;
 * confirm the intended condition before relying on this path.
 */
60 bcc PIC_SYM(_C_LABEL(memcpy), PLT)
/*
 * Forward (ascending address) copy, word-aligned source and destination.
 * NOTE(review): the compares/subtractions that set the flags tested by the
 * conditional instructions below, and most labels, are not visible in this
 * excerpt -- read the byte thresholds from the existing comments and verify
 * them against the full file.
 */
62 /* Determine copy direction */
64 bcc .Lmemmove_backwards
66 moveq r0, #0 /* Quick abort for len=0 */
/*
 * r0 is pushed so that the exit `ldmeqia sp!, {r0, pc}` further down can
 * reload it as the return value; lr is pushed because it is used as a data
 * register in the ldm/stm register lists below.
 */
69 stmdb sp!, {r0, lr} /* memmove() returns dest addr */
71 blt .Lmemmove_fl4 /* less than 4 bytes */
73 bne .Lmemmove_fdestul /* oh unaligned destination addr */
75 bne .Lmemmove_fsrcul /* oh unaligned source addr */
78 /* We have aligned source and destination */
80 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
82 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
/* r4 joins the transfer register list; it is popped again after the loop. */
83 stmdb sp!, {r4} /* borrow r4 */
85 /* blat 32 bytes at a time */
86 /* XXX for really big copies perhaps we should use more registers */
/*
 * Each ldmia/stmia pair moves four registers (16 bytes); the `!` write-back
 * advances r1 (load pointer) and r0 (store pointer) past the data moved.
 * Two pairs back to back give the 32 bytes per pass named above.
 */
88 ldmia r1!, {r3, r4, r12, lr}
89 stmia r0!, {r3, r4, r12, lr}
90 ldmia r1!, {r3, r4, r12, lr}
91 stmia r0!, {r3, r4, r12, lr}
/* Conditional (ge) single pair: one more 16-byte chunk if enough remains. */
96 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
97 stmgeia r0!, {r3, r4, r12, lr}
99 ldmia sp!, {r4} /* return r4 */
104 /* blat 12 bytes at a time */
/* Three registers per pair = 12 bytes; r4 is not used on this path. */
106 ldmgeia r1!, {r3, r12, lr}
107 stmgeia r0!, {r3, r12, lr}
109 bge .Lmemmove_floop12
/* Two registers per pair = 8 bytes. */
118 ldmgeia r1!, {r3, r12}
119 stmgeia r0!, {r3, r12}
123 /* less than 4 bytes to go */
/* Pops the r0/lr pair pushed on entry; loading pc performs the return. */
125 ldmeqia sp!, {r0, pc} /* done */
127 /* copy the crud byte at a time */
/*
 * Forward path for a destination that is not word aligned.
 * NOTE(review): the byte-copy instructions that perform the alignment are
 * not visible in this excerpt; only the two exits are shown.
 */
137 /* erg - unaligned destination */
142 /* align destination with byte copies */
150 blt .Lmemmove_fl4 /* less than 4 bytes */
153 beq .Lmemmove_ft8 /* we have an aligned source */
/*
 * Forward copy with a source that is not word aligned.
 * The three-way dispatch below sends gt to the fsrcul3 code and eq to the
 * fsrcul2 code; the remaining case falls through into the fsrcul1 loops.
 * NOTE(review): the instruction that sets the flags for this dispatch is
 * not visible in this excerpt.
 */
155 /* erg - unaligned source */
156 /* This is where it gets nasty ... */
161 bgt .Lmemmove_fsrcul3
162 beq .Lmemmove_fsrcul2
164 blt .Lmemmove_fsrcul1loop4
/*
 * 16-byte loop: load four words, OR a 24-bit-shifted copy of each following
 * word into the preceding output register, then store four words.
 * NOTE(review): the `lsr #24` group and the `lsl #24` group look like two
 * alternative variants of the same merge step (e.g. selected by byte order);
 * the preprocessor conditionals and the companion `mov` shifts are missing
 * from this excerpt -- confirm against the full file.
 */
168 .Lmemmove_fsrcul1loop16:
174 ldmia r1!, {r4, r5, r12, lr}
176 orr r3, r3, r4, lsr #24
178 orr r4, r4, r5, lsr #24
180 orr r5, r5, r12, lsr #24
182 orr r12, r12, lr, lsr #24
184 orr r3, r3, r4, lsl #24
186 orr r4, r4, r5, lsl #24
188 orr r5, r5, r12, lsl #24
190 orr r12, r12, lr, lsl #24
192 stmia r0!, {r3-r5, r12}
194 bge .Lmemmove_fsrcul1loop16
197 blt .Lmemmove_fsrcul1l4
/* Word-at-a-time variant of the same merge; its load/store are not shown. */
199 .Lmemmove_fsrcul1loop4:
207 orr r12, r12, lr, lsr #24
209 orr r12, r12, lr, lsl #24
213 bge .Lmemmove_fsrcul1loop4
/*
 * Forward unaligned-source copy, 16-bit shift case (fsrcul2).
 * NOTE(review): flag-setting instructions, most `mov` shifts and the
 * preprocessor conditionals are missing from this excerpt.
 */
221 blt .Lmemmove_fsrcul2loop4
/*
 * 16-byte loop: load four words, merge neighbouring words with 16-bit
 * shifts, store four words.  The `lsr #16` and `lsl #16` orr groups (each
 * with its own `mov r12` shift in the opposite direction) look like two
 * alternative variants of one merge step -- confirm against the full file.
 */
225 .Lmemmove_fsrcul2loop16:
231 ldmia r1!, {r4, r5, r12, lr}
233 orr r3, r3, r4, lsr #16
235 orr r4, r4, r5, lsr #16
237 orr r5, r5, r12, lsr #16
238 mov r12, r12, lsl #16
239 orr r12, r12, lr, lsr #16
241 orr r3, r3, r4, lsl #16
243 orr r4, r4, r5, lsl #16
245 orr r5, r5, r12, lsl #16
246 mov r12, r12, lsr #16
247 orr r12, r12, lr, lsl #16
249 stmia r0!, {r3-r5, r12}
251 bge .Lmemmove_fsrcul2loop16
254 blt .Lmemmove_fsrcul2l4
/* Word-at-a-time variant of the same merge; its load/store are not shown. */
256 .Lmemmove_fsrcul2loop4:
264 orr r12, r12, lr, lsr #16
266 orr r12, r12, lr, lsl #16
270 bge .Lmemmove_fsrcul2loop4
/*
 * Forward unaligned-source copy, 8/24-bit shift case (fsrcul3).
 * NOTE(review): flag-setting instructions, most `mov` shifts and the
 * preprocessor conditionals are missing from this excerpt.
 */
278 blt .Lmemmove_fsrcul3loop4
/*
 * 16-byte loop: load four words, merge neighbouring words, store four words.
 * Where both halves are visible (r12), the shifts are complementary: the
 * register's own value is moved by 24 bits and the next word is ORed in
 * shifted by 8 bits the other way (24 + 8 = 32).  The `lsr #8` and `lsl #8`
 * groups look like two alternative variants of one merge step -- confirm
 * against the full file.
 */
282 .Lmemmove_fsrcul3loop16:
288 ldmia r1!, {r4, r5, r12, lr}
290 orr r3, r3, r4, lsr #8
292 orr r4, r4, r5, lsr #8
294 orr r5, r5, r12, lsr #8
295 mov r12, r12, lsl #24
296 orr r12, r12, lr, lsr #8
298 orr r3, r3, r4, lsl #8
300 orr r4, r4, r5, lsl #8
302 orr r5, r5, r12, lsl #8
303 mov r12, r12, lsr #24
304 orr r12, r12, lr, lsl #8
306 stmia r0!, {r3-r5, r12}
308 bge .Lmemmove_fsrcul3loop16
311 blt .Lmemmove_fsrcul3l4
/* Word-at-a-time variant of the same merge; its load/store are not shown. */
313 .Lmemmove_fsrcul3loop4:
321 orr r12, r12, lr, lsr #8
323 orr r12, r12, lr, lsl #8
327 bge .Lmemmove_fsrcul3loop4
/*
 * Backward (descending address) copy, word-aligned source and destination.
 * All multi-register transfers here use the decrement-before forms
 * (ldmdb/stmdb) with write-back, so r1 and r0 move towards lower addresses.
 * NOTE(review): the entry label, the pointer set-up and most flag-setting
 * instructions are not visible in this excerpt.
 */
337 blt .Lmemmove_bl4 /* less than 4 bytes */
339 bne .Lmemmove_bdestul /* oh unaligned destination addr */
341 bne .Lmemmove_bsrcul /* oh unaligned source addr */
344 /* We have aligned source and destination */
346 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
/* 0x14 = 20; with the 12 already taken off per the comment, 20 + 12 = 32. */
348 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
351 /* blat 32 bytes at a time */
352 /* XXX for really big copies perhaps we should use more registers */
/* Two four-register (16-byte) pairs per pass. */
354 ldmdb r1!, {r3, r4, r12, lr}
355 stmdb r0!, {r3, r4, r12, lr}
356 ldmdb r1!, {r3, r4, r12, lr}
357 stmdb r0!, {r3, r4, r12, lr}
359 bge .Lmemmove_bloop32
363 ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
364 stmgedb r0!, {r3, r4, r12, lr}
367 ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
368 stmgedb r0!, {r3, r12, lr}
/* Two registers per pair = 8 bytes. */
378 ldmgedb r1!, {r3, r12}
379 stmgedb r0!, {r3, r12}
383 /* less than 4 bytes to go */
387 /* copy the crud byte at a time */
/*
 * Pre-indexed byte transfers with write-back: each one first steps the
 * pointer down by one, then moves a single byte through r3.  The ge and gt
 * conditions gate successive bytes; the compare feeding them is not shown.
 */
391 ldrgeb r3, [r1, #-1]!
392 strgeb r3, [r0, #-1]!
393 ldrgtb r3, [r1, #-1]!
394 strgtb r3, [r0, #-1]!
/*
 * Backward path for a destination that is not word aligned.
 * NOTE(review): the instructions that compute the byte count and set the
 * flags for the conditional transfers are not visible in this excerpt.
 */
397 /* erg - unaligned destination */
401 /* align destination with byte copies */
/*
 * Conditional single-byte moves through r3; `[rN, #-1]!` steps the pointer
 * down by one before each access.
 */
404 ldrgeb r3, [r1, #-1]!
405 strgeb r3, [r0, #-1]!
406 ldrgtb r3, [r1, #-1]!
407 strgtb r3, [r0, #-1]!
409 blt .Lmemmove_bl4 /* less than 4 bytes to go */
411 beq .Lmemmove_bt8 /* we have an aligned source */
/*
 * Backward copy with a source that is not word aligned.
 * The dispatch below sends lt to the bsrcul1 code and eq to the bsrcul2
 * code; the remaining case falls through into the bsrcul3 loops.
 * NOTE(review): the instruction that sets the flags for this dispatch is
 * not visible in this excerpt.
 */
413 /* erg - unaligned source */
414 /* This is where it gets nasty ... */
419 blt .Lmemmove_bsrcul1
420 beq .Lmemmove_bsrcul2
422 blt .Lmemmove_bsrcul3loop4
/* r4, r5 and lr are used as data registers in the loop; restored after it. */
424 stmdb sp!, {r4, r5, lr}
/*
 * 16-byte loop: load four words below r1, OR a 24-bit-shifted copy of each
 * lower word into the register above it, then store four words below r0.
 * NOTE(review): the `lsl #24` group and the `lsr #24` group look like two
 * alternative variants of the same merge step; the preprocessor
 * conditionals and companion `mov` shifts are missing from this excerpt.
 */
426 .Lmemmove_bsrcul3loop16:
432 ldmdb r1!, {r3-r5, r12}
434 orr lr, lr, r12, lsl #24
436 orr r12, r12, r5, lsl #24
438 orr r5, r5, r4, lsl #24
440 orr r4, r4, r3, lsl #24
442 orr lr, lr, r12, lsr #24
444 orr r12, r12, r5, lsr #24
446 orr r5, r5, r4, lsr #24
448 orr r4, r4, r3, lsr #24
450 stmdb r0!, {r4, r5, r12, lr}
452 bge .Lmemmove_bsrcul3loop16
453 ldmia sp!, {r4, r5, lr}
455 blt .Lmemmove_bsrcul3l4
/* Word-at-a-time variant of the same merge; its load/store are not shown. */
457 .Lmemmove_bsrcul3loop4:
465 orr r12, r12, r3, lsl #24
467 orr r12, r12, r3, lsr #24
471 bge .Lmemmove_bsrcul3loop4
/*
 * Backward unaligned-source copy, 16-bit shift case (bsrcul2).
 * NOTE(review): flag-setting instructions, most `mov` shifts and the
 * preprocessor conditionals are missing from this excerpt.
 */
479 blt .Lmemmove_bsrcul2loop4
/* r4, r5 and lr are used as data registers in the loop; restored after it. */
481 stmdb sp!, {r4, r5, lr}
/*
 * 16-byte loop: load four words below r1, merge neighbouring words with
 * 16-bit shifts, store four words below r0.  The `lsl #16` and `lsr #16`
 * orr groups (each with its own `mov r12` shift in the opposite direction)
 * look like two alternative variants of one merge step -- confirm against
 * the full file.
 */
483 .Lmemmove_bsrcul2loop16:
489 ldmdb r1!, {r3-r5, r12}
491 orr lr, lr, r12, lsl #16
492 mov r12, r12, lsr #16
493 orr r12, r12, r5, lsl #16
495 orr r5, r5, r4, lsl #16
497 orr r4, r4, r3, lsl #16
499 orr lr, lr, r12, lsr #16
500 mov r12, r12, lsl #16
501 orr r12, r12, r5, lsr #16
503 orr r5, r5, r4, lsr #16
505 orr r4, r4, r3, lsr #16
507 stmdb r0!, {r4, r5, r12, lr}
509 bge .Lmemmove_bsrcul2loop16
510 ldmia sp!, {r4, r5, lr}
512 blt .Lmemmove_bsrcul2l4
/* Word-at-a-time variant of the same merge; its load/store are not shown. */
514 .Lmemmove_bsrcul2loop4:
522 orr r12, r12, r3, lsl #16
524 orr r12, r12, r3, lsr #16
528 bge .Lmemmove_bsrcul2loop4
/*
 * Backward unaligned-source copy, 8/24-bit shift case (bsrcul1).
 * NOTE(review): flag-setting instructions, most `mov` shifts and the
 * preprocessor conditionals are missing from this excerpt.
 */
536 blt .Lmemmove_bsrcul1loop4
/* r4, r5 and lr are used as data registers in the loop; restored after it. */
538 stmdb sp!, {r4, r5, lr}
/*
 * Despite the "32" in the label, each visible pass loads four registers and
 * stores four registers (16 bytes), like the loop16 labels of the sibling
 * cases.  Where both halves are visible (r12), the register's own value is
 * shifted by 24 bits and the neighbouring word is ORed in shifted by 8 bits
 * the other way (24 + 8 = 32).  The `lsl #8` and `lsr #8` groups look like
 * two alternative variants of one merge step -- confirm against the full
 * file.
 */
540 .Lmemmove_bsrcul1loop32:
546 ldmdb r1!, {r3-r5, r12}
548 orr lr, lr, r12, lsl #8
549 mov r12, r12, lsr #24
550 orr r12, r12, r5, lsl #8
552 orr r5, r5, r4, lsl #8
554 orr r4, r4, r3, lsl #8
556 orr lr, lr, r12, lsr #8
557 mov r12, r12, lsl #24
558 orr r12, r12, r5, lsr #8
560 orr r5, r5, r4, lsr #8
562 orr r4, r4, r3, lsr #8
564 stmdb r0!, {r4, r5, r12, lr}
566 bge .Lmemmove_bsrcul1loop32
567 ldmia sp!, {r4, r5, lr}
569 blt .Lmemmove_bsrcul1l4
/* Word-at-a-time variant of the same merge; its load/store are not shown. */
571 .Lmemmove_bsrcul1loop4:
579 orr r12, r12, r3, lsl #8
581 orr r12, r12, r3, lsr #8
585 bge .Lmemmove_bsrcul1loop4