2 * Copyright (c) 2004 Olivier Houchard
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * Copyright 2003 Wasabi Systems, Inc.
28 * All rights reserved.
30 * Written by Steve C. Woodford for Wasabi Systems, Inc.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed for the NetBSD Project by
43 * Wasabi Systems, Inc.
44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
45 * or promote products derived from this software without specific prior
48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
58 * POSSIBILITY OF SUCH DAMAGE.
61 * Copyright (c) 1997 The NetBSD Foundation, Inc.
62 * All rights reserved.
64 * This code is derived from software contributed to The NetBSD Foundation
65 * by Neil A. Carson and Mark Brinicombe
67 * Redistribution and use in source and binary forms, with or without
68 * modification, are permitted provided that the following conditions
70 * 1. Redistributions of source code must retain the above copyright
71 * notice, this list of conditions and the following disclaimer.
72 * 2. Redistributions in binary form must reproduce the above copyright
73 * notice, this list of conditions and the following disclaimer in the
74 * documentation and/or other materials provided with the distribution.
76 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
77 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
78 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
79 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
80 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
81 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
82 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
83 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
84 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
85 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
86 * POSSIBILITY OF SUCH DAMAGE.
89 #include <machine/asm.h>
90 __FBSDID("$FreeBSD$");
97 .word _C_LABEL(_arm_memcpy)
99 .word _C_LABEL(_arm_bzero)
101 .word _C_LABEL(_min_memcpy_size)
103 .word _C_LABEL(_min_bzero_size)
/*
 * NOTE(review): non-contiguous excerpt — the ENTRY()/label directives and
 * many instructions between the numbered lines below are elided, so this is
 * not a complete routine.  Code lines are byte-identical; comments only.
 *
 * Layout (as visible): bzero() apparently checks _min_bzero_size and may
 * defer to an assisted zeroing helper before falling into memset — TODO
 * confirm, the actual call/branch lines are elided.
 * memset register use (visible): ip = dst cursor, r1 = remaining length,
 * r3 = fill byte replicated to 16 then 32 bits, r2/r3 = strd pair.
 */
105 * memset: Sets a block of memory to the specified value
110 * r2 - number of bytes to write
115 /* LINTSTUB: Func: void bzero(void *, size_t) */
121 ldr r2, .L_min_bzero_size
125 stmfd sp!, {r0, r1, lr}
130 ldmfd sp!, {r0, r1, lr}
136 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
138 and r3, r1, #0xff /* We deal with bytes */
141 cmp r1, #0x04 /* Do we have less than 4 bytes */
143 blt .Lmemset_lessthanfour
145 /* Ok first we will word align the address */
146 ands r2, ip, #0x03 /* Get the bottom two bits */
147 bne .Lmemset_wordunaligned /* The address is not word aligned */
149 /* We are now word aligned */
150 .Lmemset_wordaligned:
151 orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */
153 tst ip, #0x04 /* Quad-align for armv5e */
157 orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */
159 subne r1, r1, #0x04 /* Quad-align if necessary */
160 strne r3, [ip], #0x04
163 blt .Lmemset_loop4 /* If less than 16 then use words */
164 mov r2, r3 /* Duplicate data */
165 cmp r1, #0x80 /* If < 128 then skip the big loop */
168 /* Do 128 bytes at a time */
/* 16 conditional strd's of the r2:r3 pair = 128 bytes per iteration
 * (armv5e strd path; the loop label / count update lines are elided). */
172 strdge r2, [ip], #0x08
173 strdge r2, [ip], #0x08
174 strdge r2, [ip], #0x08
175 strdge r2, [ip], #0x08
176 strdge r2, [ip], #0x08
177 strdge r2, [ip], #0x08
178 strdge r2, [ip], #0x08
179 strdge r2, [ip], #0x08
180 strdge r2, [ip], #0x08
181 strdge r2, [ip], #0x08
182 strdge r2, [ip], #0x08
183 strdge r2, [ip], #0x08
184 strdge r2, [ip], #0x08
185 strdge r2, [ip], #0x08
186 strdge r2, [ip], #0x08
187 strdge r2, [ip], #0x08
207 RETeq /* Zero length so just exit */
209 add r1, r1, #0x80 /* Adjust for extra sub */
211 /* Do 32 bytes at a time */
215 strdge r2, [ip], #0x08
216 strdge r2, [ip], #0x08
217 strdge r2, [ip], #0x08
218 strdge r2, [ip], #0x08
226 RETeq /* Zero length so just exit */
228 adds r1, r1, #0x10 /* Partially adjust for extra sub */
230 /* Deal with 16 bytes or more */
232 strdge r2, [ip], #0x08
233 strdge r2, [ip], #0x08
238 RETeq /* Zero length so just exit */
240 addlt r1, r1, #0x10 /* Possibly adjust for extra sub */
242 /* We have at least 4 bytes so copy as words */
245 strge r3, [ip], #0x04
247 RETeq /* Zero length so just exit */
250 /* Compensate for 64-bit alignment check */
258 strb r3, [ip], #0x01 /* Set 1 byte */
259 strbge r3, [ip], #0x01 /* Set another byte */
260 strbgt r3, [ip] /* and a third */
/* Destination not word aligned: store 1-3 bytes (condition codes set by an
 * elided compare on the low address bits), then retry the aligned path. */
263 .Lmemset_wordunaligned:
265 strb r3, [ip], #0x01 /* Set 1 byte */
267 strbge r3, [ip], #0x01 /* Set another byte */
269 strbgt r3, [ip], #0x01 /* and a third */
270 cmp r1, #0x04 /* More than 4 bytes left? */
271 bge .Lmemset_wordaligned /* Yup */
273 .Lmemset_lessthanfour:
275 RETeq /* Zero length so exit */
276 strb r3, [ip], #0x01 /* Set 1 byte */
278 strbge r3, [ip], #0x01 /* Set another byte */
279 strbgt r3, [ip] /* and a third */
/*
 * NOTE(review): non-contiguous excerpt of memcmp — the ENTRY() line, the
 * byte-compare stanzas behind the computed branch, and the word-compare
 * loop body are elided.  Code lines are byte-identical; comments only.
 */
290 /* Are both addresses aligned the same way? */
293 RETeq /* len == 0, or same addresses! */
296 bne .Lmemcmp_bytewise2 /* Badly aligned. Do it the slow way */
298 /* Word-align the addresses, if necessary */
/* Computed branch: r3 = r3 * 3, then pc += r3 << 3 — jumps into a table of
 * fixed-size compare stanzas indexed by the alignment remainder.  The
 * stanzas themselves are elided here — TODO confirm stride against them. */
301 add r3, r3, r3, lsl #1
302 addne pc, pc, r3, lsl #3
305 /* Compare up to 3 bytes */
313 /* Compare up to 2 bytes */
329 /* Compare 4 bytes at a time, if possible */
331 bcc .Lmemcmp_bytewise
332 .Lmemcmp_word_aligned:
337 beq .Lmemcmp_word_aligned
340 /* Correct for extra subtraction, and check if done */
342 cmpeq r0, #0x00 /* If done, did all bytes match? */
343 RETeq /* Yup. Just return */
345 /* Re-do the final word byte-wise */
356 beq .Lmemcmp_bytewise2
361 * 6 byte compares are very common, thanks to the network stack.
362 * This code is hand-scheduled to reduce the number of stalls for
363 * load results. Everything else being equal, this will be ~32%
364 * faster than a byte-wise memcmp.
/* Hand-scheduled 6-byte compare: ip = buf1, r1 = buf2; loads are hoisted
 * ahead of the subtract that consumes the previous pair, and each mismatch
 * returns the byte difference immediately. */
368 ldrb r3, [r1, #0x00] /* r3 = b2#0 */
369 ldrb r0, [ip, #0x00] /* r0 = b1#0 */
370 ldrb r2, [r1, #0x01] /* r2 = b2#1 */
371 subs r0, r0, r3 /* r0 = b1#0 - b2#0 */
372 ldrbeq r3, [ip, #0x01] /* r3 = b1#1 */
373 RETne /* Return if mismatch on #0 */
374 subs r0, r3, r2 /* r0 = b1#1 - b2#1 */
375 ldrbeq r3, [r1, #0x02] /* r3 = b2#2 */
376 ldrbeq r0, [ip, #0x02] /* r0 = b1#2 */
377 RETne /* Return if mismatch on #1 */
378 ldrb r2, [r1, #0x03] /* r2 = b2#3 */
379 subs r0, r0, r3 /* r0 = b1#2 - b2#2 */
380 ldrbeq r3, [ip, #0x03] /* r3 = b1#3 */
381 RETne /* Return if mismatch on #2 */
382 subs r0, r3, r2 /* r0 = b1#3 - b2#3 */
383 ldrbeq r3, [r1, #0x04] /* r3 = b2#4 */
384 ldrbeq r0, [ip, #0x04] /* r0 = b1#4 */
385 RETne /* Return if mismatch on #3 */
386 ldrb r2, [r1, #0x05] /* r2 = b2#5 */
387 subs r0, r0, r3 /* r0 = b1#4 - b2#4 */
388 ldrbeq r3, [ip, #0x05] /* r3 = b1#5 */
389 RETne /* Return if mismatch on #4 */
390 sub r0, r3, r2 /* r0 = b1#5 - b2#5 */
/*
 * NOTE(review): non-contiguous excerpt of bcopy/memmove (forward-copy
 * half) — entry labels, compares that set the condition codes, and many
 * loop bookkeeping lines are elided.  Code byte-identical; comments only.
 *
 * bcopy() swaps src/dst into memmove order (swap instructions elided),
 * then memmove dispatches: non-overlapping buffers tail-call memcpy,
 * dst < src copies forward (here), otherwise backward (next section).
 */
395 /* switch the source and destination registers */
400 /* Do the buffers overlap? */
402 RETeq /* Bail now if src/dst are the same */
403 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */
404 subcs r3, r1, r0 /* if (src > dst) r3 = src - dst */
405 cmp r3, r2 /* if (r3 < len) we have an overlap */
406 bcc PIC_SYM(_C_LABEL(memcpy), PLT)
408 /* Determine copy direction */
410 bcc .Lmemmove_backwards
412 moveq r0, #0 /* Quick abort for len=0 */
415 stmdb sp!, {r0, lr} /* memmove() returns dest addr */
417 blt .Lmemmove_fl4 /* less than 4 bytes */
419 bne .Lmemmove_fdestul /* oh unaligned destination addr */
421 bne .Lmemmove_fsrcul /* oh unaligned source addr */
424 /* We have aligned source and destination */
426 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
428 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
429 stmdb sp!, {r4} /* borrow r4 */
431 /* blat 32 bytes at a time */
432 /* XXX for really big copies perhaps we should use more registers */
434 ldmia r1!, {r3, r4, r12, lr}
435 stmia r0!, {r3, r4, r12, lr}
436 ldmia r1!, {r3, r4, r12, lr}
437 stmia r0!, {r3, r4, r12, lr}
439 bge .Lmemmove_floop32
442 ldmiage r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
443 stmiage r0!, {r3, r4, r12, lr}
445 ldmia sp!, {r4} /* return r4 */
450 /* blat 12 bytes at a time */
452 ldmiage r1!, {r3, r12, lr}
453 stmiage r0!, {r3, r12, lr}
455 bge .Lmemmove_floop12
464 ldmiage r1!, {r3, r12}
465 stmiage r0!, {r3, r12}
469 /* less than 4 bytes to go */
471 ldmiaeq sp!, {r0, pc} /* done */
473 /* copy the crud byte at a time */
483 /* erg - unaligned destination */
488 /* align destination with byte copies */
496 blt .Lmemmove_fl4 /* less the 4 bytes */
499 beq .Lmemmove_ft8 /* we have an aligned source */
501 /* erg - unaligned source */
502 /* This is where it gets nasty ... */
507 bgt .Lmemmove_fsrcul3
508 beq .Lmemmove_fsrcul2
510 blt .Lmemmove_fsrcul1loop4
/*
 * fsrculN loops: src is N bytes past a word boundary; each iteration loads
 * aligned words and shift-merges adjacent pairs into aligned stores.  Both
 * lsr-then-orr and lsl-then-orr variants appear below — these look like the
 * big-endian / little-endian #ifdef arms with the preprocessor lines
 * elided — TODO confirm against the full source.
 */
514 .Lmemmove_fsrcul1loop16:
520 ldmia r1!, {r4, r5, r12, lr}
522 orr r3, r3, r4, lsr #24
524 orr r4, r4, r5, lsr #24
526 orr r5, r5, r12, lsr #24
528 orr r12, r12, lr, lsr #24
530 orr r3, r3, r4, lsl #24
532 orr r4, r4, r5, lsl #24
534 orr r5, r5, r12, lsl #24
536 orr r12, r12, lr, lsl #24
538 stmia r0!, {r3-r5, r12}
540 bge .Lmemmove_fsrcul1loop16
543 blt .Lmemmove_fsrcul1l4
545 .Lmemmove_fsrcul1loop4:
553 orr r12, r12, lr, lsr #24
555 orr r12, r12, lr, lsl #24
559 bge .Lmemmove_fsrcul1loop4
567 blt .Lmemmove_fsrcul2loop4
571 .Lmemmove_fsrcul2loop16:
577 ldmia r1!, {r4, r5, r12, lr}
579 orr r3, r3, r4, lsr #16
581 orr r4, r4, r5, lsr #16
583 orr r5, r5, r12, lsr #16
584 mov r12, r12, lsl #16
585 orr r12, r12, lr, lsr #16
587 orr r3, r3, r4, lsl #16
589 orr r4, r4, r5, lsl #16
591 orr r5, r5, r12, lsl #16
592 mov r12, r12, lsr #16
593 orr r12, r12, lr, lsl #16
595 stmia r0!, {r3-r5, r12}
597 bge .Lmemmove_fsrcul2loop16
600 blt .Lmemmove_fsrcul2l4
602 .Lmemmove_fsrcul2loop4:
610 orr r12, r12, lr, lsr #16
612 orr r12, r12, lr, lsl #16
616 bge .Lmemmove_fsrcul2loop4
624 blt .Lmemmove_fsrcul3loop4
628 .Lmemmove_fsrcul3loop16:
634 ldmia r1!, {r4, r5, r12, lr}
636 orr r3, r3, r4, lsr #8
638 orr r4, r4, r5, lsr #8
640 orr r5, r5, r12, lsr #8
641 mov r12, r12, lsl #24
642 orr r12, r12, lr, lsr #8
644 orr r3, r3, r4, lsl #8
646 orr r4, r4, r5, lsl #8
648 orr r5, r5, r12, lsl #8
649 mov r12, r12, lsr #24
650 orr r12, r12, lr, lsl #8
652 stmia r0!, {r3-r5, r12}
654 bge .Lmemmove_fsrcul3loop16
657 blt .Lmemmove_fsrcul3l4
659 .Lmemmove_fsrcul3loop4:
667 orr r12, r12, lr, lsr #8
669 orr r12, r12, lr, lsl #8
673 bge .Lmemmove_fsrcul3loop4
/*
 * NOTE(review): non-contiguous excerpt of the memmove backward-copy path
 * (dst > src, overlapping): cursors r0/r1 start past the end and all
 * loads/stores use decrement-before (ldmdb/stmdb, [rN, #-1]!).  Entry
 * label and several compare/bookkeeping lines are elided.  Code lines are
 * byte-identical; comments only.
 */
683 blt .Lmemmove_bl4 /* less than 4 bytes */
685 bne .Lmemmove_bdestul /* oh unaligned destination addr */
687 bne .Lmemmove_bsrcul /* oh unaligned source addr */
690 /* We have aligned source and destination */
692 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
694 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
697 /* blat 32 bytes at a time */
698 /* XXX for really big copies perhaps we should use more registers */
700 ldmdb r1!, {r3, r4, r12, lr}
701 stmdb r0!, {r3, r4, r12, lr}
702 ldmdb r1!, {r3, r4, r12, lr}
703 stmdb r0!, {r3, r4, r12, lr}
705 bge .Lmemmove_bloop32
709 ldmdbge r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
710 stmdbge r0!, {r3, r4, r12, lr}
713 ldmdbge r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
714 stmdbge r0!, {r3, r12, lr}
724 ldmdbge r1!, {r3, r12}
725 stmdbge r0!, {r3, r12}
729 /* less than 4 bytes to go */
733 /* copy the crud byte at a time */
737 ldrbge r3, [r1, #-1]!
738 strbge r3, [r0, #-1]!
739 ldrbgt r3, [r1, #-1]!
740 strbgt r3, [r0, #-1]!
743 /* erg - unaligned destination */
747 /* align destination with byte copies */
750 ldrbge r3, [r1, #-1]!
751 strbge r3, [r0, #-1]!
752 ldrbgt r3, [r1, #-1]!
753 strbgt r3, [r0, #-1]!
755 blt .Lmemmove_bl4 /* less than 4 bytes to go */
757 beq .Lmemmove_bt8 /* we have an aligned source */
759 /* erg - unaligned source */
760 /* This is where it gets nasty ... */
765 blt .Lmemmove_bsrcul1
766 beq .Lmemmove_bsrcul2
/*
 * bsrculN loops: mirror of the forward fsrculN shift-merge loops, working
 * from high addresses down.  As in the forward path, both lsl-then-orr and
 * lsr-then-orr variants appear — apparently the elided big-/little-endian
 * #ifdef arms — TODO confirm against the full source.
 */
768 blt .Lmemmove_bsrcul3loop4
770 stmdb sp!, {r4, r5, lr}
772 .Lmemmove_bsrcul3loop16:
778 ldmdb r1!, {r3-r5, r12}
780 orr lr, lr, r12, lsl #24
782 orr r12, r12, r5, lsl #24
784 orr r5, r5, r4, lsl #24
786 orr r4, r4, r3, lsl #24
788 orr lr, lr, r12, lsr #24
790 orr r12, r12, r5, lsr #24
792 orr r5, r5, r4, lsr #24
794 orr r4, r4, r3, lsr #24
796 stmdb r0!, {r4, r5, r12, lr}
798 bge .Lmemmove_bsrcul3loop16
799 ldmia sp!, {r4, r5, lr}
801 blt .Lmemmove_bsrcul3l4
803 .Lmemmove_bsrcul3loop4:
811 orr r12, r12, r3, lsl #24
813 orr r12, r12, r3, lsr #24
817 bge .Lmemmove_bsrcul3loop4
825 blt .Lmemmove_bsrcul2loop4
827 stmdb sp!, {r4, r5, lr}
829 .Lmemmove_bsrcul2loop16:
835 ldmdb r1!, {r3-r5, r12}
837 orr lr, lr, r12, lsl #16
838 mov r12, r12, lsr #16
839 orr r12, r12, r5, lsl #16
841 orr r5, r5, r4, lsl #16
843 orr r4, r4, r3, lsl #16
845 orr lr, lr, r12, lsr #16
846 mov r12, r12, lsl #16
847 orr r12, r12, r5, lsr #16
849 orr r5, r5, r4, lsr #16
851 orr r4, r4, r3, lsr #16
853 stmdb r0!, {r4, r5, r12, lr}
855 bge .Lmemmove_bsrcul2loop16
856 ldmia sp!, {r4, r5, lr}
858 blt .Lmemmove_bsrcul2l4
860 .Lmemmove_bsrcul2loop4:
868 orr r12, r12, r3, lsl #16
870 orr r12, r12, r3, lsr #16
874 bge .Lmemmove_bsrcul2loop4
882 blt .Lmemmove_bsrcul1loop4
884 stmdb sp!, {r4, r5, lr}
886 .Lmemmove_bsrcul1loop32:
892 ldmdb r1!, {r3-r5, r12}
894 orr lr, lr, r12, lsl #8
895 mov r12, r12, lsr #24
896 orr r12, r12, r5, lsl #8
898 orr r5, r5, r4, lsl #8
900 orr r4, r4, r3, lsl #8
902 orr lr, lr, r12, lsr #8
903 mov r12, r12, lsl #24
904 orr r12, r12, r5, lsr #8
906 orr r5, r5, r4, lsr #8
908 orr r4, r4, r3, lsr #8
910 stmdb r0!, {r4, r5, r12, lr}
912 bge .Lmemmove_bsrcul1loop32
913 ldmia sp!, {r4, r5, lr}
915 blt .Lmemmove_bsrcul1l4
917 .Lmemmove_bsrcul1loop4:
925 orr r12, r12, r3, lsl #8
927 orr r12, r12, r3, lsr #8
931 bge .Lmemmove_bsrcul1loop4
/*
 * NOTE(review): non-contiguous excerpt of _memcpy, the generic (pre-ARMv5E,
 * ldm/stm-based) copy compiled under !_ARM_ARCH_5E.  Entry labels, the
 * FLASHADDR range checks, and most loop bookkeeping are elided.  Code
 * lines are byte-identical; comments only.
 *
 * Preamble: apparently tests _arm_memcpy / _min_memcpy_size to hand large
 * copies to an assisted (DMA?) routine — TODO confirm, the branch and call
 * lines are elided.
 */
939 #if !defined(_ARM_ARCH_5E)
941 /* save leaf functions having to store this away */
942 /* Do not check arm_memcpy if we're running from flash */
943 #if defined(FLASHADDR) && defined(PHYSADDR)
944 #if FLASHADDR > PHYSADDR
954 ldr r3, .L_arm_memcpy
958 ldr r3, .L_min_memcpy_size
962 stmfd sp!, {r0-r2, r4, lr}
964 ldr r4, .L_arm_memcpy
968 ldmfd sp!, {r0-r2, r4, lr}
972 stmdb sp!, {r0, lr} /* memcpy() returns dest addr */
975 blt .Lmemcpy_l4 /* less than 4 bytes */
977 bne .Lmemcpy_destul /* oh unaligned destination addr */
979 bne .Lmemcpy_srcul /* oh unaligned source addr */
982 /* We have aligned source and destination */
984 blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */
986 blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */
987 stmdb sp!, {r4} /* borrow r4 */
989 /* blat 32 bytes at a time */
990 /* XXX for really big copies perhaps we should use more registers */
992 ldmia r1!, {r3, r4, r12, lr}
993 stmia r0!, {r3, r4, r12, lr}
994 ldmia r1!, {r3, r4, r12, lr}
995 stmia r0!, {r3, r4, r12, lr}
1000 ldmiage r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
1001 stmiage r0!, {r3, r4, r12, lr}
1003 ldmia sp!, {r4} /* return r4 */
1008 /* blat 12 bytes at a time */
1010 ldmiage r1!, {r3, r12, lr}
1011 stmiage r0!, {r3, r12, lr}
1012 subsge r2, r2, #0x0c
1022 ldmiage r1!, {r3, r12}
1023 stmiage r0!, {r3, r12}
1027 /* less than 4 bytes to go */
/* The two return forms below look like the two arms of an elided #ifdef:
 * the `^` variant restores CPSR (26-bit/APCS exit) — TODO confirm. */
1030 ldmiaeq sp!, {r0, pc}^ /* done */
1032 ldmiaeq sp!, {r0, pc} /* done */
1034 /* copy the crud byte at a time */
1044 /* erg - unaligned destination */
1049 /* align destination with byte copies */
1057 blt .Lmemcpy_l4 /* less the 4 bytes */
1060 beq .Lmemcpy_t8 /* we have an aligned source */
1062 /* erg - unaligned source */
1063 /* This is where it gets nasty ... */
/* srculN loops: same shift-merge technique as memmove's fsrculN loops —
 * load aligned words, orr adjacent pairs together for aligned stores. */
1071 blt .Lmemcpy_srcul1loop4
1075 .Lmemcpy_srcul1loop16:
1077 ldmia r1!, {r4, r5, r12, lr}
1078 orr r3, r3, r4, lsl #24
1080 orr r4, r4, r5, lsl #24
1082 orr r5, r5, r12, lsl #24
1083 mov r12, r12, lsr #8
1084 orr r12, r12, lr, lsl #24
1085 stmia r0!, {r3-r5, r12}
1087 bge .Lmemcpy_srcul1loop16
1090 blt .Lmemcpy_srcul1l4
1092 .Lmemcpy_srcul1loop4:
1095 orr r12, r12, lr, lsl #24
1098 bge .Lmemcpy_srcul1loop4
1106 blt .Lmemcpy_srcul2loop4
1110 .Lmemcpy_srcul2loop16:
1112 ldmia r1!, {r4, r5, r12, lr}
1113 orr r3, r3, r4, lsl #16
1115 orr r4, r4, r5, lsl #16
1117 orr r5, r5, r12, lsl #16
1118 mov r12, r12, lsr #16
1119 orr r12, r12, lr, lsl #16
1120 stmia r0!, {r3-r5, r12}
1122 bge .Lmemcpy_srcul2loop16
1125 blt .Lmemcpy_srcul2l4
1127 .Lmemcpy_srcul2loop4:
1128 mov r12, lr, lsr #16
1130 orr r12, r12, lr, lsl #16
1133 bge .Lmemcpy_srcul2loop4
1141 blt .Lmemcpy_srcul3loop4
1145 .Lmemcpy_srcul3loop16:
1147 ldmia r1!, {r4, r5, r12, lr}
1148 orr r3, r3, r4, lsl #8
1150 orr r4, r4, r5, lsl #8
1152 orr r5, r5, r12, lsl #8
1153 mov r12, r12, lsr #24
1154 orr r12, r12, lr, lsl #8
1155 stmia r0!, {r3-r5, r12}
1157 bge .Lmemcpy_srcul3loop16
1160 blt .Lmemcpy_srcul3l4
1162 .Lmemcpy_srcul3loop4:
1163 mov r12, lr, lsr #24
1165 orr r12, r12, lr, lsl #8
1168 bge .Lmemcpy_srcul3loop4
/*
 * NOTE(review): non-contiguous excerpt of the ARMv5E-optimized memcpy main
 * path (uses pld prefetch and strd 64-bit stores).  Entry label, loop
 * labels, and many count-update lines are elided.  Code lines are
 * byte-identical; comments only.
 *
 * Visible register use: r3 = dst cursor (r0 preserved as return value),
 * r1 = src cursor, r2 = remaining length, r4-r9 = copy data (callee-saved,
 * pushed below), ip = scratch.
 */
1176 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
1180 ble .Lmemcpy_short /* <= 12 bytes */
1182 #if FLASHADDR > PHYSADDR
1192 ldr r3, .L_arm_memcpy
1196 ldr r3, .L_min_memcpy_size
1200 stmfd sp!, {r0-r2, r4, lr}
1202 ldr r4, .L_arm_memcpy
1206 ldmfd sp!, {r0-r2, r4, lr}
1209 mov r3, r0 /* We must not clobber r0 */
1211 /* Word-align the destination buffer */
1212 ands ip, r3, #0x03 /* Already word aligned? */
1213 beq .Lmemcpy_wordaligned /* Yup */
1215 ldrb ip, [r1], #0x01
1217 strb ip, [r3], #0x01
1218 ldrble ip, [r1], #0x01
1220 strble ip, [r3], #0x01
1221 ldrblt ip, [r1], #0x01
1223 strblt ip, [r3], #0x01
1225 /* Destination buffer is now word aligned */
1226 .Lmemcpy_wordaligned:
1227 ands ip, r1, #0x03 /* Is src also word-aligned? */
1228 bne .Lmemcpy_bad_align /* Nope. Things just got bad */
1230 /* Quad-align the destination buffer */
1231 tst r3, #0x07 /* Already quad aligned? */
1232 ldrne ip, [r1], #0x04
1233 stmfd sp!, {r4-r9} /* Free up some registers */
1235 strne ip, [r3], #0x04
1237 /* Destination buffer quad aligned, source is at least word aligned */
1239 blt .Lmemcpy_w_lessthan128
1241 /* Copy 128 bytes at a time */
/* Software-pipelined: word loads run ahead of the quad-aligned strd
 * stores, with pld issued every 0x20 bytes of source. */
1243 ldr r4, [r1], #0x04 /* LD:00-03 */
1244 ldr r5, [r1], #0x04 /* LD:04-07 */
1245 pld [r1, #0x18] /* Prefetch 0x20 */
1246 ldr r6, [r1], #0x04 /* LD:08-0b */
1247 ldr r7, [r1], #0x04 /* LD:0c-0f */
1248 ldr r8, [r1], #0x04 /* LD:10-13 */
1249 ldr r9, [r1], #0x04 /* LD:14-17 */
1250 strd r4, [r3], #0x08 /* ST:00-07 */
1251 ldr r4, [r1], #0x04 /* LD:18-1b */
1252 ldr r5, [r1], #0x04 /* LD:1c-1f */
1253 strd r6, [r3], #0x08 /* ST:08-0f */
1254 ldr r6, [r1], #0x04 /* LD:20-23 */
1255 ldr r7, [r1], #0x04 /* LD:24-27 */
1256 pld [r1, #0x18] /* Prefetch 0x40 */
1257 strd r8, [r3], #0x08 /* ST:10-17 */
1258 ldr r8, [r1], #0x04 /* LD:28-2b */
1259 ldr r9, [r1], #0x04 /* LD:2c-2f */
1260 strd r4, [r3], #0x08 /* ST:18-1f */
1261 ldr r4, [r1], #0x04 /* LD:30-33 */
1262 ldr r5, [r1], #0x04 /* LD:34-37 */
1263 strd r6, [r3], #0x08 /* ST:20-27 */
1264 ldr r6, [r1], #0x04 /* LD:38-3b */
1265 ldr r7, [r1], #0x04 /* LD:3c-3f */
1266 strd r8, [r3], #0x08 /* ST:28-2f */
1267 ldr r8, [r1], #0x04 /* LD:40-43 */
1268 ldr r9, [r1], #0x04 /* LD:44-47 */
1269 pld [r1, #0x18] /* Prefetch 0x60 */
1270 strd r4, [r3], #0x08 /* ST:30-37 */
1271 ldr r4, [r1], #0x04 /* LD:48-4b */
1272 ldr r5, [r1], #0x04 /* LD:4c-4f */
1273 strd r6, [r3], #0x08 /* ST:38-3f */
1274 ldr r6, [r1], #0x04 /* LD:50-53 */
1275 ldr r7, [r1], #0x04 /* LD:54-57 */
1276 strd r8, [r3], #0x08 /* ST:40-47 */
1277 ldr r8, [r1], #0x04 /* LD:58-5b */
1278 ldr r9, [r1], #0x04 /* LD:5c-5f */
1279 strd r4, [r3], #0x08 /* ST:48-4f */
1280 ldr r4, [r1], #0x04 /* LD:60-63 */
1281 ldr r5, [r1], #0x04 /* LD:64-67 */
1282 pld [r1, #0x18] /* Prefetch 0x80 */
1283 strd r6, [r3], #0x08 /* ST:50-57 */
1284 ldr r6, [r1], #0x04 /* LD:68-6b */
1285 ldr r7, [r1], #0x04 /* LD:6c-6f */
1286 strd r8, [r3], #0x08 /* ST:58-5f */
1287 ldr r8, [r1], #0x04 /* LD:70-73 */
1288 ldr r9, [r1], #0x04 /* LD:74-77 */
1289 strd r4, [r3], #0x08 /* ST:60-67 */
1290 ldr r4, [r1], #0x04 /* LD:78-7b */
1291 ldr r5, [r1], #0x04 /* LD:7c-7f */
1292 strd r6, [r3], #0x08 /* ST:68-6f */
1293 strd r8, [r3], #0x08 /* ST:70-77 */
1295 strd r4, [r3], #0x08 /* ST:78-7f */
1296 bge .Lmemcpy_w_loop128
1298 .Lmemcpy_w_lessthan128:
1299 adds r2, r2, #0x80 /* Adjust for extra sub */
1300 ldmfdeq sp!, {r4-r9}
1301 RETeq /* Return now if done */
1303 blt .Lmemcpy_w_lessthan32
1305 /* Copy 32 bytes at a time */
1314 strd r4, [r3], #0x08
1317 strd r6, [r3], #0x08
1318 strd r8, [r3], #0x08
1320 strd r4, [r3], #0x08
1321 bge .Lmemcpy_w_loop32
1323 .Lmemcpy_w_lessthan32:
1324 adds r2, r2, #0x20 /* Adjust for extra sub */
1325 ldmfdeq sp!, {r4-r9}
1326 RETeq /* Return now if done */
/* Computed branch into the 24/16/8-byte tail stanzas below (the stanza
 * entry lines are partially elided). */
1330 addne pc, pc, r4, lsl #1
1333 /* At least 24 bytes remaining */
1337 strd r4, [r3], #0x08
1339 /* At least 16 bytes remaining */
1343 strd r4, [r3], #0x08
1345 /* At least 8 bytes remaining */
1349 strd r4, [r3], #0x08
1351 /* Less than 8 bytes remaining */
1353 RETeq /* Return now if done */
1355 ldrge ip, [r1], #0x04
1356 strge ip, [r3], #0x04
1357 RETeq /* Return now if done */
1359 ldrb ip, [r1], #0x01
1361 ldrbge r2, [r1], #0x01
1362 strb ip, [r3], #0x01
1364 strbge r2, [r3], #0x01
/*
 * NOTE(review): non-contiguous excerpt of memcpy's mutually-misaligned
 * path (dst word-aligned, src not) and the short-copy dispatch.  The badN
 * loops shift-merge aligned source words by 8/16/24 bits, mirroring
 * _memcpy's srculN loops; loop setup and count updates are elided.
 * Code lines are byte-identical; comments only.
 */
1367 /* Place a literal pool here for the above ldr instructions to use */
1372 * At this point, it has not been possible to word align both buffers.
1373 * The destination buffer is word aligned, but the source buffer is not.
1384 .Lmemcpy_bad1_loop16:
/* As elsewhere in this file, both lsr-then-orr and lsl-then-orr variants
 * appear — apparently elided big-/little-endian #ifdef arms — TODO
 * confirm against the full source. */
1396 orr r4, r4, r5, lsr #24
1398 orr r5, r5, r6, lsr #24
1400 orr r6, r6, r7, lsr #24
1402 orr r7, r7, ip, lsr #24
1404 orr r4, r4, r5, lsl #24
1406 orr r5, r5, r6, lsl #24
1408 orr r6, r6, r7, lsl #24
1410 orr r7, r7, ip, lsl #24
1418 bge .Lmemcpy_bad1_loop16
1421 ldmfdeq sp!, {r4-r7}
1422 RETeq /* Return now if done */
1425 blt .Lmemcpy_bad_done
1427 .Lmemcpy_bad1_loop4:
1436 orr r4, r4, ip, lsr #24
1438 orr r4, r4, ip, lsl #24
1441 bge .Lmemcpy_bad1_loop4
1445 .Lmemcpy_bad2_loop16:
1457 orr r4, r4, r5, lsr #16
1459 orr r5, r5, r6, lsr #16
1461 orr r6, r6, r7, lsr #16
1463 orr r7, r7, ip, lsr #16
1465 orr r4, r4, r5, lsl #16
1467 orr r5, r5, r6, lsl #16
1469 orr r6, r6, r7, lsl #16
1471 orr r7, r7, ip, lsl #16
1479 bge .Lmemcpy_bad2_loop16
1482 ldmfdeq sp!, {r4-r7}
1483 RETeq /* Return now if done */
1486 blt .Lmemcpy_bad_done
1488 .Lmemcpy_bad2_loop4:
1497 orr r4, r4, ip, lsr #16
1499 orr r4, r4, ip, lsl #16
1502 bge .Lmemcpy_bad2_loop4
1506 .Lmemcpy_bad3_loop16:
1518 orr r4, r4, r5, lsr #8
1520 orr r5, r5, r6, lsr #8
1522 orr r6, r6, r7, lsr #8
1524 orr r7, r7, ip, lsr #8
1526 orr r4, r4, r5, lsl #8
1528 orr r5, r5, r6, lsl #8
1530 orr r6, r6, r7, lsl #8
1532 orr r7, r7, ip, lsl #8
1540 bge .Lmemcpy_bad3_loop16
1543 ldmfdeq sp!, {r4-r7}
1544 RETeq /* Return now if done */
1547 blt .Lmemcpy_bad_done
1549 .Lmemcpy_bad3_loop4:
1558 orr r4, r4, ip, lsr #8
1560 orr r4, r4, ip, lsl #8
1563 bge .Lmemcpy_bad3_loop4
1570 ldrb ip, [r1], #0x01
1572 ldrbge r2, [r1], #0x01
1573 strb ip, [r3], #0x01
1575 strbge r2, [r3], #0x01
1581 * Handle short copies (less than 16 bytes), possibly misaligned.
1582 * Some of these are *very* common, thanks to the network stack,
1583 * and so are handled specially.
/* Jump table indexed by length: pc += len * 4, i.e. one branch slot per
 * byte count (slot 0x00 is the elided entry just after this add). */
1586 add pc, pc, r2, lsl #2
1589 b .Lmemcpy_bytewise /* 0x01 */
1590 b .Lmemcpy_bytewise /* 0x02 */
1591 b .Lmemcpy_bytewise /* 0x03 */
1592 b .Lmemcpy_4 /* 0x04 */
1593 b .Lmemcpy_bytewise /* 0x05 */
1594 b .Lmemcpy_6 /* 0x06 */
1595 b .Lmemcpy_bytewise /* 0x07 */
1596 b .Lmemcpy_8 /* 0x08 */
1597 b .Lmemcpy_bytewise /* 0x09 */
1598 b .Lmemcpy_bytewise /* 0x0a */
1599 b .Lmemcpy_bytewise /* 0x0b */
1600 b .Lmemcpy_c /* 0x0c */
/* Generic byte-at-a-time fallback for odd short lengths. */
1602 mov r3, r0 /* We must not clobber r0 */
1603 ldrb ip, [r1], #0x01
1604 1: subs r2, r2, #0x01
1605 strb ip, [r3], #0x01
1606 ldrbne ip, [r1], #0x01
/*
 * NOTE(review): non-contiguous excerpt of the 4-byte memcpy special case.
 * Sixteen alignment cases, selected by (dst&3)<<2 | (src&3); each case is
 * padded to 1 << LMEMCPY_4_LOG2 bytes so a computed branch lands on it.
 * Endian-dependent variants appear back-to-back with their #ifdef lines
 * elided — TODO confirm.  Code lines are byte-identical; comments only.
 */
1610 /******************************************************************************
1611 * Special case for 4 byte copies
1613 #define LMEMCPY_4_LOG2 6 /* 64 bytes */
1614 #define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2
1618 orr r2, r2, r0, lsl #2
1621 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2
1624 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1632 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1634 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1635 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */
1637 mov r3, r3, lsl #8 /* r3 = 012. */
1638 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
1640 mov r3, r3, lsr #8 /* r3 = .210 */
1641 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1648 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1652 ldrh r2, [r1, #0x02]
1654 ldrh r3, [r1, #0x02]
1657 orr r3, r2, r3, lsl #16
1663 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1665 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */
1666 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */
1668 mov r3, r3, lsl #24 /* r3 = 0... */
1669 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
1671 mov r3, r3, lsr #24 /* r3 = ...0 */
1672 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1679 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1683 strb r2, [r0, #0x03]
1691 strb r1, [r0, #0x03]
1693 strh r3, [r0, #0x01]
1698 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1701 ldrh r3, [r1, #0x01]
1702 ldrb r1, [r1, #0x03]
1704 strh r3, [r0, #0x01]
1705 strb r1, [r0, #0x03]
1710 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1712 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1713 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1715 mov r1, r2, lsr #8 /* r1 = ...0 */
1717 mov r2, r2, lsl #8 /* r2 = .01. */
1718 orr r2, r2, r3, lsr #8 /* r2 = .012 */
1721 mov r2, r2, lsr #8 /* r2 = ...1 */
1722 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1723 mov r3, r3, lsr #8 /* r3 = ...3 */
1725 strh r2, [r0, #0x01]
1726 strb r3, [r0, #0x03]
1731 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1734 ldrh r3, [r1, #0x01]
1735 ldrb r1, [r1, #0x03]
1737 strh r3, [r0, #0x01]
1738 strb r1, [r0, #0x03]
1743 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1747 strh r2, [r0, #0x02]
1753 strh r3, [r0, #0x02]
1759 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1761 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1762 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */
1763 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1766 mov r2, r2, lsl #8 /* r2 = 012. */
1767 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1769 mov r2, r2, lsr #24 /* r2 = ...2 */
1770 orr r2, r2, r3, lsl #8 /* r2 = xx32 */
1772 strh r2, [r0, #0x02]
1777 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1780 ldrh r3, [r1, #0x02]
1782 strh r3, [r0, #0x02]
1787 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1789 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */
1790 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1791 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */
1792 strh r1, [r0, #0x02]
1794 mov r3, r3, lsr #24 /* r3 = ...1 */
1795 orr r3, r3, r2, lsl #8 /* r3 = xx01 */
1797 mov r3, r3, lsl #8 /* r3 = 321. */
1798 orr r3, r3, r2, lsr #24 /* r3 = 3210 */
1805 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1807 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1809 strb r2, [r0, #0x03]
1812 strh r3, [r0, #0x01]
1818 strh r3, [r0, #0x01]
1819 strb r1, [r0, #0x03]
1825 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1828 ldrh r3, [r1, #0x01]
1829 ldrb r1, [r1, #0x03]
1831 strh r3, [r0, #0x01]
1832 strb r1, [r0, #0x03]
1837 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1840 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1841 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1842 strb r3, [r0, #0x03]
1843 mov r3, r3, lsr #8 /* r3 = ...2 */
1844 orr r3, r3, r2, lsl #8 /* r3 = ..12 */
1845 strh r3, [r0, #0x01]
1846 mov r2, r2, lsr #8 /* r2 = ...0 */
1849 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1850 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1852 mov r2, r2, lsr #8 /* r2 = ...1 */
1853 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1854 strh r2, [r0, #0x01]
1855 mov r3, r3, lsr #8 /* r3 = ...3 */
1856 strb r3, [r0, #0x03]
1862 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1865 ldrh r3, [r1, #0x01]
1866 ldrb r1, [r1, #0x03]
1868 strh r3, [r0, #0x01]
1869 strb r1, [r0, #0x03]
/*
 * NOTE(review): LMEMCPY_6 -- 6-byte copy, dispatched by the low two bits of
 * dst (r0) and src (r1) into 2^LMEMCPY_6_LOG2-byte padded stanzas; the
 * 'addne pc, r3, r2, lsl #LMEMCPY_6_LOG2' below is the computed-goto jump
 * (r2 presumably = (src&3)|((dst&3)<<2) -- the masking lines are elided).
 * Endian-conditional lines are missing between rows; keep code byte-identical.
 */
1874 /******************************************************************************
1875 * Special case for 6 byte copies
1877 #define LMEMCPY_6_LOG2 6 /* 64 bytes */
1878 #define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2
1882 orr r2, r2, r0, lsl #2
1885 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2
1888 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1891 ldrh r3, [r1, #0x04]
1893 strh r3, [r0, #0x04]
1898 * 0001: dst is 32-bit aligned, src is 8-bit aligned
/* Word loads at [-1] and [+3] straddle the 6 payload bytes; shifts
 * re-pack them into one aligned word plus a trailing halfword. */
1900 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1901 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */
1903 mov r2, r2, lsl #8 /* r2 = 012. */
1904 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1906 mov r2, r2, lsr #8 /* r2 = .210 */
1907 orr r2, r2, r3, lsl #24 /* r2 = 3210 */
1909 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */
1911 strh r3, [r0, #0x04]
1916 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1918 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1919 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1921 mov r1, r3, lsr #16 /* r1 = ..23 */
1922 orr r1, r1, r2, lsl #16 /* r1 = 0123 */
1924 strh r3, [r0, #0x04]
1926 mov r1, r3, lsr #16 /* r1 = ..54 */
1927 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1929 strh r1, [r0, #0x04]
1935 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1937 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1938 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */
1939 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r1 = xxx5 */
1941 mov r2, r2, lsl #24 /* r2 = 0... */
1942 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
1943 mov r3, r3, lsl #8 /* r3 = 234. */
1944 orr r1, r3, r1, lsr #24 /* r1 = 2345 */
1946 mov r2, r2, lsr #24 /* r2 = ...0 */
1947 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
1948 mov r1, r1, lsl #8 /* r1 = xx5. */
1949 orr r1, r1, r3, lsr #24 /* r1 = xx54 */
1952 strh r1, [r0, #0x04]
/*
 * NOTE(review): LMEMCPY_6, dst byte-aligned cases.  Stores are split as
 * byte + halfword(s) + byte so no store crosses its natural alignment.
 * BE/LE instruction sequences are interleaved here because the #ifdef
 * lines separating them were elided by extraction.
 */
1957 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1959 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1960 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */
1961 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1962 strh r1, [r0, #0x01]
1964 mov r1, r3, lsr #24 /* r1 = ...0 */
1966 mov r3, r3, lsl #8 /* r3 = 123. */
1967 orr r3, r3, r2, lsr #8 /* r3 = 1234 */
1970 mov r3, r3, lsr #24 /* r3 = ...3 */
1971 orr r3, r3, r2, lsl #8 /* r3 = .543 */
1972 mov r2, r2, lsr #8 /* r2 = ...5 */
1974 strh r3, [r0, #0x03]
1975 strb r2, [r0, #0x05]
1980 * 0101: dst is 8-bit aligned, src is 8-bit aligned
/* Same misalignment on both sides: halfword/halfword/byte copy. */
1983 ldrh r3, [r1, #0x01]
1984 ldrh ip, [r1, #0x03]
1985 ldrb r1, [r1, #0x05]
1987 strh r3, [r0, #0x01]
1988 strh ip, [r0, #0x03]
1989 strb r1, [r0, #0x05]
1994 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1996 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1997 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1999 mov r3, r2, lsr #8 /* r3 = ...0 */
2001 strb r1, [r0, #0x05]
2002 mov r3, r1, lsr #8 /* r3 = .234 */
2003 strh r3, [r0, #0x03]
2004 mov r3, r2, lsl #8 /* r3 = .01. */
2005 orr r3, r3, r1, lsr #24 /* r3 = .012 */
2006 strh r3, [r0, #0x01]
2010 strb r3, [r0, #0x05]
2011 mov r3, r1, lsr #8 /* r3 = .543 */
2012 strh r3, [r0, #0x03]
2013 mov r3, r2, lsr #8 /* r3 = ...1 */
2014 orr r3, r3, r1, lsl #8 /* r3 = 4321 */
2015 strh r3, [r0, #0x01]
2021 * 0111: dst is 8-bit aligned, src is 8-bit aligned
/* Identical body to case 0101; each stanza must stand alone because
 * dispatch jumps directly into its padded slot. */
2024 ldrh r3, [r1, #0x01]
2025 ldrh ip, [r1, #0x03]
2026 ldrb r1, [r1, #0x05]
2028 strh r3, [r0, #0x01]
2029 strh ip, [r0, #0x03]
2030 strb r1, [r0, #0x05]
/*
 * NOTE(review): LMEMCPY_6, dst halfword-aligned and remaining byte-aligned
 * cases.  Elided #ifdef endian branches explain the duplicated
 * shift/merge sequences between rows.
 */
2035 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2038 ldr r2, [r1] /* r2 = 0123 */
2039 ldrh r3, [r1, #0x04] /* r3 = ..45 */
2040 mov r1, r2, lsr #16 /* r1 = ..01 */
/* NOTE(review): "lsl#16" below lacks a space -- GAS accepts it, but it is
 * inconsistent with the rest of the file. */
2041 orr r3, r3, r2, lsl#16 /* r3 = 2345 */
2045 ldrh r2, [r1, #0x04] /* r2 = ..54 */
2046 ldr r3, [r1] /* r3 = 3210 */
2047 mov r2, r2, lsl #16 /* r2 = 54.. */
2048 orr r2, r2, r3, lsr #16 /* r2 = 5432 */
2056 * 1001: dst is 16-bit aligned, src is 8-bit aligned
2058 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2059 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */
2060 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
2062 mov r2, r2, lsr #8 /* r2 = .345 */
2063 orr r2, r2, r3, lsl #24 /* r2 = 2345 */
2065 mov r2, r2, lsl #8 /* r2 = 543. */
2066 orr r2, r2, r3, lsr #24 /* r2 = 5432 */
2074 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2084 * 1011: dst is 16-bit aligned, src is 8-bit aligned
2086 ldrb r3, [r1] /* r3 = ...0 */
2087 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2088 ldrb r1, [r1, #0x05] /* r1 = ...5 */
2090 mov r3, r3, lsl #8 /* r3 = ..0. */
2091 orr r3, r3, r2, lsr #24 /* r3 = ..01 */
2092 orr r1, r1, r2, lsl #8 /* r1 = 2345 */
2094 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2095 mov r1, r1, lsl #24 /* r1 = 5... */
2096 orr r1, r1, r2, lsr #8 /* r1 = 5432 */
2104 * 1100: dst is 8-bit aligned, src is 32-bit aligned
2106 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2107 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */
2109 mov r3, r2, lsr #24 /* r3 = ...0 */
2111 mov r2, r2, lsl #8 /* r2 = 123. */
2112 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
2115 mov r2, r2, lsr #8 /* r2 = .321 */
2116 orr r2, r2, r1, lsl #24 /* r2 = 4321 */
2117 mov r1, r1, lsr #8 /* r1 = ...5 */
2120 strb r1, [r0, #0x05]
2125 * 1101: dst is 8-bit aligned, src is 8-bit aligned
2128 ldrh r3, [r1, #0x01]
2129 ldrh ip, [r1, #0x03]
2130 ldrb r1, [r1, #0x05]
2132 strh r3, [r0, #0x01]
2133 strh ip, [r0, #0x03]
2134 strb r1, [r0, #0x05]
2139 * 1110: dst is 8-bit aligned, src is 16-bit aligned
2141 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2142 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
2144 mov r3, r2, lsr #8 /* r3 = ...0 */
2146 mov r2, r2, lsl #24 /* r2 = 1... */
2147 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
2150 mov r2, r2, lsr #8 /* r2 = ...1 */
2151 orr r2, r2, r1, lsl #8 /* r2 = 4321 */
2152 mov r1, r1, lsr #24 /* r1 = ...5 */
2155 strb r1, [r0, #0x05]
2160 * 1111: dst is 8-bit aligned, src is 8-bit aligned
2164 ldrb r1, [r1, #0x05]
2167 strb r1, [r0, #0x05]
/*
 * NOTE(review): LMEMCPY_8 -- 8-byte copy, same padded-stanza jump-table
 * scheme as LMEMCPY_6 (stanzas aligned to 2^LMEMCPY_8_LOG2 = 64 bytes,
 * dispatched via 'addne pc').  'ip' (r12) is used as an extra scratch
 * register; byte-index comments as before.
 */
2172 /******************************************************************************
2173 * Special case for 8 byte copies
2175 #define LMEMCPY_8_LOG2 6 /* 64 bytes */
2176 #define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
2180 orr r2, r2, r0, lsl #2
2183 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2
2186 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2196 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2198 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2199 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
2200 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2202 mov r3, r3, lsl #8 /* r3 = 012. */
2203 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
2204 orr r2, r1, r2, lsl #8 /* r2 = 4567 */
2206 mov r3, r3, lsr #8 /* r3 = .210 */
2207 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
2208 mov r1, r1, lsl #24 /* r1 = 7... */
2209 orr r2, r1, r2, lsr #8 /* r2 = 7654 */
2217 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2219 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2220 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2221 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2223 mov r2, r2, lsl #16 /* r2 = 01.. */
2224 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2225 orr r3, r1, r3, lsl #16 /* r3 = 4567 */
2227 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2228 mov r3, r3, lsr #16 /* r3 = ..54 */
2229 orr r3, r3, r1, lsl #16 /* r3 = 7654 */
2237 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2239 ldrb r3, [r1] /* r3 = ...0 */
2240 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2241 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */
2243 mov r3, r3, lsl #24 /* r3 = 0... */
2244 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
2245 mov r2, r2, lsl #24 /* r2 = 4... */
2246 orr r2, r2, r1, lsr #8 /* r2 = 4567 */
2248 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2249 mov r2, r2, lsr #24 /* r2 = ...4 */
2250 orr r2, r2, r1, lsl #8 /* r2 = 7654 */
2258 * 0100: dst is 8-bit aligned, src is 32-bit aligned
2260 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
2261 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */
2263 mov r1, r3, lsr #24 /* r1 = ...0 */
2265 mov r1, r3, lsr #8 /* r1 = .012 */
2266 strb r2, [r0, #0x07]
2267 mov r3, r3, lsl #24 /* r3 = 3... */
2268 orr r3, r3, r2, lsr #8 /* r3 = 3456 */
2271 mov r1, r2, lsr #24 /* r1 = ...7 */
2272 strb r1, [r0, #0x07]
2273 mov r1, r3, lsr #8 /* r1 = .321 */
2274 mov r3, r3, lsr #24 /* r3 = ...3 */
2275 orr r3, r3, r2, lsl #8 /* r3 = 6543 */
2277 strh r1, [r0, #0x01]
2283 * 0101: dst is 8-bit aligned, src is 8-bit aligned
/* Matching misalignment: straight halfword/byte copy. */
2286 ldrh r3, [r1, #0x01]
2288 ldrb r1, [r1, #0x07]
2290 strh r3, [r0, #0x01]
2292 strb r1, [r0, #0x07]
2297 * 0110: dst is 8-bit aligned, src is 16-bit aligned
2299 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2300 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2301 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2303 mov ip, r2, lsr #8 /* ip = ...0 */
2305 mov ip, r2, lsl #8 /* ip = .01. */
2306 orr ip, ip, r3, lsr #24 /* ip = .012 */
2307 strb r1, [r0, #0x07]
2308 mov r3, r3, lsl #8 /* r3 = 345. */
2309 orr r3, r3, r1, lsr #8 /* r3 = 3456 */
2311 strb r2, [r0] /* 0 */
2312 mov ip, r1, lsr #8 /* ip = ...7 */
2313 strb ip, [r0, #0x07] /* 7 */
2314 mov ip, r2, lsr #8 /* ip = ...1 */
2315 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2316 mov r3, r3, lsr #8 /* r3 = .543 */
2317 orr r3, r3, r1, lsl #24 /* r3 = 6543 */
2319 strh ip, [r0, #0x01]
2325 * 0111: dst is 8-bit aligned, src is 8-bit aligned
2327 ldrb r3, [r1] /* r3 = ...0 */
2328 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2329 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */
2330 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2332 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */
2334 strh r3, [r0, #0x01]
2335 orr r2, r2, ip, lsl #16 /* r2 = 3456 */
2337 strh ip, [r0, #0x01]
2338 orr r2, r3, r2, lsl #16 /* r2 = 6543 */
2341 strb r1, [r0, #0x07]
/*
 * NOTE(review): LMEMCPY_8, dst halfword- and byte-aligned cases.  Duplicate
 * merge sequences belong to elided big-/little-endian #ifdef branches.
 */
2346 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2348 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2349 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2350 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
2353 mov r1, r3, lsr #16 /* r1 = ..45 */
/* NOTE(review): stray space before the comma ("r1 ,r2") -- legal for GAS
 * but inconsistent with the rest of the file. */
2354 orr r2, r1 ,r2, lsl #16 /* r2 = 2345 */
2357 orr r2, r1, r3, lsl #16 /* r2 = 5432 */
2358 mov r3, r3, lsr #16 /* r3 = ..76 */
2361 strh r3, [r0, #0x06]
2366 * 1001: dst is 16-bit aligned, src is 8-bit aligned
2368 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
2369 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2370 ldrb ip, [r1, #0x07] /* ip = ...7 */
2371 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
2374 mov r1, r2, lsl #24 /* r1 = 2... */
2375 orr r1, r1, r3, lsr #8 /* r1 = 2345 */
2376 orr r3, ip, r3, lsl #8 /* r3 = 4567 */
2378 mov r1, r2, lsr #24 /* r1 = ...2 */
2379 orr r1, r1, r3, lsl #8 /* r1 = 5432 */
2380 mov r3, r3, lsr #24 /* r3 = ...6 */
2381 orr r3, r3, ip, lsl #8 /* r3 = ..76 */
2384 strh r3, [r0, #0x06]
2389 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2393 ldrh r3, [r1, #0x06]
2396 strh r3, [r0, #0x06]
2401 * 1011: dst is 16-bit aligned, src is 8-bit aligned
2403 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */
2404 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2405 ldrb ip, [r1] /* ip = ...0 */
2406 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */
2407 strh r1, [r0, #0x06]
2409 mov r3, r3, lsr #24 /* r3 = ...5 */
2410 orr r3, r3, r2, lsl #8 /* r3 = 2345 */
2411 mov r2, r2, lsr #24 /* r2 = ...1 */
2412 orr r2, r2, ip, lsl #8 /* r2 = ..01 */
2414 mov r3, r3, lsl #24 /* r3 = 5... */
2415 orr r3, r3, r2, lsr #8 /* r3 = 5432 */
2416 orr r2, ip, r2, lsl #8 /* r2 = 3210 */
2424 * 1100: dst is 8-bit aligned, src is 32-bit aligned
2426 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2427 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2428 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */
2429 strh r1, [r0, #0x05]
2431 strb r3, [r0, #0x07]
2432 mov r1, r2, lsr #24 /* r1 = ...0 */
2434 mov r2, r2, lsl #8 /* r2 = 123. */
2435 orr r2, r2, r3, lsr #24 /* r2 = 1234 */
2439 mov r1, r3, lsr #24 /* r1 = ...7 */
2440 strb r1, [r0, #0x07]
2441 mov r2, r2, lsr #8 /* r2 = .321 */
2442 orr r2, r2, r3, lsl #24 /* r2 = 4321 */
2449 * 1101: dst is 8-bit aligned, src is 8-bit aligned
2451 ldrb r3, [r1] /* r3 = ...0 */
2452 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */
2453 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2454 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2456 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */
2458 strh ip, [r0, #0x05]
2459 orr r2, r3, r2, lsl #16 /* r2 = 1234 */
2461 strh r3, [r0, #0x05]
2462 orr r2, r2, ip, lsl #16 /* r2 = 4321 */
2465 strb r1, [r0, #0x07]
2470 * 1110: dst is 8-bit aligned, src is 16-bit aligned
2472 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2473 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2474 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2476 mov ip, r2, lsr #8 /* ip = ...0 */
2478 mov ip, r2, lsl #24 /* ip = 1... */
2479 orr ip, ip, r3, lsr #8 /* ip = 1234 */
2480 strb r1, [r0, #0x07]
2481 mov r1, r1, lsr #8 /* r1 = ...6 */
2482 orr r1, r1, r3, lsl #8 /* r1 = 3456 */
2485 mov ip, r2, lsr #8 /* ip = ...1 */
2486 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2487 mov r2, r1, lsr #8 /* r2 = ...7 */
2488 strb r2, [r0, #0x07]
2489 mov r1, r1, lsl #8 /* r1 = .76. */
2490 orr r1, r1, r3, lsr #24 /* r1 = .765 */
2493 strh r1, [r0, #0x05]
2498 * 1111: dst is 8-bit aligned, src is 8-bit aligned
2502 ldrh r3, [r1, #0x05]
2503 ldrb r1, [r1, #0x07]
2506 strh r3, [r0, #0x05]
2507 strb r1, [r0, #0x07]
/*
 * NOTE(review): LMEMCPY_C -- 12-byte copy.  Stanzas are padded to
 * 2^LMEMCPY_C_LOG2 = 128 bytes (the sequences are longer than the 6/8-byte
 * cases), same computed-goto dispatch.  Byte indices now run 0..B (hex).
 */
2511 /******************************************************************************
2512 * Special case for 12 byte copies
2514 #define LMEMCPY_C_LOG2 7 /* 128 bytes */
2515 #define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2
2519 orr r2, r2, r0, lsl #2
2522 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2
2525 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2537 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2539 ldrb r2, [r1, #0xb] /* r2 = ...B */
2540 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
2541 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2542 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2544 orr r2, r2, ip, lsl #8 /* r2 = 89AB */
2546 mov r2, ip, lsr #24 /* r2 = ...7 */
2547 orr r2, r2, r3, lsl #8 /* r2 = 4567 */
2548 mov r1, r1, lsl #8 /* r1 = 012. */
2549 orr r1, r1, r3, lsr #24 /* r1 = 0123 */
2551 mov r2, r2, lsl #24 /* r2 = B... */
2552 orr r2, r2, ip, lsr #8 /* r2 = BA98 */
2554 mov r2, ip, lsl #24 /* r2 = 7... */
2555 orr r2, r2, r3, lsr #8 /* r2 = 7654 */
2556 mov r1, r1, lsr #8 /* r1 = .210 */
2557 orr r1, r1, r3, lsl #24 /* r1 = 3210 */
2565 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2567 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2568 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2569 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
2570 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
2572 mov r2, r2, lsl #16 /* r2 = 01.. */
2573 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2575 mov r3, r3, lsl #16 /* r3 = 45.. */
2576 orr r3, r3, ip, lsr #16 /* r3 = 4567 */
2577 orr r1, r1, ip, lsl #16 /* r1 = 89AB */
2579 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2581 mov r3, r3, lsr #16 /* r3 = ..54 */
2582 orr r3, r3, ip, lsl #16 /* r3 = 7654 */
2583 mov r1, r1, lsl #16 /* r1 = BA.. */
2584 orr r1, r1, ip, lsr #16 /* r1 = BA98 */
2592 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2594 ldrb r2, [r1] /* r2 = ...0 */
2595 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
2596 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
2597 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
2599 mov r2, r2, lsl #24 /* r2 = 0... */
2600 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
2602 mov r3, r3, lsl #24 /* r3 = 4... */
2603 orr r3, r3, ip, lsr #8 /* r3 = 4567 */
2604 mov r1, r1, lsr #8 /* r1 = .9AB */
2605 orr r1, r1, ip, lsl #24 /* r1 = 89AB */
2607 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
2609 mov r3, r3, lsr #24 /* r3 = ...4 */
2610 orr r3, r3, ip, lsl #8 /* r3 = 7654 */
2611 mov r1, r1, lsl #8 /* r1 = BA9. */
2612 orr r1, r1, ip, lsr #24 /* r1 = BA98 */
2620 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
2622 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2623 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2624 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */
2625 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
2626 strh r1, [r0, #0x01]
2628 mov r1, r2, lsr #24 /* r1 = ...0 */
2630 mov r1, r2, lsl #24 /* r1 = 3... */
2631 orr r2, r1, r3, lsr #8 /* r2 = 3456 */
2632 mov r1, r3, lsl #24 /* r1 = 7... */
2633 orr r1, r1, ip, lsr #8 /* r1 = 789A */
2636 mov r1, r2, lsr #24 /* r1 = ...3 */
2637 orr r2, r1, r3, lsl #8 /* r2 = 6543 */
2638 mov r1, r3, lsr #24 /* r1 = ...7 */
2639 orr r1, r1, ip, lsl #8 /* r1 = A987 */
2640 mov ip, ip, lsr #24 /* ip = ...B */
2644 strb ip, [r0, #0x0b]
/*
 * NOTE(review): LMEMCPY_C, dst byte-1-aligned and halfword-aligned cases.
 * Elided lines between rows carry the endian #ifdefs, stores and returns.
 */
2649 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
2652 ldrh r3, [r1, #0x01]
2656 ldrb r1, [r1, #0x0b]
2657 strh r3, [r0, #0x01]
2660 strb r1, [r0, #0x0b]
2665 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
2667 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2668 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2669 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
2670 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
2672 mov r2, r2, ror #8 /* r2 = 1..0 */
2674 mov r2, r2, lsr #16 /* r2 = ..1. */
2675 orr r2, r2, r3, lsr #24 /* r2 = ..12 */
2676 strh r2, [r0, #0x01]
2677 mov r2, r3, lsl #8 /* r2 = 345. */
2678 orr r3, r2, ip, lsr #24 /* r3 = 3456 */
2679 mov r2, ip, lsl #8 /* r2 = 789. */
2680 orr r2, r2, r1, lsr #8 /* r2 = 789A */
2683 mov r2, r2, lsr #8 /* r2 = ...1 */
2684 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2685 strh r2, [r0, #0x01]
2686 mov r2, r3, lsr #8 /* r2 = .543 */
2687 orr r3, r2, ip, lsl #24 /* r3 = 6543 */
2688 mov r2, ip, lsr #8 /* r2 = .987 */
2689 orr r2, r2, r1, lsl #24 /* r2 = A987 */
2690 mov r1, r1, lsr #8 /* r1 = ...B */
2694 strb r1, [r0, #0x0b]
2699 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
2702 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
2703 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
2704 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
2707 mov r2, r3, lsr #16 /* r2 = ..12 */
2708 strh r2, [r0, #0x01]
2709 mov r3, r3, lsl #16 /* r3 = 34.. */
2710 orr r3, r3, ip, lsr #16 /* r3 = 3456 */
2711 mov ip, ip, lsl #16 /* ip = 78.. */
2712 orr ip, ip, r1, lsr #16 /* ip = 789A */
2713 mov r1, r1, lsr #8 /* r1 = .9AB */
2715 strh r3, [r0, #0x01]
2716 mov r3, r3, lsr #16 /* r3 = ..43 */
2717 orr r3, r3, ip, lsl #16 /* r3 = 6543 */
2718 mov ip, ip, lsr #16 /* ip = ..87 */
2719 orr ip, ip, r1, lsl #16 /* ip = A987 */
2720 mov r1, r1, lsr #16 /* r1 = ..xB */
2724 strb r1, [r0, #0x0b]
2729 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2731 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */
2732 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2733 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */
2734 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
2737 mov r1, ip, lsl #16 /* r1 = 23.. */
2738 orr r1, r1, r3, lsr #16 /* r1 = 2345 */
2739 mov r3, r3, lsl #16 /* r3 = 67.. */
2740 orr r3, r3, r2, lsr #16 /* r3 = 6789 */
2743 orr r1, r1, r3, lsl #16 /* r1 = 5432 */
2744 mov r3, r3, lsr #16 /* r3 = ..76 */
2745 orr r3, r3, r2, lsl #16 /* r3 = 9876 */
2746 mov r2, r2, lsr #16 /* r2 = ..BA */
2750 strh r2, [r0, #0x0a]
2755 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
2757 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
2758 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2759 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */
2761 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
2762 ldrb r1, [r1, #0x0b] /* r1 = ...B */
2764 mov r2, r2, lsl #24 /* r2 = 2... */
2765 orr r2, r2, r3, lsr #8 /* r2 = 2345 */
2766 mov r3, r3, lsl #24 /* r3 = 6... */
2767 orr r3, r3, ip, lsr #8 /* r3 = 6789 */
2768 orr r1, r1, ip, lsl #8 /* r1 = 89AB */
2770 mov r2, r2, lsr #24 /* r2 = ...2 */
2771 orr r2, r2, r3, lsl #8 /* r2 = 5432 */
2772 mov r3, r3, lsr #24 /* r3 = ...6 */
2773 orr r3, r3, ip, lsl #8 /* r3 = 9876 */
2774 mov r1, r1, lsl #8 /* r1 = ..B. */
2775 orr r1, r1, ip, lsr #24 /* r1 = ..BA */
2779 strh r1, [r0, #0x0a]
2784 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2789 ldrh r1, [r1, #0x0a]
2793 strh r1, [r0, #0x0a]
2798 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
2800 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */
2801 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */
2802 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */
2803 strh ip, [r0, #0x0a]
2804 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2805 ldrb r1, [r1] /* r1 = ...0 */
2807 mov r2, r2, lsr #24 /* r2 = ...9 */
2808 orr r2, r2, r3, lsl #8 /* r2 = 6789 */
2809 mov r3, r3, lsr #24 /* r3 = ...5 */
2810 orr r3, r3, ip, lsl #8 /* r3 = 2345 */
2811 mov r1, r1, lsl #8 /* r1 = ..0. */
2812 orr r1, r1, ip, lsr #24 /* r1 = ..01 */
2814 mov r2, r2, lsl #24 /* r2 = 9... */
2815 orr r2, r2, r3, lsr #8 /* r2 = 9876 */
2816 mov r3, r3, lsl #24 /* r3 = 5... */
2817 orr r3, r3, ip, lsr #8 /* r3 = 5432 */
2818 orr r1, r1, ip, lsl #8 /* r1 = 3210 */
/*
 * NOTE(review): LMEMCPY_C, dst byte-3-aligned cases, then the closing
 * #endif of the _ARM_ARCH_5E special-case section.
 */
2827 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
2829 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2830 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */
2831 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */
2833 mov r3, r2, lsr #24 /* r3 = ...0 */
2835 mov r2, r2, lsl #8 /* r2 = 123. */
2836 orr r2, r2, ip, lsr #24 /* r2 = 1234 */
2838 mov r2, ip, lsl #8 /* r2 = 567. */
2839 orr r2, r2, r1, lsr #24 /* r2 = 5678 */
2841 mov r2, r1, lsr #8 /* r2 = ..9A */
2842 strh r2, [r0, #0x09]
2843 strb r1, [r0, #0x0b]
2846 mov r3, r2, lsr #8 /* r3 = .321 */
2847 orr r3, r3, ip, lsl #24 /* r3 = 4321 */
2849 mov r3, ip, lsr #8 /* r3 = .765 */
2850 orr r3, r3, r1, lsl #24 /* r3 = 8765 */
2852 mov r1, r1, lsr #8 /* r1 = .BA9 */
2853 strh r1, [r0, #0x09]
2854 mov r1, r1, lsr #16 /* r1 = ...B */
2855 strb r1, [r0, #0x0b]
2861 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2863 ldrb r2, [r1, #0x0b] /* r2 = ...B */
2864 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */
2865 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2866 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2867 strb r2, [r0, #0x0b]
2869 strh r3, [r0, #0x09]
2870 mov r3, r3, lsr #16 /* r3 = ..78 */
2871 orr r3, r3, ip, lsl #16 /* r3 = 5678 */
2872 mov ip, ip, lsr #16 /* ip = ..34 */
2873 orr ip, ip, r1, lsl #16 /* ip = 1234 */
2874 mov r1, r1, lsr #16 /* r1 = ..x0 */
2876 mov r2, r3, lsr #16 /* r2 = ..A9 */
2877 strh r2, [r0, #0x09]
2878 mov r3, r3, lsl #16 /* r3 = 87.. */
2879 orr r3, r3, ip, lsr #16 /* r3 = 8765 */
2880 mov ip, ip, lsl #16 /* ip = 43.. */
2881 orr ip, ip, r1, lsr #16 /* ip = 4321 */
2882 mov r1, r1, lsr #8 /* r1 = .210 */
2891 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
/* First sequence loads high-to-low (big-endian layout comments);
 * the second, presumably the little-endian alternative, reloads
 * low-to-high -- the separating #ifdef is elided. */
2894 ldrh r2, [r1, #0x0a] /* r2 = ..AB */
2895 ldr ip, [r1, #0x06] /* ip = 6789 */
2896 ldr r3, [r1, #0x02] /* r3 = 2345 */
2897 ldrh r1, [r1] /* r1 = ..01 */
2898 strb r2, [r0, #0x0b]
2899 mov r2, r2, lsr #8 /* r2 = ...A */
2900 orr r2, r2, ip, lsl #8 /* r2 = 789A */
2901 mov ip, ip, lsr #8 /* ip = .678 */
2902 orr ip, ip, r3, lsl #24 /* ip = 5678 */
2903 mov r3, r3, lsr #8 /* r3 = .234 */
2904 orr r3, r3, r1, lsl #24 /* r3 = 1234 */
2905 mov r1, r1, lsr #8 /* r1 = ...0 */
2909 strh r2, [r0, #0x09]
2911 ldrh r2, [r1] /* r2 = ..10 */
2912 ldr r3, [r1, #0x02] /* r3 = 5432 */
2913 ldr ip, [r1, #0x06] /* ip = 9876 */
2914 ldrh r1, [r1, #0x0a] /* r1 = ..BA */
2916 mov r2, r2, lsr #8 /* r2 = ...1 */
2917 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2918 mov r3, r3, lsr #24 /* r3 = ...5 */
2919 orr r3, r3, ip, lsl #8 /* r3 = 8765 */
2920 mov ip, ip, lsr #24 /* ip = ...9 */
2921 orr ip, ip, r1, lsl #8 /* ip = .BA9 */
2922 mov r1, r1, lsr #8 /* r1 = ...B */
2925 strh ip, [r0, #0x09]
2926 strb r1, [r0, #0x0b]
2932 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2938 ldrh r2, [r1, #0x09]
2939 ldrb r1, [r1, #0x0b]
2942 strh r2, [r0, #0x09]
2943 strb r1, [r0, #0x0b]
2946 #endif /* _ARM_ARCH_5E */