2 * Copyright (c) 2004 Olivier Houchard
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * Copyright 2003 Wasabi Systems, Inc.
28 * All rights reserved.
30 * Written by Steve C. Woodford for Wasabi Systems, Inc.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed for the NetBSD Project by
43 * Wasabi Systems, Inc.
44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
45 * or promote products derived from this software without specific prior
48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
58 * POSSIBILITY OF SUCH DAMAGE.
61 * Copyright (c) 1997 The NetBSD Foundation, Inc.
62 * All rights reserved.
64 * This code is derived from software contributed to The NetBSD Foundation
65 * by Neil A. Carson and Mark Brinicombe
67 * Redistribution and use in source and binary forms, with or without
68 * modification, are permitted provided that the following conditions
70 * 1. Redistributions of source code must retain the above copyright
71 * notice, this list of conditions and the following disclaimer.
72 * 2. Redistributions in binary form must reproduce the above copyright
73 * notice, this list of conditions and the following disclaimer in the
74 * documentation and/or other materials provided with the distribution.
76 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
77 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
78 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
79 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
80 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
81 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
82 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
83 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
84 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
85 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
86 * POSSIBILITY OF SUCH DAMAGE.
89 #include <machine/asm.h>
90 #include <machine/asmacros.h>
91 __FBSDID("$FreeBSD$");
96 .word _C_LABEL(_arm_memcpy)
98 .word _C_LABEL(_arm_bzero)
100 .word _C_LABEL(_min_memcpy_size)
102 .word _C_LABEL(_min_bzero_size)
104 * memset: Sets a block of memory to the specified value
109 * r2 - number of bytes to write
114 /* LINTSTUB: Func: void bzero(void *, size_t) */
120 ldr r2, .L_min_bzero_size
124 stmfd sp!, {r0, r1, lr}
129 ldmfd sp!, {r0, r1, lr}
135 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
137 and r3, r1, #0xff /* We deal with bytes */
140 cmp r1, #0x04 /* Do we have less than 4 bytes */
142 blt .Lmemset_lessthanfour
144 /* Ok first we will word align the address */
145 ands r2, ip, #0x03 /* Get the bottom two bits */
146 bne .Lmemset_wordunaligned /* The address is not word aligned */
148 /* We are now word aligned */
149 .Lmemset_wordaligned:
150 orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */
152 tst ip, #0x04 /* Quad-align for armv5e */
156 orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */
158 subne r1, r1, #0x04 /* Quad-align if necessary */
159 strne r3, [ip], #0x04
162 blt .Lmemset_loop4 /* If less than 16 then use words */
163 mov r2, r3 /* Duplicate data */
164 cmp r1, #0x80 /* If < 128 then skip the big loop */
167 /* Do 128 bytes at a time */
171 strged r2, [ip], #0x08
172 strged r2, [ip], #0x08
173 strged r2, [ip], #0x08
174 strged r2, [ip], #0x08
175 strged r2, [ip], #0x08
176 strged r2, [ip], #0x08
177 strged r2, [ip], #0x08
178 strged r2, [ip], #0x08
179 strged r2, [ip], #0x08
180 strged r2, [ip], #0x08
181 strged r2, [ip], #0x08
182 strged r2, [ip], #0x08
183 strged r2, [ip], #0x08
184 strged r2, [ip], #0x08
185 strged r2, [ip], #0x08
186 strged r2, [ip], #0x08
206 RETeq /* Zero length so just exit */
208 add r1, r1, #0x80 /* Adjust for extra sub */
210 /* Do 32 bytes at a time */
214 strged r2, [ip], #0x08
215 strged r2, [ip], #0x08
216 strged r2, [ip], #0x08
217 strged r2, [ip], #0x08
225 RETeq /* Zero length so just exit */
227 adds r1, r1, #0x10 /* Partially adjust for extra sub */
229 /* Deal with 16 bytes or more */
231 strged r2, [ip], #0x08
232 strged r2, [ip], #0x08
237 RETeq /* Zero length so just exit */
239 addlt r1, r1, #0x10 /* Possibly adjust for extra sub */
241 /* We have at least 4 bytes so copy as words */
244 strge r3, [ip], #0x04
246 RETeq /* Zero length so just exit */
249 /* Compensate for 64-bit alignment check */
257 strb r3, [ip], #0x01 /* Set 1 byte */
258 strgeb r3, [ip], #0x01 /* Set another byte */
259 strgtb r3, [ip] /* and a third */
262 .Lmemset_wordunaligned:
264 strb r3, [ip], #0x01 /* Set 1 byte */
266 strgeb r3, [ip], #0x01 /* Set another byte */
268 strgtb r3, [ip], #0x01 /* and a third */
269 cmp r1, #0x04 /* More than 4 bytes left? */
270 bge .Lmemset_wordaligned /* Yup */
272 .Lmemset_lessthanfour:
274 RETeq /* Zero length so exit */
275 strb r3, [ip], #0x01 /* Set 1 byte */
277 strgeb r3, [ip], #0x01 /* Set another byte */
278 strgtb r3, [ip] /* and a third */
287 /* Are both addresses aligned the same way? */
290 RETeq /* len == 0, or same addresses! */
293 bne .Lmemcmp_bytewise2 /* Badly aligned. Do it the slow way */
295 /* Word-align the addresses, if necessary */
298 add r3, r3, r3, lsl #1
299 addne pc, pc, r3, lsl #3
302 /* Compare up to 3 bytes */
310 /* Compare up to 2 bytes */
326 /* Compare 4 bytes at a time, if possible */
328 bcc .Lmemcmp_bytewise
329 .Lmemcmp_word_aligned:
334 beq .Lmemcmp_word_aligned
337 /* Correct for extra subtraction, and check if done */
339 cmpeq r0, #0x00 /* If done, did all bytes match? */
340 RETeq /* Yup. Just return */
342 /* Re-do the final word byte-wise */
353 beq .Lmemcmp_bytewise2
358 * 6 byte compares are very common, thanks to the network stack.
359 * This code is hand-scheduled to reduce the number of stalls for
360 * load results. Everything else being equal, this will be ~32%
361 * faster than a byte-wise memcmp.
365 ldrb r3, [r1, #0x00] /* r3 = b2#0 */
366 ldrb r0, [ip, #0x00] /* r0 = b1#0 */
367 ldrb r2, [r1, #0x01] /* r2 = b2#1 */
368 subs r0, r0, r3 /* r0 = b1#0 - b2#0 */
369 ldreqb r3, [ip, #0x01] /* r3 = b1#1 */
370 RETne /* Return if mismatch on #0 */
371 subs r0, r3, r2 /* r0 = b1#1 - b2#1 */
372 ldreqb r3, [r1, #0x02] /* r3 = b2#2 */
373 ldreqb r0, [ip, #0x02] /* r0 = b1#2 */
374 RETne /* Return if mismatch on #1 */
375 ldrb r2, [r1, #0x03] /* r2 = b2#3 */
376 subs r0, r0, r3 /* r0 = b1#2 - b2#2 */
377 ldreqb r3, [ip, #0x03] /* r3 = b1#3 */
378 RETne /* Return if mismatch on #2 */
379 subs r0, r3, r2 /* r0 = b1#3 - b2#3 */
380 ldreqb r3, [r1, #0x04] /* r3 = b2#4 */
381 ldreqb r0, [ip, #0x04] /* r0 = b1#4 */
382 RETne /* Return if mismatch on #3 */
383 ldrb r2, [r1, #0x05] /* r2 = b2#5 */
384 subs r0, r0, r3 /* r0 = b1#4 - b2#4 */
385 ldreqb r3, [ip, #0x05] /* r3 = b1#5 */
386 RETne /* Return if mismatch on #4 */
387 sub r0, r3, r2 /* r0 = b1#5 - b2#5 */
391 /* switch the source and destination registers */
396 /* Do the buffers overlap? */
398 RETeq /* Bail now if src/dst are the same */
399 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */
400 subcs r3, r1, r0 /* if (src > dst) r3 = src - dst */
401 cmp r3, r2 /* if (r3 < len) we have an overlap */
402 bcc PIC_SYM(_C_LABEL(memcpy), PLT)
404 /* Determine copy direction */
406 bcc .Lmemmove_backwards
408 moveq r0, #0 /* Quick abort for len=0 */
411 stmdb sp!, {r0, lr} /* memmove() returns dest addr */
413 blt .Lmemmove_fl4 /* less than 4 bytes */
415 bne .Lmemmove_fdestul /* oh unaligned destination addr */
417 bne .Lmemmove_fsrcul /* oh unaligned source addr */
420 /* We have aligned source and destination */
422 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
424 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
425 stmdb sp!, {r4} /* borrow r4 */
427 /* blat 32 bytes at a time */
428 /* XXX for really big copies perhaps we should use more registers */
430 ldmia r1!, {r3, r4, r12, lr}
431 stmia r0!, {r3, r4, r12, lr}
432 ldmia r1!, {r3, r4, r12, lr}
433 stmia r0!, {r3, r4, r12, lr}
435 bge .Lmemmove_floop32
438 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
439 stmgeia r0!, {r3, r4, r12, lr}
441 ldmia sp!, {r4} /* return r4 */
446 /* blat 12 bytes at a time */
448 ldmgeia r1!, {r3, r12, lr}
449 stmgeia r0!, {r3, r12, lr}
451 bge .Lmemmove_floop12
460 ldmgeia r1!, {r3, r12}
461 stmgeia r0!, {r3, r12}
465 /* less than 4 bytes to go */
467 ldmeqia sp!, {r0, pc} /* done */
469 /* copy the crud byte at a time */
479 /* erg - unaligned destination */
484 /* align destination with byte copies */
492 blt .Lmemmove_fl4 /* less than 4 bytes */
495 beq .Lmemmove_ft8 /* we have an aligned source */
497 /* erg - unaligned source */
498 /* This is where it gets nasty ... */
503 bgt .Lmemmove_fsrcul3
504 beq .Lmemmove_fsrcul2
506 blt .Lmemmove_fsrcul1loop4
510 .Lmemmove_fsrcul1loop16:
516 ldmia r1!, {r4, r5, r12, lr}
518 orr r3, r3, r4, lsr #24
520 orr r4, r4, r5, lsr #24
522 orr r5, r5, r12, lsr #24
524 orr r12, r12, lr, lsr #24
526 orr r3, r3, r4, lsl #24
528 orr r4, r4, r5, lsl #24
530 orr r5, r5, r12, lsl #24
532 orr r12, r12, lr, lsl #24
534 stmia r0!, {r3-r5, r12}
536 bge .Lmemmove_fsrcul1loop16
539 blt .Lmemmove_fsrcul1l4
541 .Lmemmove_fsrcul1loop4:
549 orr r12, r12, lr, lsr #24
551 orr r12, r12, lr, lsl #24
555 bge .Lmemmove_fsrcul1loop4
563 blt .Lmemmove_fsrcul2loop4
567 .Lmemmove_fsrcul2loop16:
573 ldmia r1!, {r4, r5, r12, lr}
575 orr r3, r3, r4, lsr #16
577 orr r4, r4, r5, lsr #16
579 orr r5, r5, r12, lsr #16
580 mov r12, r12, lsl #16
581 orr r12, r12, lr, lsr #16
583 orr r3, r3, r4, lsl #16
585 orr r4, r4, r5, lsl #16
587 orr r5, r5, r12, lsl #16
588 mov r12, r12, lsr #16
589 orr r12, r12, lr, lsl #16
591 stmia r0!, {r3-r5, r12}
593 bge .Lmemmove_fsrcul2loop16
596 blt .Lmemmove_fsrcul2l4
598 .Lmemmove_fsrcul2loop4:
606 orr r12, r12, lr, lsr #16
608 orr r12, r12, lr, lsl #16
612 bge .Lmemmove_fsrcul2loop4
620 blt .Lmemmove_fsrcul3loop4
624 .Lmemmove_fsrcul3loop16:
630 ldmia r1!, {r4, r5, r12, lr}
632 orr r3, r3, r4, lsr #8
634 orr r4, r4, r5, lsr #8
636 orr r5, r5, r12, lsr #8
637 mov r12, r12, lsl #24
638 orr r12, r12, lr, lsr #8
640 orr r3, r3, r4, lsl #8
642 orr r4, r4, r5, lsl #8
644 orr r5, r5, r12, lsl #8
645 mov r12, r12, lsr #24
646 orr r12, r12, lr, lsl #8
648 stmia r0!, {r3-r5, r12}
650 bge .Lmemmove_fsrcul3loop16
653 blt .Lmemmove_fsrcul3l4
655 .Lmemmove_fsrcul3loop4:
663 orr r12, r12, lr, lsr #8
665 orr r12, r12, lr, lsl #8
669 bge .Lmemmove_fsrcul3loop4
679 blt .Lmemmove_bl4 /* less than 4 bytes */
681 bne .Lmemmove_bdestul /* oh unaligned destination addr */
683 bne .Lmemmove_bsrcul /* oh unaligned source addr */
686 /* We have aligned source and destination */
688 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
690 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
693 /* blat 32 bytes at a time */
694 /* XXX for really big copies perhaps we should use more registers */
696 ldmdb r1!, {r3, r4, r12, lr}
697 stmdb r0!, {r3, r4, r12, lr}
698 ldmdb r1!, {r3, r4, r12, lr}
699 stmdb r0!, {r3, r4, r12, lr}
701 bge .Lmemmove_bloop32
705 ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
706 stmgedb r0!, {r3, r4, r12, lr}
709 ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
710 stmgedb r0!, {r3, r12, lr}
720 ldmgedb r1!, {r3, r12}
721 stmgedb r0!, {r3, r12}
725 /* less than 4 bytes to go */
729 /* copy the crud byte at a time */
733 ldrgeb r3, [r1, #-1]!
734 strgeb r3, [r0, #-1]!
735 ldrgtb r3, [r1, #-1]!
736 strgtb r3, [r0, #-1]!
739 /* erg - unaligned destination */
743 /* align destination with byte copies */
746 ldrgeb r3, [r1, #-1]!
747 strgeb r3, [r0, #-1]!
748 ldrgtb r3, [r1, #-1]!
749 strgtb r3, [r0, #-1]!
751 blt .Lmemmove_bl4 /* less than 4 bytes to go */
753 beq .Lmemmove_bt8 /* we have an aligned source */
755 /* erg - unaligned source */
756 /* This is where it gets nasty ... */
761 blt .Lmemmove_bsrcul1
762 beq .Lmemmove_bsrcul2
764 blt .Lmemmove_bsrcul3loop4
766 stmdb sp!, {r4, r5, lr}
768 .Lmemmove_bsrcul3loop16:
774 ldmdb r1!, {r3-r5, r12}
776 orr lr, lr, r12, lsl #24
778 orr r12, r12, r5, lsl #24
780 orr r5, r5, r4, lsl #24
782 orr r4, r4, r3, lsl #24
784 orr lr, lr, r12, lsr #24
786 orr r12, r12, r5, lsr #24
788 orr r5, r5, r4, lsr #24
790 orr r4, r4, r3, lsr #24
792 stmdb r0!, {r4, r5, r12, lr}
794 bge .Lmemmove_bsrcul3loop16
795 ldmia sp!, {r4, r5, lr}
797 blt .Lmemmove_bsrcul3l4
799 .Lmemmove_bsrcul3loop4:
807 orr r12, r12, r3, lsl #24
809 orr r12, r12, r3, lsr #24
813 bge .Lmemmove_bsrcul3loop4
821 blt .Lmemmove_bsrcul2loop4
823 stmdb sp!, {r4, r5, lr}
825 .Lmemmove_bsrcul2loop16:
831 ldmdb r1!, {r3-r5, r12}
833 orr lr, lr, r12, lsl #16
834 mov r12, r12, lsr #16
835 orr r12, r12, r5, lsl #16
837 orr r5, r5, r4, lsl #16
839 orr r4, r4, r3, lsl #16
841 orr lr, lr, r12, lsr #16
842 mov r12, r12, lsl #16
843 orr r12, r12, r5, lsr #16
845 orr r5, r5, r4, lsr #16
847 orr r4, r4, r3, lsr #16
849 stmdb r0!, {r4, r5, r12, lr}
851 bge .Lmemmove_bsrcul2loop16
852 ldmia sp!, {r4, r5, lr}
854 blt .Lmemmove_bsrcul2l4
856 .Lmemmove_bsrcul2loop4:
864 orr r12, r12, r3, lsl #16
866 orr r12, r12, r3, lsr #16
870 bge .Lmemmove_bsrcul2loop4
878 blt .Lmemmove_bsrcul1loop4
880 stmdb sp!, {r4, r5, lr}
882 .Lmemmove_bsrcul1loop32:
888 ldmdb r1!, {r3-r5, r12}
890 orr lr, lr, r12, lsl #8
891 mov r12, r12, lsr #24
892 orr r12, r12, r5, lsl #8
894 orr r5, r5, r4, lsl #8
896 orr r4, r4, r3, lsl #8
898 orr lr, lr, r12, lsr #8
899 mov r12, r12, lsl #24
900 orr r12, r12, r5, lsr #8
902 orr r5, r5, r4, lsr #8
904 orr r4, r4, r3, lsr #8
906 stmdb r0!, {r4, r5, r12, lr}
908 bge .Lmemmove_bsrcul1loop32
909 ldmia sp!, {r4, r5, lr}
911 blt .Lmemmove_bsrcul1l4
913 .Lmemmove_bsrcul1loop4:
921 orr r12, r12, r3, lsl #8
923 orr r12, r12, r3, lsr #8
927 bge .Lmemmove_bsrcul1loop4
933 #if !defined(_ARM_ARCH_5E)
935 /* save leaf functions having to store this away */
936 /* Do not check arm_memcpy if we're running from flash */
938 #if FLASHADDR > PHYSADDR
948 ldr r3, .L_arm_memcpy
952 ldr r3, .L_min_memcpy_size
956 stmfd sp!, {r0-r2, r4, lr}
958 ldr r4, .L_arm_memcpy
962 ldmfd sp!, {r0-r2, r4, lr}
966 stmdb sp!, {r0, lr} /* memcpy() returns dest addr */
969 blt .Lmemcpy_l4 /* less than 4 bytes */
971 bne .Lmemcpy_destul /* oh unaligned destination addr */
973 bne .Lmemcpy_srcul /* oh unaligned source addr */
976 /* We have aligned source and destination */
978 blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */
980 blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */
981 stmdb sp!, {r4} /* borrow r4 */
983 /* blat 32 bytes at a time */
984 /* XXX for really big copies perhaps we should use more registers */
986 ldmia r1!, {r3, r4, r12, lr}
987 stmia r0!, {r3, r4, r12, lr}
988 ldmia r1!, {r3, r4, r12, lr}
989 stmia r0!, {r3, r4, r12, lr}
994 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
995 stmgeia r0!, {r3, r4, r12, lr}
997 ldmia sp!, {r4} /* return r4 */
1002 /* blat 12 bytes at a time */
1004 ldmgeia r1!, {r3, r12, lr}
1005 stmgeia r0!, {r3, r12, lr}
1006 subges r2, r2, #0x0c
1016 ldmgeia r1!, {r3, r12}
1017 stmgeia r0!, {r3, r12}
1021 /* less than 4 bytes to go */
1024 ldmeqia sp!, {r0, pc}^ /* done */
1026 ldmeqia sp!, {r0, pc} /* done */
1028 /* copy the crud byte at a time */
1038 /* erg - unaligned destination */
1043 /* align destination with byte copies */
1051 blt .Lmemcpy_l4 /* less than 4 bytes */
1054 beq .Lmemcpy_t8 /* we have an aligned source */
1056 /* erg - unaligned source */
1057 /* This is where it gets nasty ... */
1065 blt .Lmemcpy_srcul1loop4
1069 .Lmemcpy_srcul1loop16:
1071 ldmia r1!, {r4, r5, r12, lr}
1072 orr r3, r3, r4, lsl #24
1074 orr r4, r4, r5, lsl #24
1076 orr r5, r5, r12, lsl #24
1077 mov r12, r12, lsr #8
1078 orr r12, r12, lr, lsl #24
1079 stmia r0!, {r3-r5, r12}
1081 bge .Lmemcpy_srcul1loop16
1084 blt .Lmemcpy_srcul1l4
1086 .Lmemcpy_srcul1loop4:
1089 orr r12, r12, lr, lsl #24
1092 bge .Lmemcpy_srcul1loop4
1100 blt .Lmemcpy_srcul2loop4
1104 .Lmemcpy_srcul2loop16:
1106 ldmia r1!, {r4, r5, r12, lr}
1107 orr r3, r3, r4, lsl #16
1109 orr r4, r4, r5, lsl #16
1111 orr r5, r5, r12, lsl #16
1112 mov r12, r12, lsr #16
1113 orr r12, r12, lr, lsl #16
1114 stmia r0!, {r3-r5, r12}
1116 bge .Lmemcpy_srcul2loop16
1119 blt .Lmemcpy_srcul2l4
1121 .Lmemcpy_srcul2loop4:
1122 mov r12, lr, lsr #16
1124 orr r12, r12, lr, lsl #16
1127 bge .Lmemcpy_srcul2loop4
1135 blt .Lmemcpy_srcul3loop4
1139 .Lmemcpy_srcul3loop16:
1141 ldmia r1!, {r4, r5, r12, lr}
1142 orr r3, r3, r4, lsl #8
1144 orr r4, r4, r5, lsl #8
1146 orr r5, r5, r12, lsl #8
1147 mov r12, r12, lsr #24
1148 orr r12, r12, lr, lsl #8
1149 stmia r0!, {r3-r5, r12}
1151 bge .Lmemcpy_srcul3loop16
1154 blt .Lmemcpy_srcul3l4
1156 .Lmemcpy_srcul3loop4:
1157 mov r12, lr, lsr #24
1159 orr r12, r12, lr, lsl #8
1162 bge .Lmemcpy_srcul3loop4
1168 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
1172 ble .Lmemcpy_short /* <= 12 bytes */
1174 #if FLASHADDR > PHYSADDR
1184 ldr r3, .L_arm_memcpy
1188 ldr r3, .L_min_memcpy_size
1192 stmfd sp!, {r0-r2, r4, lr}
1194 ldr r4, .L_arm_memcpy
1198 ldmfd sp!, {r0-r2, r4, lr}
1201 mov r3, r0 /* We must not clobber r0 */
1203 /* Word-align the destination buffer */
1204 ands ip, r3, #0x03 /* Already word aligned? */
1205 beq .Lmemcpy_wordaligned /* Yup */
1207 ldrb ip, [r1], #0x01
1209 strb ip, [r3], #0x01
1210 ldrleb ip, [r1], #0x01
1212 strleb ip, [r3], #0x01
1213 ldrltb ip, [r1], #0x01
1215 strltb ip, [r3], #0x01
1217 /* Destination buffer is now word aligned */
1218 .Lmemcpy_wordaligned:
1219 ands ip, r1, #0x03 /* Is src also word-aligned? */
1220 bne .Lmemcpy_bad_align /* Nope. Things just got bad */
1222 /* Quad-align the destination buffer */
1223 tst r3, #0x07 /* Already quad aligned? */
1224 ldrne ip, [r1], #0x04
1225 stmfd sp!, {r4-r9} /* Free up some registers */
1227 strne ip, [r3], #0x04
1229 /* Destination buffer quad aligned, source is at least word aligned */
1231 blt .Lmemcpy_w_lessthan128
1233 /* Copy 128 bytes at a time */
1235 ldr r4, [r1], #0x04 /* LD:00-03 */
1236 ldr r5, [r1], #0x04 /* LD:04-07 */
1237 pld [r1, #0x18] /* Prefetch 0x20 */
1238 ldr r6, [r1], #0x04 /* LD:08-0b */
1239 ldr r7, [r1], #0x04 /* LD:0c-0f */
1240 ldr r8, [r1], #0x04 /* LD:10-13 */
1241 ldr r9, [r1], #0x04 /* LD:14-17 */
1242 strd r4, [r3], #0x08 /* ST:00-07 */
1243 ldr r4, [r1], #0x04 /* LD:18-1b */
1244 ldr r5, [r1], #0x04 /* LD:1c-1f */
1245 strd r6, [r3], #0x08 /* ST:08-0f */
1246 ldr r6, [r1], #0x04 /* LD:20-23 */
1247 ldr r7, [r1], #0x04 /* LD:24-27 */
1248 pld [r1, #0x18] /* Prefetch 0x40 */
1249 strd r8, [r3], #0x08 /* ST:10-17 */
1250 ldr r8, [r1], #0x04 /* LD:28-2b */
1251 ldr r9, [r1], #0x04 /* LD:2c-2f */
1252 strd r4, [r3], #0x08 /* ST:18-1f */
1253 ldr r4, [r1], #0x04 /* LD:30-33 */
1254 ldr r5, [r1], #0x04 /* LD:34-37 */
1255 strd r6, [r3], #0x08 /* ST:20-27 */
1256 ldr r6, [r1], #0x04 /* LD:38-3b */
1257 ldr r7, [r1], #0x04 /* LD:3c-3f */
1258 strd r8, [r3], #0x08 /* ST:28-2f */
1259 ldr r8, [r1], #0x04 /* LD:40-43 */
1260 ldr r9, [r1], #0x04 /* LD:44-47 */
1261 pld [r1, #0x18] /* Prefetch 0x60 */
1262 strd r4, [r3], #0x08 /* ST:30-37 */
1263 ldr r4, [r1], #0x04 /* LD:48-4b */
1264 ldr r5, [r1], #0x04 /* LD:4c-4f */
1265 strd r6, [r3], #0x08 /* ST:38-3f */
1266 ldr r6, [r1], #0x04 /* LD:50-53 */
1267 ldr r7, [r1], #0x04 /* LD:54-57 */
1268 strd r8, [r3], #0x08 /* ST:40-47 */
1269 ldr r8, [r1], #0x04 /* LD:58-5b */
1270 ldr r9, [r1], #0x04 /* LD:5c-5f */
1271 strd r4, [r3], #0x08 /* ST:48-4f */
1272 ldr r4, [r1], #0x04 /* LD:60-63 */
1273 ldr r5, [r1], #0x04 /* LD:64-67 */
1274 pld [r1, #0x18] /* Prefetch 0x80 */
1275 strd r6, [r3], #0x08 /* ST:50-57 */
1276 ldr r6, [r1], #0x04 /* LD:68-6b */
1277 ldr r7, [r1], #0x04 /* LD:6c-6f */
1278 strd r8, [r3], #0x08 /* ST:58-5f */
1279 ldr r8, [r1], #0x04 /* LD:70-73 */
1280 ldr r9, [r1], #0x04 /* LD:74-77 */
1281 strd r4, [r3], #0x08 /* ST:60-67 */
1282 ldr r4, [r1], #0x04 /* LD:78-7b */
1283 ldr r5, [r1], #0x04 /* LD:7c-7f */
1284 strd r6, [r3], #0x08 /* ST:68-6f */
1285 strd r8, [r3], #0x08 /* ST:70-77 */
1287 strd r4, [r3], #0x08 /* ST:78-7f */
1288 bge .Lmemcpy_w_loop128
1290 .Lmemcpy_w_lessthan128:
1291 adds r2, r2, #0x80 /* Adjust for extra sub */
1292 ldmeqfd sp!, {r4-r9}
1293 RETeq /* Return now if done */
1295 blt .Lmemcpy_w_lessthan32
1297 /* Copy 32 bytes at a time */
1306 strd r4, [r3], #0x08
1309 strd r6, [r3], #0x08
1310 strd r8, [r3], #0x08
1312 strd r4, [r3], #0x08
1313 bge .Lmemcpy_w_loop32
1315 .Lmemcpy_w_lessthan32:
1316 adds r2, r2, #0x20 /* Adjust for extra sub */
1317 ldmeqfd sp!, {r4-r9}
1318 RETeq /* Return now if done */
1322 addne pc, pc, r4, lsl #1
1325 /* At least 24 bytes remaining */
1329 strd r4, [r3], #0x08
1331 /* At least 16 bytes remaining */
1335 strd r4, [r3], #0x08
1337 /* At least 8 bytes remaining */
1341 strd r4, [r3], #0x08
1343 /* Less than 8 bytes remaining */
1345 RETeq /* Return now if done */
1347 ldrge ip, [r1], #0x04
1348 strge ip, [r3], #0x04
1349 RETeq /* Return now if done */
1351 ldrb ip, [r1], #0x01
1353 ldrgeb r2, [r1], #0x01
1354 strb ip, [r3], #0x01
1356 strgeb r2, [r3], #0x01
1362 * At this point, it has not been possible to word align both buffers.
1363 * The destination buffer is word aligned, but the source buffer is not.
1374 .Lmemcpy_bad1_loop16:
1386 orr r4, r4, r5, lsr #24
1388 orr r5, r5, r6, lsr #24
1390 orr r6, r6, r7, lsr #24
1392 orr r7, r7, ip, lsr #24
1394 orr r4, r4, r5, lsl #24
1396 orr r5, r5, r6, lsl #24
1398 orr r6, r6, r7, lsl #24
1400 orr r7, r7, ip, lsl #24
1408 bge .Lmemcpy_bad1_loop16
1411 ldmeqfd sp!, {r4-r7}
1412 RETeq /* Return now if done */
1415 blt .Lmemcpy_bad_done
1417 .Lmemcpy_bad1_loop4:
1426 orr r4, r4, ip, lsr #24
1428 orr r4, r4, ip, lsl #24
1431 bge .Lmemcpy_bad1_loop4
1435 .Lmemcpy_bad2_loop16:
1447 orr r4, r4, r5, lsr #16
1449 orr r5, r5, r6, lsr #16
1451 orr r6, r6, r7, lsr #16
1453 orr r7, r7, ip, lsr #16
1455 orr r4, r4, r5, lsl #16
1457 orr r5, r5, r6, lsl #16
1459 orr r6, r6, r7, lsl #16
1461 orr r7, r7, ip, lsl #16
1469 bge .Lmemcpy_bad2_loop16
1472 ldmeqfd sp!, {r4-r7}
1473 RETeq /* Return now if done */
1476 blt .Lmemcpy_bad_done
1478 .Lmemcpy_bad2_loop4:
1487 orr r4, r4, ip, lsr #16
1489 orr r4, r4, ip, lsl #16
1492 bge .Lmemcpy_bad2_loop4
1496 .Lmemcpy_bad3_loop16:
1508 orr r4, r4, r5, lsr #8
1510 orr r5, r5, r6, lsr #8
1512 orr r6, r6, r7, lsr #8
1514 orr r7, r7, ip, lsr #8
1516 orr r4, r4, r5, lsl #8
1518 orr r5, r5, r6, lsl #8
1520 orr r6, r6, r7, lsl #8
1522 orr r7, r7, ip, lsl #8
1530 bge .Lmemcpy_bad3_loop16
1533 ldmeqfd sp!, {r4-r7}
1534 RETeq /* Return now if done */
1537 blt .Lmemcpy_bad_done
1539 .Lmemcpy_bad3_loop4:
1548 orr r4, r4, ip, lsr #8
1550 orr r4, r4, ip, lsl #8
1553 bge .Lmemcpy_bad3_loop4
1560 ldrb ip, [r1], #0x01
1562 ldrgeb r2, [r1], #0x01
1563 strb ip, [r3], #0x01
1565 strgeb r2, [r3], #0x01
1571 * Handle short copies (less than 16 bytes), possibly misaligned.
1572 * Some of these are *very* common, thanks to the network stack,
1573 * and so are handled specially.
1576 add pc, pc, r2, lsl #2
1579 b .Lmemcpy_bytewise /* 0x01 */
1580 b .Lmemcpy_bytewise /* 0x02 */
1581 b .Lmemcpy_bytewise /* 0x03 */
1582 b .Lmemcpy_4 /* 0x04 */
1583 b .Lmemcpy_bytewise /* 0x05 */
1584 b .Lmemcpy_6 /* 0x06 */
1585 b .Lmemcpy_bytewise /* 0x07 */
1586 b .Lmemcpy_8 /* 0x08 */
1587 b .Lmemcpy_bytewise /* 0x09 */
1588 b .Lmemcpy_bytewise /* 0x0a */
1589 b .Lmemcpy_bytewise /* 0x0b */
1590 b .Lmemcpy_c /* 0x0c */
1592 mov r3, r0 /* We must not clobber r0 */
1593 ldrb ip, [r1], #0x01
1594 1: subs r2, r2, #0x01
1595 strb ip, [r3], #0x01
1596 ldrneb ip, [r1], #0x01
1600 /******************************************************************************
1601 * Special case for 4 byte copies
1603 #define LMEMCPY_4_LOG2 6 /* 64 bytes */
1604 #define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2
1608 orr r2, r2, r0, lsl #2
1611 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2
1614 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1622 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1624 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1625 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */
1627 mov r3, r3, lsl #8 /* r3 = 012. */
1628 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
1630 mov r3, r3, lsr #8 /* r3 = .210 */
1631 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1638 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1642 ldrh r2, [r1, #0x02]
1644 ldrh r3, [r1, #0x02]
1647 orr r3, r2, r3, lsl #16
1653 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1655 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */
1656 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */
1658 mov r3, r3, lsl #24 /* r3 = 0... */
1659 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
1661 mov r3, r3, lsr #24 /* r3 = ...0 */
1662 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1669 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1673 strb r2, [r0, #0x03]
1681 strb r1, [r0, #0x03]
1683 strh r3, [r0, #0x01]
1688 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1691 ldrh r3, [r1, #0x01]
1692 ldrb r1, [r1, #0x03]
1694 strh r3, [r0, #0x01]
1695 strb r1, [r0, #0x03]
1700 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1702 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1703 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1705 mov r1, r2, lsr #8 /* r1 = ...0 */
1707 mov r2, r2, lsl #8 /* r2 = .01. */
1708 orr r2, r2, r3, lsr #8 /* r2 = .012 */
1711 mov r2, r2, lsr #8 /* r2 = ...1 */
1712 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1713 mov r3, r3, lsr #8 /* r3 = ...3 */
1715 strh r2, [r0, #0x01]
1716 strb r3, [r0, #0x03]
1721 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1724 ldrh r3, [r1, #0x01]
1725 ldrb r1, [r1, #0x03]
1727 strh r3, [r0, #0x01]
1728 strb r1, [r0, #0x03]
1733 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1737 strh r2, [r0, #0x02]
1743 strh r3, [r0, #0x02]
1749 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1751 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1752 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */
1753 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1756 mov r2, r2, lsl #8 /* r2 = 012. */
1757 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1759 mov r2, r2, lsr #24 /* r2 = ...2 */
1760 orr r2, r2, r3, lsl #8 /* r2 = xx32 */
1762 strh r2, [r0, #0x02]
1767 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1770 ldrh r3, [r1, #0x02]
1772 strh r3, [r0, #0x02]
1777 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1779 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */
1780 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1781 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */
1782 strh r1, [r0, #0x02]
1784 mov r3, r3, lsr #24 /* r3 = ...1 */
1785 orr r3, r3, r2, lsl #8 /* r3 = xx01 */
1787 mov r3, r3, lsl #8 /* r3 = 321. */
1788 orr r3, r3, r2, lsr #24 /* r3 = 3210 */
1795 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1797 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1799 strb r2, [r0, #0x03]
1802 strh r3, [r0, #0x01]
1808 strh r3, [r0, #0x01]
1809 strb r1, [r0, #0x03]
1815 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1818 ldrh r3, [r1, #0x01]
1819 ldrb r1, [r1, #0x03]
1821 strh r3, [r0, #0x01]
1822 strb r1, [r0, #0x03]
1827 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1830 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1831 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1832 strb r3, [r0, #0x03]
1833 mov r3, r3, lsr #8 /* r3 = ...2 */
1834 orr r3, r3, r2, lsl #8 /* r3 = ..12 */
1835 strh r3, [r0, #0x01]
1836 mov r2, r2, lsr #8 /* r2 = ...0 */
1839 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1840 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1842 mov r2, r2, lsr #8 /* r2 = ...1 */
1843 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1844 strh r2, [r0, #0x01]
1845 mov r3, r3, lsr #8 /* r3 = ...3 */
1846 strb r3, [r0, #0x03]
1852 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1855 ldrh r3, [r1, #0x01]
1856 ldrb r1, [r1, #0x03]
1858 strh r3, [r0, #0x01]
1859 strb r1, [r0, #0x03]
1864 /******************************************************************************
1865 * Special case for 6 byte copies
1867 #define LMEMCPY_6_LOG2 6 /* 64 bytes */
1868 #define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2
1872 orr r2, r2, r0, lsl #2
1875 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2
1878 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1881 ldrh r3, [r1, #0x04]
1883 strh r3, [r0, #0x04]
1888 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1890 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1891 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */
1893 mov r2, r2, lsl #8 /* r2 = 012. */
1894 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1896 mov r2, r2, lsr #8 /* r2 = .210 */
1897 orr r2, r2, r3, lsl #24 /* r2 = 3210 */
1899 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */
1901 strh r3, [r0, #0x04]
1906 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1908 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1909 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1911 mov r1, r3, lsr #16 /* r1 = ..23 */
1912 orr r1, r1, r2, lsl #16 /* r1 = 0123 */
1914 strh r3, [r0, #0x04]
1916 mov r1, r3, lsr #16 /* r1 = ..54 */
1917 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1919 strh r1, [r0, #0x04]
1925 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1927 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1928 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */
1929 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r1 = xxx5 */
1931 mov r2, r2, lsl #24 /* r2 = 0... */
1932 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
1933 mov r3, r3, lsl #8 /* r3 = 234. */
1934 orr r1, r3, r1, lsr #24 /* r1 = 2345 */
1936 mov r2, r2, lsr #24 /* r2 = ...0 */
1937 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
1938 mov r1, r1, lsl #8 /* r1 = xx5. */
1939 orr r1, r1, r3, lsr #24 /* r1 = xx54 */
1942 strh r1, [r0, #0x04]
1947 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1949 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1950 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */
1951 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1952 strh r1, [r0, #0x01]
1954 mov r1, r3, lsr #24 /* r1 = ...0 */
1956 mov r3, r3, lsl #8 /* r3 = 123. */
1957 orr r3, r3, r2, lsr #8 /* r3 = 1234 */
1960 mov r3, r3, lsr #24 /* r3 = ...3 */
1961 orr r3, r3, r2, lsl #8 /* r3 = .543 */
1962 mov r2, r2, lsr #8 /* r2 = ...5 */
1964 strh r3, [r0, #0x03]
1965 strb r2, [r0, #0x05]
1970 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1973 ldrh r3, [r1, #0x01]
1974 ldrh ip, [r1, #0x03]
1975 ldrb r1, [r1, #0x05]
1977 strh r3, [r0, #0x01]
1978 strh ip, [r0, #0x03]
1979 strb r1, [r0, #0x05]
1984 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1986 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1987 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1989 mov r3, r2, lsr #8 /* r3 = ...0 */
1991 strb r1, [r0, #0x05]
1992 mov r3, r1, lsr #8 /* r3 = .234 */
1993 strh r3, [r0, #0x03]
1994 mov r3, r2, lsl #8 /* r3 = .01. */
1995 orr r3, r3, r1, lsr #24 /* r3 = .012 */
1996 strh r3, [r0, #0x01]
2000 strb r3, [r0, #0x05]
2001 mov r3, r1, lsr #8 /* r3 = .543 */
2002 strh r3, [r0, #0x03]
2003 mov r3, r2, lsr #8 /* r3 = ...1 */
2004 orr r3, r3, r1, lsl #8 /* r3 = 4321 */
2005 strh r3, [r0, #0x01]
2011 * 0111: dst is 8-bit aligned, src is 8-bit aligned
2014 ldrh r3, [r1, #0x01]
2015 ldrh ip, [r1, #0x03]
2016 ldrb r1, [r1, #0x05]
2018 strh r3, [r0, #0x01]
2019 strh ip, [r0, #0x03]
2020 strb r1, [r0, #0x05]
2025 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2028 ldr r2, [r1] /* r2 = 0123 */
2029 ldrh r3, [r1, #0x04] /* r3 = ..45 */
2030 mov r1, r2, lsr #16 /* r1 = ..01 */
2031 orr r3, r3, r2, lsl#16 /* r3 = 2345 */
2035 ldrh r2, [r1, #0x04] /* r2 = ..54 */
2036 ldr r3, [r1] /* r3 = 3210 */
2037 mov r2, r2, lsl #16 /* r2 = 54.. */
2038 orr r2, r2, r3, lsr #16 /* r2 = 5432 */
2046 * 1001: dst is 16-bit aligned, src is 8-bit aligned
2048 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2049 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */
2050 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
2052 mov r2, r2, lsr #8 /* r2 = .345 */
2053 orr r2, r2, r3, lsl #24 /* r2 = 2345 */
2055 mov r2, r2, lsl #8 /* r2 = 543. */
2056 orr r2, r2, r3, lsr #24 /* r2 = 5432 */
2064 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2074 * 1011: dst is 16-bit aligned, src is 8-bit aligned
2076 ldrb r3, [r1] /* r3 = ...0 */
2077 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2078 ldrb r1, [r1, #0x05] /* r1 = ...5 */
2080 mov r3, r3, lsl #8 /* r3 = ..0. */
2081 orr r3, r3, r2, lsr #24 /* r3 = ..01 */
2082 orr r1, r1, r2, lsl #8 /* r1 = 2345 */
2084 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2085 mov r1, r1, lsl #24 /* r1 = 5... */
2086 orr r1, r1, r2, lsr #8 /* r1 = 5432 */
2094 * 1100: dst is 8-bit aligned, src is 32-bit aligned
2096 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2097 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */
2099 mov r3, r2, lsr #24 /* r3 = ...0 */
2101 mov r2, r2, lsl #8 /* r2 = 123. */
2102 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
2105 mov r2, r2, lsr #8 /* r2 = .321 */
2106 orr r2, r2, r1, lsl #24 /* r2 = 4321 */
2107 mov r1, r1, lsr #8 /* r1 = ...5 */
2110 strb r1, [r0, #0x05]
2115 * 1101: dst is 8-bit aligned, src is 8-bit aligned
2118 ldrh r3, [r1, #0x01]
2119 ldrh ip, [r1, #0x03]
2120 ldrb r1, [r1, #0x05]
2122 strh r3, [r0, #0x01]
2123 strh ip, [r0, #0x03]
2124 strb r1, [r0, #0x05]
2129 * 1110: dst is 8-bit aligned, src is 16-bit aligned
2131 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2132 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
2134 mov r3, r2, lsr #8 /* r3 = ...0 */
2136 mov r2, r2, lsl #24 /* r2 = 1... */
2137 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
2140 mov r2, r2, lsr #8 /* r2 = ...1 */
2141 orr r2, r2, r1, lsl #8 /* r2 = 4321 */
2142 mov r1, r1, lsr #24 /* r1 = ...5 */
2145 strb r1, [r0, #0x05]
2150 * 1111: dst is 8-bit aligned, src is 8-bit aligned
2154 ldrb r1, [r1, #0x05]
2157 strb r1, [r0, #0x05]
2162 /******************************************************************************
2163 * Special case for 8 byte copies
2165 #define LMEMCPY_8_LOG2 6 /* 64 bytes */
2166 #define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
2170 orr r2, r2, r0, lsl #2
2173 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2
2176 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2186 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2188 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2189 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
2190 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2192 mov r3, r3, lsl #8 /* r3 = 012. */
2193 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
2194 orr r2, r1, r2, lsl #8 /* r2 = 4567 */
2196 mov r3, r3, lsr #8 /* r3 = .210 */
2197 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
2198 mov r1, r1, lsl #24 /* r1 = 7... */
2199 orr r2, r1, r2, lsr #8 /* r2 = 7654 */
2207 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2209 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2210 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2211 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2213 mov r2, r2, lsl #16 /* r2 = 01.. */
2214 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2215 orr r3, r1, r3, lsl #16 /* r3 = 4567 */
2217 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2218 mov r3, r3, lsr #16 /* r3 = ..54 */
2219 orr r3, r3, r1, lsl #16 /* r3 = 7654 */
2227 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2229 ldrb r3, [r1] /* r3 = ...0 */
2230 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2231 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */
2233 mov r3, r3, lsl #24 /* r3 = 0... */
2234 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
2235 mov r2, r2, lsl #24 /* r2 = 4... */
2236 orr r2, r2, r1, lsr #8 /* r2 = 4567 */
2238 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2239 mov r2, r2, lsr #24 /* r2 = ...4 */
2240 orr r2, r2, r1, lsl #8 /* r2 = 7654 */
2248 * 0100: dst is 8-bit aligned, src is 32-bit aligned
2250 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
2251 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */
2253 mov r1, r3, lsr #24 /* r1 = ...0 */
2255 mov r1, r3, lsr #8 /* r1 = .012 */
2256 strb r2, [r0, #0x07]
2257 mov r3, r3, lsl #24 /* r3 = 3... */
2258 orr r3, r3, r2, lsr #8 /* r3 = 3456 */
2261 mov r1, r2, lsr #24 /* r1 = ...7 */
2262 strb r1, [r0, #0x07]
2263 mov r1, r3, lsr #8 /* r1 = .321 */
2264 mov r3, r3, lsr #24 /* r3 = ...3 */
2265 orr r3, r3, r2, lsl #8 /* r3 = 6543 */
2267 strh r1, [r0, #0x01]
2273 * 0101: dst is 8-bit aligned, src is 8-bit aligned
2276 ldrh r3, [r1, #0x01]
2278 ldrb r1, [r1, #0x07]
2280 strh r3, [r0, #0x01]
2282 strb r1, [r0, #0x07]
2287 * 0110: dst is 8-bit aligned, src is 16-bit aligned
2289 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2290 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2291 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2293 mov ip, r2, lsr #8 /* ip = ...0 */
2295 mov ip, r2, lsl #8 /* ip = .01. */
2296 orr ip, ip, r3, lsr #24 /* ip = .012 */
2297 strb r1, [r0, #0x07]
2298 mov r3, r3, lsl #8 /* r3 = 345. */
2299 orr r3, r3, r1, lsr #8 /* r3 = 3456 */
2301 strb r2, [r0] /* 0 */
2302 mov ip, r1, lsr #8 /* ip = ...7 */
2303 strb ip, [r0, #0x07] /* 7 */
2304 mov ip, r2, lsr #8 /* ip = ...1 */
2305 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2306 mov r3, r3, lsr #8 /* r3 = .543 */
2307 orr r3, r3, r1, lsl #24 /* r3 = 6543 */
2309 strh ip, [r0, #0x01]
2315 * 0111: dst is 8-bit aligned, src is 8-bit aligned
2317 ldrb r3, [r1] /* r3 = ...0 */
2318 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2319 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */
2320 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2322 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */
2324 strh r3, [r0, #0x01]
2325 orr r2, r2, ip, lsl #16 /* r2 = 3456 */
2327 strh ip, [r0, #0x01]
2328 orr r2, r3, r2, lsl #16 /* r2 = 6543 */
2331 strb r1, [r0, #0x07]
2336 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2338 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2339 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2340 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
2343 mov r1, r3, lsr #16 /* r1 = ..45 */
2344 orr r2, r1 ,r2, lsl #16 /* r2 = 2345 */
2347 orr r2, r1, r3, lsl #16 /* r2 = 5432 */
2348 mov r3, r3, lsr #16 /* r3 = ..76 */
2351 strh r3, [r0, #0x06]
2356 * 1001: dst is 16-bit aligned, src is 8-bit aligned
2358 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
2359 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2360 ldrb ip, [r1, #0x07] /* ip = ...7 */
2361 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
2364 mov r1, r2, lsl #24 /* r1 = 2... */
2365 orr r1, r1, r3, lsr #8 /* r1 = 2345 */
2366 orr r3, ip, r3, lsl #8 /* r3 = 4567 */
2368 mov r1, r2, lsr #24 /* r1 = ...2 */
2369 orr r1, r1, r3, lsl #8 /* r1 = 5432 */
2370 mov r3, r3, lsr #24 /* r3 = ...6 */
2371 orr r3, r3, ip, lsl #8 /* r3 = ..76 */
2374 strh r3, [r0, #0x06]
2379 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2383 ldrh r3, [r1, #0x06]
2386 strh r3, [r0, #0x06]
2391 * 1011: dst is 16-bit aligned, src is 8-bit aligned
2393 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */
2394 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2395 ldrb ip, [r1] /* ip = ...0 */
2396 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */
2397 strh r1, [r0, #0x06]
2399 mov r3, r3, lsr #24 /* r3 = ...5 */
2400 orr r3, r3, r2, lsl #8 /* r3 = 2345 */
2401 mov r2, r2, lsr #24 /* r2 = ...1 */
2402 orr r2, r2, ip, lsl #8 /* r2 = ..01 */
2404 mov r3, r3, lsl #24 /* r3 = 5... */
2405 orr r3, r3, r2, lsr #8 /* r3 = 5432 */
2406 orr r2, ip, r2, lsl #8 /* r2 = 3210 */
2414 * 1100: dst is 8-bit aligned, src is 32-bit aligned
2416 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2417 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2418 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */
2419 strh r1, [r0, #0x05]
2421 strb r3, [r0, #0x07]
2422 mov r1, r2, lsr #24 /* r1 = ...0 */
2424 mov r2, r2, lsl #8 /* r2 = 123. */
2425 orr r2, r2, r3, lsr #24 /* r2 = 1234 */
2429 mov r1, r3, lsr #24 /* r1 = ...7 */
2430 strb r1, [r0, #0x07]
2431 mov r2, r2, lsr #8 /* r2 = .321 */
2432 orr r2, r2, r3, lsl #24 /* r2 = 4321 */
2439 * 1101: dst is 8-bit aligned, src is 8-bit aligned
2441 ldrb r3, [r1] /* r3 = ...0 */
2442 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */
2443 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2444 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2446 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */
2448 strh ip, [r0, #0x05]
2449 orr r2, r3, r2, lsl #16 /* r2 = 1234 */
2451 strh r3, [r0, #0x05]
2452 orr r2, r2, ip, lsl #16 /* r2 = 4321 */
2455 strb r1, [r0, #0x07]
2460 * 1110: dst is 8-bit aligned, src is 16-bit aligned
2462 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2463 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2464 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2466 mov ip, r2, lsr #8 /* ip = ...0 */
2468 mov ip, r2, lsl #24 /* ip = 1... */
2469 orr ip, ip, r3, lsr #8 /* ip = 1234 */
2470 strb r1, [r0, #0x07]
2471 mov r1, r1, lsr #8 /* r1 = ...6 */
2472 orr r1, r1, r3, lsl #8 /* r1 = 3456 */
2475 mov ip, r2, lsr #8 /* ip = ...1 */
2476 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2477 mov r2, r1, lsr #8 /* r2 = ...7 */
2478 strb r2, [r0, #0x07]
2479 mov r1, r1, lsl #8 /* r1 = .76. */
2480 orr r1, r1, r3, lsr #24 /* r1 = .765 */
2483 strh r1, [r0, #0x05]
2488 * 1111: dst is 8-bit aligned, src is 8-bit aligned
2492 ldrh r3, [r1, #0x05]
2493 ldrb r1, [r1, #0x07]
2496 strh r3, [r0, #0x05]
2497 strb r1, [r0, #0x07]
2501 /******************************************************************************
2502 * Special case for 12 byte copies
2504 #define LMEMCPY_C_LOG2 7 /* 128 bytes */
2505 #define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2
2509 orr r2, r2, r0, lsl #2
2512 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2
2515 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2527 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2529 ldrb r2, [r1, #0xb] /* r2 = ...B */
2530 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
2531 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2532 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2534 orr r2, r2, ip, lsl #8 /* r2 = 89AB */
2536 mov r2, ip, lsr #24 /* r2 = ...7 */
2537 orr r2, r2, r3, lsl #8 /* r2 = 4567 */
2538 mov r1, r1, lsl #8 /* r1 = 012. */
2539 orr r1, r1, r3, lsr #24 /* r1 = 0123 */
2541 mov r2, r2, lsl #24 /* r2 = B... */
2542 orr r2, r2, ip, lsr #8 /* r2 = BA98 */
2544 mov r2, ip, lsl #24 /* r2 = 7... */
2545 orr r2, r2, r3, lsr #8 /* r2 = 7654 */
2546 mov r1, r1, lsr #8 /* r1 = .210 */
2547 orr r1, r1, r3, lsl #24 /* r1 = 3210 */
2555 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2557 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2558 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2559 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
2560 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
2562 mov r2, r2, lsl #16 /* r2 = 01.. */
2563 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2565 mov r3, r3, lsl #16 /* r3 = 45.. */
2566 orr r3, r3, ip, lsr #16 /* r3 = 4567 */
2567 orr r1, r1, ip, lsl #16 /* r1 = 89AB */
2569 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2571 mov r3, r3, lsr #16 /* r3 = ..54 */
2572 orr r3, r3, ip, lsl #16 /* r3 = 7654 */
2573 mov r1, r1, lsl #16 /* r1 = BA.. */
2574 orr r1, r1, ip, lsr #16 /* r1 = BA98 */
2582 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2584 ldrb r2, [r1] /* r2 = ...0 */
2585 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
2586 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
2587 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
2589 mov r2, r2, lsl #24 /* r2 = 0... */
2590 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
2592 mov r3, r3, lsl #24 /* r3 = 4... */
2593 orr r3, r3, ip, lsr #8 /* r3 = 4567 */
2594 mov r1, r1, lsr #8 /* r1 = .9AB */
2595 orr r1, r1, ip, lsl #24 /* r1 = 89AB */
2597 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
2599 mov r3, r3, lsr #24 /* r3 = ...4 */
2600 orr r3, r3, ip, lsl #8 /* r3 = 7654 */
2601 mov r1, r1, lsl #8 /* r1 = BA9. */
2602 orr r1, r1, ip, lsr #24 /* r1 = BA98 */
2610 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
2612 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2613 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2614 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */
2615 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
2616 strh r1, [r0, #0x01]
2618 mov r1, r2, lsr #24 /* r1 = ...0 */
2620 mov r1, r2, lsl #24 /* r1 = 3... */
2621 orr r2, r1, r3, lsr #8 /* r2 = 3456 */
2622 mov r1, r3, lsl #24 /* r1 = 7... */
2623 orr r1, r1, ip, lsr #8 /* r1 = 789A */
2626 mov r1, r2, lsr #24 /* r1 = ...3 */
2627 orr r2, r1, r3, lsl #8 /* r2 = 6543 */
2628 mov r1, r3, lsr #24 /* r1 = ...7 */
2629 orr r1, r1, ip, lsl #8 /* r1 = A987 */
2630 mov ip, ip, lsr #24 /* ip = ...B */
2634 strb ip, [r0, #0x0b]
2639 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
2642 ldrh r3, [r1, #0x01]
2646 ldrb r1, [r1, #0x0b]
2647 strh r3, [r0, #0x01]
2650 strb r1, [r0, #0x0b]
2655 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
2657 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2658 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2659 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
2660 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
2662 mov r2, r2, ror #8 /* r2 = 1..0 */
2664 mov r2, r2, lsr #16 /* r2 = ..1. */
2665 orr r2, r2, r3, lsr #24 /* r2 = ..12 */
2666 strh r2, [r0, #0x01]
2667 mov r2, r3, lsl #8 /* r2 = 345. */
2668 orr r3, r2, ip, lsr #24 /* r3 = 3456 */
2669 mov r2, ip, lsl #8 /* r2 = 789. */
2670 orr r2, r2, r1, lsr #8 /* r2 = 789A */
2673 mov r2, r2, lsr #8 /* r2 = ...1 */
2674 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2675 strh r2, [r0, #0x01]
2676 mov r2, r3, lsr #8 /* r2 = .543 */
2677 orr r3, r2, ip, lsl #24 /* r3 = 6543 */
2678 mov r2, ip, lsr #8 /* r2 = .987 */
2679 orr r2, r2, r1, lsl #24 /* r2 = A987 */
2680 mov r1, r1, lsr #8 /* r1 = ...B */
2684 strb r1, [r0, #0x0b]
2689 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
2692 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
2693 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
2694 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
2697 mov r2, r3, lsr #16 /* r2 = ..12 */
2698 strh r2, [r0, #0x01]
2699 mov r3, r3, lsl #16 /* r3 = 34.. */
2700 orr r3, r3, ip, lsr #16 /* r3 = 3456 */
2701 mov ip, ip, lsl #16 /* ip = 78.. */
2702 orr ip, ip, r1, lsr #16 /* ip = 789A */
2703 mov r1, r1, lsr #8 /* r1 = .9AB */
2705 strh r3, [r0, #0x01]
2706 mov r3, r3, lsr #16 /* r3 = ..43 */
2707 orr r3, r3, ip, lsl #16 /* r3 = 6543 */
2708 mov ip, ip, lsr #16 /* ip = ..87 */
2709 orr ip, ip, r1, lsl #16 /* ip = A987 */
2710 mov r1, r1, lsr #16 /* r1 = ..xB */
2714 strb r1, [r0, #0x0b]
2719 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2721 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */
2722 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2723 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */
2724 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
2727 mov r1, ip, lsl #16 /* r1 = 23.. */
2728 orr r1, r1, r3, lsr #16 /* r1 = 2345 */
2729 mov r3, r3, lsl #16 /* r3 = 67.. */
2730 orr r3, r3, r2, lsr #16 /* r3 = 6789 */
2733 orr r1, r1, r3, lsl #16 /* r1 = 5432 */
2734 mov r3, r3, lsr #16 /* r3 = ..76 */
2735 orr r3, r3, r2, lsl #16 /* r3 = 9876 */
2736 mov r2, r2, lsr #16 /* r2 = ..BA */
2740 strh r2, [r0, #0x0a]
2745 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
2747 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
2748 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2749 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */
2751 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
2752 ldrb r1, [r1, #0x0b] /* r1 = ...B */
2754 mov r2, r2, lsl #24 /* r2 = 2... */
2755 orr r2, r2, r3, lsr #8 /* r2 = 2345 */
2756 mov r3, r3, lsl #24 /* r3 = 6... */
2757 orr r3, r3, ip, lsr #8 /* r3 = 6789 */
2758 orr r1, r1, ip, lsl #8 /* r1 = 89AB */
2760 mov r2, r2, lsr #24 /* r2 = ...2 */
2761 orr r2, r2, r3, lsl #8 /* r2 = 5432 */
2762 mov r3, r3, lsr #24 /* r3 = ...6 */
2763 orr r3, r3, ip, lsl #8 /* r3 = 9876 */
2764 mov r1, r1, lsl #8 /* r1 = ..B. */
2765 orr r1, r1, ip, lsr #24 /* r1 = ..BA */
2769 strh r1, [r0, #0x0a]
2774 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2779 ldrh r1, [r1, #0x0a]
2783 strh r1, [r0, #0x0a]
2788 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
2790 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */
2791 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */
2792 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */
2793 strh ip, [r0, #0x0a]
2794 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2795 ldrb r1, [r1] /* r1 = ...0 */
2797 mov r2, r2, lsr #24 /* r2 = ...9 */
2798 orr r2, r2, r3, lsl #8 /* r2 = 6789 */
2799 mov r3, r3, lsr #24 /* r3 = ...5 */
2800 orr r3, r3, ip, lsl #8 /* r3 = 2345 */
2801 mov r1, r1, lsl #8 /* r1 = ..0. */
2802 orr r1, r1, ip, lsr #24 /* r1 = ..01 */
2804 mov r2, r2, lsl #24 /* r2 = 9... */
2805 orr r2, r2, r3, lsr #8 /* r2 = 9876 */
2806 mov r3, r3, lsl #24 /* r3 = 5... */
2807 orr r3, r3, ip, lsr #8 /* r3 = 5432 */
2808 orr r1, r1, ip, lsl #8 /* r1 = 3210 */
2817 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
2819 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2820 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */
2821 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */
2823 mov r3, r2, lsr #24 /* r3 = ...0 */
2825 mov r2, r2, lsl #8 /* r2 = 123. */
2826 orr r2, r2, ip, lsr #24 /* r2 = 1234 */
2828 mov r2, ip, lsl #8 /* r2 = 567. */
2829 orr r2, r2, r1, lsr #24 /* r2 = 5678 */
2831 mov r2, r1, lsr #8 /* r2 = ..9A */
2832 strh r2, [r0, #0x09]
2833 strb r1, [r0, #0x0b]
2836 mov r3, r2, lsr #8 /* r3 = .321 */
2837 orr r3, r3, ip, lsl #24 /* r3 = 4321 */
2839 mov r3, ip, lsr #8 /* r3 = .765 */
2840 orr r3, r3, r1, lsl #24 /* r3 = 8765 */
2842 mov r1, r1, lsr #8 /* r1 = .BA9 */
2843 strh r1, [r0, #0x09]
2844 mov r1, r1, lsr #16 /* r1 = ...B */
2845 strb r1, [r0, #0x0b]
2851 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2853 ldrb r2, [r1, #0x0b] /* r2 = ...B */
2854 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */
2855 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2856 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2857 strb r2, [r0, #0x0b]
2859 strh r3, [r0, #0x09]
2860 mov r3, r3, lsr #16 /* r3 = ..78 */
2861 orr r3, r3, ip, lsl #16 /* r3 = 5678 */
2862 mov ip, ip, lsr #16 /* ip = ..34 */
2863 orr ip, ip, r1, lsl #16 /* ip = 1234 */
2864 mov r1, r1, lsr #16 /* r1 = ..x0 */
2866 mov r2, r3, lsr #16 /* r2 = ..A9 */
2867 strh r2, [r0, #0x09]
2868 mov r3, r3, lsl #16 /* r3 = 87.. */
2869 orr r3, r3, ip, lsr #16 /* r3 = 8765 */
2870 mov ip, ip, lsl #16 /* ip = 43.. */
2871 orr ip, ip, r1, lsr #16 /* ip = 4321 */
2872 mov r1, r1, lsr #8 /* r1 = .210 */
2881 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
2884 ldrh r2, [r1, #0x0a] /* r2 = ..AB */
2885 ldr ip, [r1, #0x06] /* ip = 6789 */
2886 ldr r3, [r1, #0x02] /* r3 = 2345 */
2887 ldrh r1, [r1] /* r1 = ..01 */
2888 strb r2, [r0, #0x0b]
2889 mov r2, r2, lsr #8 /* r2 = ...A */
2890 orr r2, r2, ip, lsl #8 /* r2 = 789A */
2891 mov ip, ip, lsr #8 /* ip = .678 */
2892 orr ip, ip, r3, lsl #24 /* ip = 5678 */
2893 mov r3, r3, lsr #8 /* r3 = .234 */
2894 orr r3, r3, r1, lsl #24 /* r3 = 1234 */
2895 mov r1, r1, lsr #8 /* r1 = ...0 */
2899 strh r2, [r0, #0x09]
2901 ldrh r2, [r1] /* r2 = ..10 */
2902 ldr r3, [r1, #0x02] /* r3 = 5432 */
2903 ldr ip, [r1, #0x06] /* ip = 9876 */
2904 ldrh r1, [r1, #0x0a] /* r1 = ..BA */
2906 mov r2, r2, lsr #8 /* r2 = ...1 */
2907 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2908 mov r3, r3, lsr #24 /* r3 = ...5 */
2909 orr r3, r3, ip, lsl #8 /* r3 = 8765 */
2910 mov ip, ip, lsr #24 /* ip = ...9 */
2911 orr ip, ip, r1, lsl #8 /* ip = .BA9 */
2912 mov r1, r1, lsr #8 /* r1 = ...B */
2915 strh ip, [r0, #0x09]
2916 strb r1, [r0, #0x0b]
2922 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2928 ldrh r2, [r1, #0x09]
2929 ldrb r1, [r1, #0x0b]
2932 strh r2, [r0, #0x09]
2933 strb r1, [r0, #0x0b]
2935 #endif /* _ARM_ARCH_5E */