2 * Copyright (c) 2004 Olivier Houchard
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * Copyright 2003 Wasabi Systems, Inc.
28 * All rights reserved.
30 * Written by Steve C. Woodford for Wasabi Systems, Inc.
32 * Redistribution and use in source and binary forms, with or without
33 * modification, are permitted provided that the following conditions
35 * 1. Redistributions of source code must retain the above copyright
36 * notice, this list of conditions and the following disclaimer.
37 * 2. Redistributions in binary form must reproduce the above copyright
38 * notice, this list of conditions and the following disclaimer in the
39 * documentation and/or other materials provided with the distribution.
40 * 3. All advertising materials mentioning features or use of this software
41 * must display the following acknowledgement:
42 * This product includes software developed for the NetBSD Project by
43 * Wasabi Systems, Inc.
44 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
45 * or promote products derived from this software without specific prior
48 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
49 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
50 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
51 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
52 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
53 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
54 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
55 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
56 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
57 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
58 * POSSIBILITY OF SUCH DAMAGE.
61 * Copyright (c) 1997 The NetBSD Foundation, Inc.
62 * All rights reserved.
64 * This code is derived from software contributed to The NetBSD Foundation
65 * by Neil A. Carson and Mark Brinicombe
67 * Redistribution and use in source and binary forms, with or without
68 * modification, are permitted provided that the following conditions
70 * 1. Redistributions of source code must retain the above copyright
71 * notice, this list of conditions and the following disclaimer.
72 * 2. Redistributions in binary form must reproduce the above copyright
73 * notice, this list of conditions and the following disclaimer in the
74 * documentation and/or other materials provided with the distribution.
75 * 3. All advertising materials mentioning features or use of this software
76 * must display the following acknowledgement:
77 * This product includes software developed by the NetBSD
78 * Foundation, Inc. and its contributors.
79 * 4. Neither the name of The NetBSD Foundation nor the names of its
80 * contributors may be used to endorse or promote products derived
81 * from this software without specific prior written permission.
83 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
84 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
85 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
86 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
87 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
88 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
89 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
90 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
91 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
92 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
93 * POSSIBILITY OF SUCH DAMAGE.
96 #include <machine/asm.h>
97 #include <machine/asmacros.h>
98 __FBSDID("$FreeBSD$");
/*
 * Literal pool: addresses of the optional platform-specific block-copy /
 * block-zero hooks and their minimum-size thresholds.  The code below
 * loads these via the .L_arm_memcpy / .L_min_memcpy_size / .L_min_bzero_size
 * labels (the label lines themselves are elided from this extract).
 */
103 .word _C_LABEL(_arm_memcpy)
105 .word _C_LABEL(_arm_bzero)
107 .word _C_LABEL(_min_memcpy_size)
109 .word _C_LABEL(_min_bzero_size)
/*
 * NOTE(review): this region is an elided extract of the ARM bzero/memset
 * implementation.  ENTRY()/loop-head lines and several instructions that
 * sat between the numbered fragments (e.g. .Lmemset_loop128,
 * .Lmemset_loop32, .Lmemset_loop4, and the count-update subtractions)
 * are not visible here; comments below describe only what the visible
 * lines establish.
 */
111 * memset: Sets a block of memory to the specified value
116 * r2 - number of bytes to write
121 /* LINTSTUB: Func: void bzero(void *, size_t) */
/*
 * bzero: load the minimum size for which the optional _arm_bzero platform
 * hook pays off, and save/restore r0, r1, lr around the (elided) hook call.
 */
127 ldr r2, .L_min_bzero_size
131 stmfd sp!, {r0, r1, lr}
136 ldmfd sp!, {r0, r1, lr}
142 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
/*
 * memset: r3 holds the fill byte.  Judging by the uses below, the byte
 * count is carried in r1 and the write cursor in ip (set up by elided
 * moves) so that r0 can be returned to the caller unchanged -- TODO
 * confirm against the full source.
 */
144 and r3, r1, #0xff /* We deal with bytes */
147 cmp r1, #0x04 /* Do we have less than 4 bytes */
149 blt .Lmemset_lessthanfour
151 /* Ok first we will word align the address */
152 ands r2, ip, #0x03 /* Get the bottom two bits */
153 bne .Lmemset_wordunaligned /* The address is not word aligned */
155 /* We are now word aligned */
156 .Lmemset_wordaligned:
157 orr r3, r3, r3, lsl #8 /* Extend value to 16-bits */
159 tst ip, #0x04 /* Quad-align for armv5e */
163 orr r3, r3, r3, lsl #16 /* Extend value to 32-bits */
165 subne r1, r1, #0x04 /* Quad-align if necessary */
166 strne r3, [ip], #0x04
169 blt .Lmemset_loop4 /* If less than 16 then use words */
170 mov r2, r3 /* Duplicate data */
171 cmp r1, #0x80 /* If < 128 then skip the big loop */
174 /* Do 128 bytes at a time */
/*
 * strged = strd executed while GE holds: stores the r2:r3 pair (8 bytes,
 * both registers hold the replicated fill word).  Sixteen of these give
 * the 128-byte stride; the loop label and count update are elided.
 */
178 strged r2, [ip], #0x08
179 strged r2, [ip], #0x08
180 strged r2, [ip], #0x08
181 strged r2, [ip], #0x08
182 strged r2, [ip], #0x08
183 strged r2, [ip], #0x08
184 strged r2, [ip], #0x08
185 strged r2, [ip], #0x08
186 strged r2, [ip], #0x08
187 strged r2, [ip], #0x08
188 strged r2, [ip], #0x08
189 strged r2, [ip], #0x08
190 strged r2, [ip], #0x08
191 strged r2, [ip], #0x08
192 strged r2, [ip], #0x08
193 strged r2, [ip], #0x08
213 RETeq /* Zero length so just exit */
215 add r1, r1, #0x80 /* Adjust for extra sub */
217 /* Do 32 bytes at a time */
221 strged r2, [ip], #0x08
222 strged r2, [ip], #0x08
223 strged r2, [ip], #0x08
224 strged r2, [ip], #0x08
232 RETeq /* Zero length so just exit */
234 adds r1, r1, #0x10 /* Partially adjust for extra sub */
236 /* Deal with 16 bytes or more */
238 strged r2, [ip], #0x08
239 strged r2, [ip], #0x08
244 RETeq /* Zero length so just exit */
246 addlt r1, r1, #0x10 /* Possibly adjust for extra sub */
248 /* We have at least 4 bytes so copy as words */
251 strge r3, [ip], #0x04
253 RETeq /* Zero length so just exit */
256 /* Compensate for 64-bit alignment check */
/* Tail: up to 3 trailing bytes, stored under GE/GT conditions. */
264 strb r3, [ip], #0x01 /* Set 1 byte */
265 strgeb r3, [ip], #0x01 /* Set another byte */
266 strgtb r3, [ip] /* and a third */
/*
 * Unaligned prefix: write 1-3 bytes until ip is word aligned, then
 * re-check the remaining count and rejoin the aligned path.
 */
269 .Lmemset_wordunaligned:
271 strb r3, [ip], #0x01 /* Set 1 byte */
273 strgeb r3, [ip], #0x01 /* Set another byte */
275 strgtb r3, [ip], #0x01 /* and a third */
276 cmp r1, #0x04 /* More than 4 bytes left? */
277 bge .Lmemset_wordaligned /* Yup */
/* Short fill: fewer than 4 bytes total. */
279 .Lmemset_lessthanfour:
281 RETeq /* Zero length so exit */
282 strb r3, [ip], #0x01 /* Set 1 byte */
284 strgeb r3, [ip], #0x01 /* Set another byte */
285 strgtb r3, [ip] /* and a third */
/*
 * memcmp (elided extract).  Per the b1/b2 comments on the loads below,
 * buffer 1's pointer is carried in ip and buffer 2's in r1, with the
 * signed byte difference returned in r0.  The entry sequence, bytewise
 * loops, and the jump-table targets of the alignment dispatch are elided.
 */
294 /* Are both addresses aligned the same way? */
297 RETeq /* len == 0, or same addresses! */
300 bne .Lmemcmp_bytewise2 /* Badly aligned. Do it the slow way */
302 /* Word-align the addresses, if necessary */
/*
 * r3 = r3 * 3, then a computed branch (pc-relative jump table with
 * 8-instruction-sized entries) dispatches on the misalignment --
 * the table entries themselves are elided.
 */
305 add r3, r3, r3, lsl #1
306 addne pc, pc, r3, lsl #3
309 /* Compare up to 3 bytes */
317 /* Compare up to 2 bytes */
333 /* Compare 4 bytes at a time, if possible */
335 bcc .Lmemcmp_bytewise
336 .Lmemcmp_word_aligned:
341 beq .Lmemcmp_word_aligned
344 /* Correct for extra subtraction, and check if done */
346 cmpeq r0, #0x00 /* If done, did all bytes match? */
347 RETeq /* Yup. Just return */
349 /* Re-do the final word byte-wise */
360 beq .Lmemcmp_bytewise2
365 * 6 byte compares are very common, thanks to the network stack.
366 * This code is hand-scheduled to reduce the number of stalls for
367 * load results. Everything else being equal, this will be ~32%
368 * faster than a byte-wise memcmp.
/*
 * 6-byte fast path: each pair of loads is issued a cycle ahead of the
 * subtract that consumes it; ldreqb skips further loads once a mismatch
 * has produced a nonzero difference in r0.
 */
372 ldrb r3, [r1, #0x00] /* r3 = b2#0 */
373 ldrb r0, [ip, #0x00] /* r0 = b1#0 */
374 ldrb r2, [r1, #0x01] /* r2 = b2#1 */
375 subs r0, r0, r3 /* r0 = b1#0 - b2#0 */
376 ldreqb r3, [ip, #0x01] /* r3 = b1#1 */
377 RETne /* Return if mismatch on #0 */
378 subs r0, r3, r2 /* r0 = b1#1 - b2#1 */
379 ldreqb r3, [r1, #0x02] /* r3 = b2#2 */
380 ldreqb r0, [ip, #0x02] /* r0 = b1#2 */
381 RETne /* Return if mismatch on #1 */
382 ldrb r2, [r1, #0x03] /* r2 = b2#3 */
383 subs r0, r0, r3 /* r0 = b1#2 - b2#2 */
384 ldreqb r3, [ip, #0x03] /* r3 = b1#3 */
385 RETne /* Return if mismatch on #2 */
386 subs r0, r3, r2 /* r0 = b1#3 - b2#3 */
387 ldreqb r3, [r1, #0x04] /* r3 = b2#4 */
388 ldreqb r0, [ip, #0x04] /* r0 = b1#4 */
389 RETne /* Return if mismatch on #3 */
390 ldrb r2, [r1, #0x05] /* r2 = b2#5 */
391 subs r0, r0, r3 /* r0 = b1#4 - b2#4 */
392 ldreqb r3, [ip, #0x05] /* r3 = b1#5 */
393 RETne /* Return if mismatch on #4 */
394 sub r0, r3, r2 /* r0 = b1#5 - b2#5 */
/*
 * memmove / bcopy (elided extract).  r0 = dst, r1 = src, r2 = len.
 * Non-overlapping buffers are handed off to memcpy; otherwise a forward
 * (.Lmemmove_f*) or backward (.Lmemmove_b*) copy is chosen by direction.
 * ENTRY() lines, several compares/count updates, and the #ifdef __ARMEB__
 * markers are elided from this extract.
 */
398 /* switch the source and destination registers */
403 /* Do the buffers overlap? */
405 RETeq /* Bail now if src/dst are the same */
406 subcc r3, r0, r1 /* if (dst > src) r3 = dst - src */
407 subcs r3, r1, r0 /* if (src > dsr) r3 = src - dst */
408 cmp r3, r2 /* if (r3 < len) we have an overlap */
409 bcc PIC_SYM(_C_LABEL(memcpy), PLT)
411 /* Determine copy direction */
413 bcc .Lmemmove_backwards
415 moveq r0, #0 /* Quick abort for len=0 */
/* ---- Forward copy ---- */
418 stmdb sp!, {r0, lr} /* memmove() returns dest addr */
420 blt .Lmemmove_fl4 /* less than 4 bytes */
422 bne .Lmemmove_fdestul /* oh unaligned destination addr */
424 bne .Lmemmove_fsrcul /* oh unaligned source addr */
427 /* We have aligned source and destination */
429 blt .Lmemmove_fl12 /* less than 12 bytes (4 from above) */
431 blt .Lmemmove_fl32 /* less than 32 bytes (12 from above) */
432 stmdb sp!, {r4} /* borrow r4 */
434 /* blat 32 bytes at a time */
435 /* XXX for really big copies perhaps we should use more registers */
437 ldmia r1!, {r3, r4, r12, lr}
438 stmia r0!, {r3, r4, r12, lr}
439 ldmia r1!, {r3, r4, r12, lr}
440 stmia r0!, {r3, r4, r12, lr}
442 bge .Lmemmove_floop32
445 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
446 stmgeia r0!, {r3, r4, r12, lr}
448 ldmia sp!, {r4} /* return r4 */
453 /* blat 12 bytes at a time */
455 ldmgeia r1!, {r3, r12, lr}
456 stmgeia r0!, {r3, r12, lr}
458 bge .Lmemmove_floop12
467 ldmgeia r1!, {r3, r12}
468 stmgeia r0!, {r3, r12}
472 /* less than 4 bytes to go */
474 ldmeqia sp!, {r0, pc} /* done */
476 /* copy the crud byte at a time */
486 /* erg - unaligned destination */
491 /* align destination with byte copies */
499 blt .Lmemmove_fl4 /* less the 4 bytes */
502 beq .Lmemmove_ft8 /* we have an aligned source */
504 /* erg - unaligned source */
505 /* This is where it gets nasty ... */
/*
 * Misaligned-source forward loops: each read word is shifted and merged
 * with bits of the next word (orr with lsr/lsl #24, #16 or #8 according
 * to the 1/2/3-byte source offset).  The adjacent lsr- and lsl-variants
 * of each merge are almost certainly the big-/little-endian halves of an
 * elided #ifdef __ARMEB__ -- TODO confirm against the full source.
 */
510 bgt .Lmemmove_fsrcul3
511 beq .Lmemmove_fsrcul2
513 blt .Lmemmove_fsrcul1loop4
517 .Lmemmove_fsrcul1loop16:
523 ldmia r1!, {r4, r5, r12, lr}
525 orr r3, r3, r4, lsr #24
527 orr r4, r4, r5, lsr #24
529 orr r5, r5, r12, lsr #24
531 orr r12, r12, lr, lsr #24
533 orr r3, r3, r4, lsl #24
535 orr r4, r4, r5, lsl #24
537 orr r5, r5, r12, lsl #24
539 orr r12, r12, lr, lsl #24
541 stmia r0!, {r3-r5, r12}
543 bge .Lmemmove_fsrcul1loop16
546 blt .Lmemmove_fsrcul1l4
548 .Lmemmove_fsrcul1loop4:
556 orr r12, r12, lr, lsr #24
558 orr r12, r12, lr, lsl #24
562 bge .Lmemmove_fsrcul1loop4
570 blt .Lmemmove_fsrcul2loop4
574 .Lmemmove_fsrcul2loop16:
580 ldmia r1!, {r4, r5, r12, lr}
582 orr r3, r3, r4, lsr #16
584 orr r4, r4, r5, lsr #16
586 orr r5, r5, r12, lsr #16
587 mov r12, r12, lsl #16
588 orr r12, r12, lr, lsr #16
590 orr r3, r3, r4, lsl #16
592 orr r4, r4, r5, lsl #16
594 orr r5, r5, r12, lsl #16
595 mov r12, r12, lsr #16
596 orr r12, r12, lr, lsl #16
598 stmia r0!, {r3-r5, r12}
600 bge .Lmemmove_fsrcul2loop16
603 blt .Lmemmove_fsrcul2l4
605 .Lmemmove_fsrcul2loop4:
613 orr r12, r12, lr, lsr #16
615 orr r12, r12, lr, lsl #16
619 bge .Lmemmove_fsrcul2loop4
627 blt .Lmemmove_fsrcul3loop4
631 .Lmemmove_fsrcul3loop16:
637 ldmia r1!, {r4, r5, r12, lr}
639 orr r3, r3, r4, lsr #8
641 orr r4, r4, r5, lsr #8
643 orr r5, r5, r12, lsr #8
644 mov r12, r12, lsl #24
645 orr r12, r12, lr, lsr #8
647 orr r3, r3, r4, lsl #8
649 orr r4, r4, r5, lsl #8
651 orr r5, r5, r12, lsl #8
652 mov r12, r12, lsr #24
653 orr r12, r12, lr, lsl #8
655 stmia r0!, {r3-r5, r12}
657 bge .Lmemmove_fsrcul3loop16
660 blt .Lmemmove_fsrcul3l4
662 .Lmemmove_fsrcul3loop4:
670 orr r12, r12, lr, lsr #8
672 orr r12, r12, lr, lsl #8
676 bge .Lmemmove_fsrcul3loop4
/*
 * ---- Backward copy (dst/src advanced past the end, ldmdb/stmdb walk
 * down) -- used when the destination overlaps above the source. ----
 */
686 blt .Lmemmove_bl4 /* less than 4 bytes */
688 bne .Lmemmove_bdestul /* oh unaligned destination addr */
690 bne .Lmemmove_bsrcul /* oh unaligned source addr */
693 /* We have aligned source and destination */
695 blt .Lmemmove_bl12 /* less than 12 bytes (4 from above) */
697 subs r2, r2, #0x14 /* less than 32 bytes (12 from above) */
700 /* blat 32 bytes at a time */
701 /* XXX for really big copies perhaps we should use more registers */
703 ldmdb r1!, {r3, r4, r12, lr}
704 stmdb r0!, {r3, r4, r12, lr}
705 ldmdb r1!, {r3, r4, r12, lr}
706 stmdb r0!, {r3, r4, r12, lr}
708 bge .Lmemmove_bloop32
712 ldmgedb r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
713 stmgedb r0!, {r3, r4, r12, lr}
716 ldmgedb r1!, {r3, r12, lr} /* blat a remaining 12 bytes */
717 stmgedb r0!, {r3, r12, lr}
727 ldmgedb r1!, {r3, r12}
728 stmgedb r0!, {r3, r12}
732 /* less than 4 bytes to go */
736 /* copy the crud byte at a time */
740 ldrgeb r3, [r1, #-1]!
741 strgeb r3, [r0, #-1]!
742 ldrgtb r3, [r1, #-1]!
743 strgtb r3, [r0, #-1]!
746 /* erg - unaligned destination */
750 /* align destination with byte copies */
753 ldrgeb r3, [r1, #-1]!
754 strgeb r3, [r0, #-1]!
755 ldrgtb r3, [r1, #-1]!
756 strgtb r3, [r0, #-1]!
758 blt .Lmemmove_bl4 /* less than 4 bytes to go */
760 beq .Lmemmove_bt8 /* we have an aligned source */
762 /* erg - unaligned source */
763 /* This is where it gets nasty ... */
/*
 * Backward misaligned-source loops: mirror image of the forward ones --
 * words are read downward and merged with the previous (higher-address)
 * word's spill bits.  Same elided-BE/LE-pair caveat as above.
 */
768 blt .Lmemmove_bsrcul1
769 beq .Lmemmove_bsrcul2
771 blt .Lmemmove_bsrcul3loop4
773 stmdb sp!, {r4, r5, lr}
775 .Lmemmove_bsrcul3loop16:
781 ldmdb r1!, {r3-r5, r12}
783 orr lr, lr, r12, lsl #24
785 orr r12, r12, r5, lsl #24
787 orr r5, r5, r4, lsl #24
789 orr r4, r4, r3, lsl #24
791 orr lr, lr, r12, lsr #24
793 orr r12, r12, r5, lsr #24
795 orr r5, r5, r4, lsr #24
797 orr r4, r4, r3, lsr #24
799 stmdb r0!, {r4, r5, r12, lr}
801 bge .Lmemmove_bsrcul3loop16
802 ldmia sp!, {r4, r5, lr}
804 blt .Lmemmove_bsrcul3l4
806 .Lmemmove_bsrcul3loop4:
814 orr r12, r12, r3, lsl #24
816 orr r12, r12, r3, lsr #24
820 bge .Lmemmove_bsrcul3loop4
828 blt .Lmemmove_bsrcul2loop4
830 stmdb sp!, {r4, r5, lr}
832 .Lmemmove_bsrcul2loop16:
838 ldmdb r1!, {r3-r5, r12}
840 orr lr, lr, r12, lsl #16
841 mov r12, r12, lsr #16
842 orr r12, r12, r5, lsl #16
844 orr r5, r5, r4, lsl #16
846 orr r4, r4, r3, lsl #16
848 orr lr, lr, r12, lsr #16
849 mov r12, r12, lsl #16
850 orr r12, r12, r5, lsr #16
852 orr r5, r5, r4, lsr #16
854 orr r4, r4, r3, lsr #16
856 stmdb r0!, {r4, r5, r12, lr}
858 bge .Lmemmove_bsrcul2loop16
859 ldmia sp!, {r4, r5, lr}
861 blt .Lmemmove_bsrcul2l4
863 .Lmemmove_bsrcul2loop4:
871 orr r12, r12, r3, lsl #16
873 orr r12, r12, r3, lsr #16
877 bge .Lmemmove_bsrcul2loop4
885 blt .Lmemmove_bsrcul1loop4
887 stmdb sp!, {r4, r5, lr}
889 .Lmemmove_bsrcul1loop32:
895 ldmdb r1!, {r3-r5, r12}
897 orr lr, lr, r12, lsl #8
898 mov r12, r12, lsr #24
899 orr r12, r12, r5, lsl #8
901 orr r5, r5, r4, lsl #8
903 orr r4, r4, r3, lsl #8
905 orr lr, lr, r12, lsr #8
906 mov r12, r12, lsl #24
907 orr r12, r12, r5, lsr #8
909 orr r5, r5, r4, lsr #8
911 orr r4, r4, r3, lsr #8
913 stmdb r0!, {r4, r5, r12, lr}
915 bge .Lmemmove_bsrcul1loop32
916 ldmia sp!, {r4, r5, lr}
918 blt .Lmemmove_bsrcul1l4
920 .Lmemmove_bsrcul1loop4:
928 orr r12, r12, r3, lsl #8
930 orr r12, r12, r3, lsr #8
934 bge .Lmemmove_bsrcul1loop4
/*
 * Generic memcpy for pre-ARMv5E cores (guarded by !_ARM_ARCH_5E).
 * Same forward-copy skeleton as the memmove forward path above: ldmia/
 * stmia bursts for the aligned case, shift-and-merge loops (.Lmemcpy_
 * srcul1/2/3*) for a misaligned source.  The visible prologue checks
 * whether the optional _arm_memcpy platform hook should handle the copy
 * (skipped when executing from flash per the FLASHADDR guard).  Many
 * lines (ENTRY(), compares, count updates, #ifdef markers) are elided
 * from this extract.
 */
940 #if !defined(_ARM_ARCH_5E)
942 /* save leaf functions having to store this away */
943 /* Do not check arm_memcpy if we're running from flash */
945 #if FLASHADDR > PHYSADDR
955 ldr r3, .L_arm_memcpy
959 ldr r3, .L_min_memcpy_size
963 stmfd sp!, {r0-r2, r4, lr}
965 ldr r4, .L_arm_memcpy
969 ldmfd sp!, {r0-r2, r4, lr}
973 stmdb sp!, {r0, lr} /* memcpy() returns dest addr */
976 blt .Lmemcpy_l4 /* less than 4 bytes */
978 bne .Lmemcpy_destul /* oh unaligned destination addr */
980 bne .Lmemcpy_srcul /* oh unaligned source addr */
983 /* We have aligned source and destination */
985 blt .Lmemcpy_l12 /* less than 12 bytes (4 from above) */
987 blt .Lmemcpy_l32 /* less than 32 bytes (12 from above) */
988 stmdb sp!, {r4} /* borrow r4 */
990 /* blat 32 bytes at a time */
991 /* XXX for really big copies perhaps we should use more registers */
993 ldmia r1!, {r3, r4, r12, lr}
994 stmia r0!, {r3, r4, r12, lr}
995 ldmia r1!, {r3, r4, r12, lr}
996 stmia r0!, {r3, r4, r12, lr}
1001 ldmgeia r1!, {r3, r4, r12, lr} /* blat a remaining 16 bytes */
1002 stmgeia r0!, {r3, r4, r12, lr}
1004 ldmia sp!, {r4} /* return r4 */
1009 /* blat 12 bytes at a time */
1011 ldmgeia r1!, {r3, r12, lr}
1012 stmgeia r0!, {r3, r12, lr}
1013 subges r2, r2, #0x0c
1023 ldmgeia r1!, {r3, r12}
1024 stmgeia r0!, {r3, r12}
1028 /* less than 4 bytes to go */
/*
 * Two return forms survive here -- the '^' variant and the plain one --
 * presumably the two halves of an elided conditional-assembly block
 * (kernel vs. user return convention); TODO confirm against full source.
 */
1031 ldmeqia sp!, {r0, pc}^ /* done */
1033 ldmeqia sp!, {r0, pc} /* done */
1035 /* copy the crud byte at a time */
1045 /* erg - unaligned destination */
1050 /* align destination with byte copies */
1058 blt .Lmemcpy_l4 /* less the 4 bytes */
1061 beq .Lmemcpy_t8 /* we have an aligned source */
1063 /* erg - unaligned source */
1064 /* This is where it gets nasty ... */
/* Shift-and-merge loops for source offsets of 1, 2 and 3 bytes. */
1072 blt .Lmemcpy_srcul1loop4
1076 .Lmemcpy_srcul1loop16:
1078 ldmia r1!, {r4, r5, r12, lr}
1079 orr r3, r3, r4, lsl #24
1081 orr r4, r4, r5, lsl #24
1083 orr r5, r5, r12, lsl #24
1084 mov r12, r12, lsr #8
1085 orr r12, r12, lr, lsl #24
1086 stmia r0!, {r3-r5, r12}
1088 bge .Lmemcpy_srcul1loop16
1091 blt .Lmemcpy_srcul1l4
1093 .Lmemcpy_srcul1loop4:
1096 orr r12, r12, lr, lsl #24
1099 bge .Lmemcpy_srcul1loop4
1107 blt .Lmemcpy_srcul2loop4
1111 .Lmemcpy_srcul2loop16:
1113 ldmia r1!, {r4, r5, r12, lr}
1114 orr r3, r3, r4, lsl #16
1116 orr r4, r4, r5, lsl #16
1118 orr r5, r5, r12, lsl #16
1119 mov r12, r12, lsr #16
1120 orr r12, r12, lr, lsl #16
1121 stmia r0!, {r3-r5, r12}
1123 bge .Lmemcpy_srcul2loop16
1126 blt .Lmemcpy_srcul2l4
1128 .Lmemcpy_srcul2loop4:
1129 mov r12, lr, lsr #16
1131 orr r12, r12, lr, lsl #16
1134 bge .Lmemcpy_srcul2loop4
1142 blt .Lmemcpy_srcul3loop4
1146 .Lmemcpy_srcul3loop16:
1148 ldmia r1!, {r4, r5, r12, lr}
1149 orr r3, r3, r4, lsl #8
1151 orr r4, r4, r5, lsl #8
1153 orr r5, r5, r12, lsl #8
1154 mov r12, r12, lsr #24
1155 orr r12, r12, lr, lsl #8
1156 stmia r0!, {r3-r5, r12}
1158 bge .Lmemcpy_srcul3loop16
1161 blt .Lmemcpy_srcul3l4
1163 .Lmemcpy_srcul3loop4:
1164 mov r12, lr, lsr #24
1166 orr r12, r12, lr, lsl #8
1169 bge .Lmemcpy_srcul3loop4
1175 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
1179 ble .Lmemcpy_short /* <= 12 bytes */
1181 #if FLASHADDR > PHYSADDR
1191 ldr r3, .L_arm_memcpy
1195 ldr r3, .L_min_memcpy_size
1199 stmfd sp!, {r0-r2, r4, lr}
1201 ldr r4, .L_arm_memcpy
1205 ldmfd sp!, {r0-r2, r4, lr}
1208 mov r3, r0 /* We must not clobber r0 */
1210 /* Word-align the destination buffer */
1211 ands ip, r3, #0x03 /* Already word aligned? */
1212 beq .Lmemcpy_wordaligned /* Yup */
1214 ldrb ip, [r1], #0x01
1216 strb ip, [r3], #0x01
1217 ldrleb ip, [r1], #0x01
1219 strleb ip, [r3], #0x01
1220 ldrltb ip, [r1], #0x01
1222 strltb ip, [r3], #0x01
1224 /* Destination buffer is now word aligned */
1225 .Lmemcpy_wordaligned:
1226 ands ip, r1, #0x03 /* Is src also word-aligned? */
1227 bne .Lmemcpy_bad_align /* Nope. Things just got bad */
1229 /* Quad-align the destination buffer */
1230 tst r3, #0x07 /* Already quad aligned? */
1231 ldrne ip, [r1], #0x04
1232 stmfd sp!, {r4-r9} /* Free up some registers */
1234 strne ip, [r3], #0x04
1236 /* Destination buffer quad aligned, source is at least word aligned */
1238 blt .Lmemcpy_w_lessthan128
1240 /* Copy 128 bytes at a time */
1242 ldr r4, [r1], #0x04 /* LD:00-03 */
1243 ldr r5, [r1], #0x04 /* LD:04-07 */
1244 pld [r1, #0x18] /* Prefetch 0x20 */
1245 ldr r6, [r1], #0x04 /* LD:08-0b */
1246 ldr r7, [r1], #0x04 /* LD:0c-0f */
1247 ldr r8, [r1], #0x04 /* LD:10-13 */
1248 ldr r9, [r1], #0x04 /* LD:14-17 */
1249 strd r4, [r3], #0x08 /* ST:00-07 */
1250 ldr r4, [r1], #0x04 /* LD:18-1b */
1251 ldr r5, [r1], #0x04 /* LD:1c-1f */
1252 strd r6, [r3], #0x08 /* ST:08-0f */
1253 ldr r6, [r1], #0x04 /* LD:20-23 */
1254 ldr r7, [r1], #0x04 /* LD:24-27 */
1255 pld [r1, #0x18] /* Prefetch 0x40 */
1256 strd r8, [r3], #0x08 /* ST:10-17 */
1257 ldr r8, [r1], #0x04 /* LD:28-2b */
1258 ldr r9, [r1], #0x04 /* LD:2c-2f */
1259 strd r4, [r3], #0x08 /* ST:18-1f */
1260 ldr r4, [r1], #0x04 /* LD:30-33 */
1261 ldr r5, [r1], #0x04 /* LD:34-37 */
1262 strd r6, [r3], #0x08 /* ST:20-27 */
1263 ldr r6, [r1], #0x04 /* LD:38-3b */
1264 ldr r7, [r1], #0x04 /* LD:3c-3f */
1265 strd r8, [r3], #0x08 /* ST:28-2f */
1266 ldr r8, [r1], #0x04 /* LD:40-43 */
1267 ldr r9, [r1], #0x04 /* LD:44-47 */
1268 pld [r1, #0x18] /* Prefetch 0x60 */
1269 strd r4, [r3], #0x08 /* ST:30-37 */
1270 ldr r4, [r1], #0x04 /* LD:48-4b */
1271 ldr r5, [r1], #0x04 /* LD:4c-4f */
1272 strd r6, [r3], #0x08 /* ST:38-3f */
1273 ldr r6, [r1], #0x04 /* LD:50-53 */
1274 ldr r7, [r1], #0x04 /* LD:54-57 */
1275 strd r8, [r3], #0x08 /* ST:40-47 */
1276 ldr r8, [r1], #0x04 /* LD:58-5b */
1277 ldr r9, [r1], #0x04 /* LD:5c-5f */
1278 strd r4, [r3], #0x08 /* ST:48-4f */
1279 ldr r4, [r1], #0x04 /* LD:60-63 */
1280 ldr r5, [r1], #0x04 /* LD:64-67 */
1281 pld [r1, #0x18] /* Prefetch 0x80 */
1282 strd r6, [r3], #0x08 /* ST:50-57 */
1283 ldr r6, [r1], #0x04 /* LD:68-6b */
1284 ldr r7, [r1], #0x04 /* LD:6c-6f */
1285 strd r8, [r3], #0x08 /* ST:58-5f */
1286 ldr r8, [r1], #0x04 /* LD:70-73 */
1287 ldr r9, [r1], #0x04 /* LD:74-77 */
1288 strd r4, [r3], #0x08 /* ST:60-67 */
1289 ldr r4, [r1], #0x04 /* LD:78-7b */
1290 ldr r5, [r1], #0x04 /* LD:7c-7f */
1291 strd r6, [r3], #0x08 /* ST:68-6f */
1292 strd r8, [r3], #0x08 /* ST:70-77 */
1294 strd r4, [r3], #0x08 /* ST:78-7f */
1295 bge .Lmemcpy_w_loop128
1297 .Lmemcpy_w_lessthan128:
1298 adds r2, r2, #0x80 /* Adjust for extra sub */
1299 ldmeqfd sp!, {r4-r9}
1300 RETeq /* Return now if done */
1302 blt .Lmemcpy_w_lessthan32
1304 /* Copy 32 bytes at a time */
1313 strd r4, [r3], #0x08
1316 strd r6, [r3], #0x08
1317 strd r8, [r3], #0x08
1319 strd r4, [r3], #0x08
1320 bge .Lmemcpy_w_loop32
1322 .Lmemcpy_w_lessthan32:
1323 adds r2, r2, #0x20 /* Adjust for extra sub */
1324 ldmeqfd sp!, {r4-r9}
1325 RETeq /* Return now if done */
1329 addne pc, pc, r4, lsl #1
1332 /* At least 24 bytes remaining */
1336 strd r4, [r3], #0x08
1338 /* At least 16 bytes remaining */
1342 strd r4, [r3], #0x08
1344 /* At least 8 bytes remaining */
1348 strd r4, [r3], #0x08
1350 /* Less than 8 bytes remaining */
1352 RETeq /* Return now if done */
1354 ldrge ip, [r1], #0x04
1355 strge ip, [r3], #0x04
1356 RETeq /* Return now if done */
1358 ldrb ip, [r1], #0x01
1360 ldrgeb r2, [r1], #0x01
1361 strb ip, [r3], #0x01
1363 strgeb r2, [r3], #0x01
1369 * At this point, it has not been possible to word align both buffers.
1370 * The destination buffer is word aligned, but the source buffer is not.
1381 .Lmemcpy_bad1_loop16:
1393 orr r4, r4, r5, lsr #24
1395 orr r5, r5, r6, lsr #24
1397 orr r6, r6, r7, lsr #24
1399 orr r7, r7, ip, lsr #24
1401 orr r4, r4, r5, lsl #24
1403 orr r5, r5, r6, lsl #24
1405 orr r6, r6, r7, lsl #24
1407 orr r7, r7, ip, lsl #24
1415 bge .Lmemcpy_bad1_loop16
1418 ldmeqfd sp!, {r4-r7}
1419 RETeq /* Return now if done */
1422 blt .Lmemcpy_bad_done
1424 .Lmemcpy_bad1_loop4:
1433 orr r4, r4, ip, lsr #24
1435 orr r4, r4, ip, lsl #24
1438 bge .Lmemcpy_bad1_loop4
1442 .Lmemcpy_bad2_loop16:
1454 orr r4, r4, r5, lsr #16
1456 orr r5, r5, r6, lsr #16
1458 orr r6, r6, r7, lsr #16
1460 orr r7, r7, ip, lsr #16
1462 orr r4, r4, r5, lsl #16
1464 orr r5, r5, r6, lsl #16
1466 orr r6, r6, r7, lsl #16
1468 orr r7, r7, ip, lsl #16
1476 bge .Lmemcpy_bad2_loop16
1479 ldmeqfd sp!, {r4-r7}
1480 RETeq /* Return now if done */
1483 blt .Lmemcpy_bad_done
1485 .Lmemcpy_bad2_loop4:
1494 orr r4, r4, ip, lsr #16
1496 orr r4, r4, ip, lsl #16
1499 bge .Lmemcpy_bad2_loop4
1503 .Lmemcpy_bad3_loop16:
1515 orr r4, r4, r5, lsr #8
1517 orr r5, r5, r6, lsr #8
1519 orr r6, r6, r7, lsr #8
1521 orr r7, r7, ip, lsr #8
1523 orr r4, r4, r5, lsl #8
1525 orr r5, r5, r6, lsl #8
1527 orr r6, r6, r7, lsl #8
1529 orr r7, r7, ip, lsl #8
1537 bge .Lmemcpy_bad3_loop16
1540 ldmeqfd sp!, {r4-r7}
1541 RETeq /* Return now if done */
1544 blt .Lmemcpy_bad_done
1546 .Lmemcpy_bad3_loop4:
1555 orr r4, r4, ip, lsr #8
1557 orr r4, r4, ip, lsl #8
1560 bge .Lmemcpy_bad3_loop4
1567 ldrb ip, [r1], #0x01
1569 ldrgeb r2, [r1], #0x01
1570 strb ip, [r3], #0x01
1572 strgeb r2, [r3], #0x01
1578 * Handle short copies (less than 16 bytes), possibly misaligned.
1579 * Some of these are *very* common, thanks to the network stack,
1580 * and so are handled specially.
1583 add pc, pc, r2, lsl #2
1586 b .Lmemcpy_bytewise /* 0x01 */
1587 b .Lmemcpy_bytewise /* 0x02 */
1588 b .Lmemcpy_bytewise /* 0x03 */
1589 b .Lmemcpy_4 /* 0x04 */
1590 b .Lmemcpy_bytewise /* 0x05 */
1591 b .Lmemcpy_6 /* 0x06 */
1592 b .Lmemcpy_bytewise /* 0x07 */
1593 b .Lmemcpy_8 /* 0x08 */
1594 b .Lmemcpy_bytewise /* 0x09 */
1595 b .Lmemcpy_bytewise /* 0x0a */
1596 b .Lmemcpy_bytewise /* 0x0b */
1597 b .Lmemcpy_c /* 0x0c */
1599 mov r3, r0 /* We must not clobber r0 */
1600 ldrb ip, [r1], #0x01
1601 1: subs r2, r2, #0x01
1602 strb ip, [r3], #0x01
1603 ldrneb ip, [r1], #0x01
1607 /******************************************************************************
1608 * Special case for 4 byte copies
1610 #define LMEMCPY_4_LOG2 6 /* 64 bytes */
1611 #define LMEMCPY_4_PAD .align LMEMCPY_4_LOG2
1615 orr r2, r2, r0, lsl #2
1618 addne pc, r3, r2, lsl #LMEMCPY_4_LOG2
1621 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1629 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1631 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
1632 ldr r2, [r1, #3] /* BE:r2 = 3xxx LE:r2 = xxx3 */
1634 mov r3, r3, lsl #8 /* r3 = 012. */
1635 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
1637 mov r3, r3, lsr #8 /* r3 = .210 */
1638 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
1645 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1649 ldrh r2, [r1, #0x02]
1651 ldrh r3, [r1, #0x02]
1654 orr r3, r2, r3, lsl #16
1660 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1662 ldr r3, [r1, #-3] /* BE:r3 = xxx0 LE:r3 = 0xxx */
1663 ldr r2, [r1, #1] /* BE:r2 = 123x LE:r2 = x321 */
1665 mov r3, r3, lsl #24 /* r3 = 0... */
1666 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
1668 mov r3, r3, lsr #24 /* r3 = ...0 */
1669 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
1676 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1680 strb r2, [r0, #0x03]
1688 strb r1, [r0, #0x03]
1690 strh r3, [r0, #0x01]
1695 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1698 ldrh r3, [r1, #0x01]
1699 ldrb r1, [r1, #0x03]
1701 strh r3, [r0, #0x01]
1702 strb r1, [r0, #0x03]
1707 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1709 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1710 ldrh r3, [r1, #0x02] /* LE:r3 = ..23 LE:r3 = ..32 */
1712 mov r1, r2, lsr #8 /* r1 = ...0 */
1714 mov r2, r2, lsl #8 /* r2 = .01. */
1715 orr r2, r2, r3, lsr #8 /* r2 = .012 */
1718 mov r2, r2, lsr #8 /* r2 = ...1 */
1719 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1720 mov r3, r3, lsr #8 /* r3 = ...3 */
1722 strh r2, [r0, #0x01]
1723 strb r3, [r0, #0x03]
1728 * 0111: dst is 8-bit aligned, src is 8-bit aligned
1731 ldrh r3, [r1, #0x01]
1732 ldrb r1, [r1, #0x03]
1734 strh r3, [r0, #0x01]
1735 strb r1, [r0, #0x03]
1740 * 1000: dst is 16-bit aligned, src is 32-bit aligned
1744 strh r2, [r0, #0x02]
1750 strh r3, [r0, #0x02]
1756 * 1001: dst is 16-bit aligned, src is 8-bit aligned
1758 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1759 ldr r3, [r1, #3] /* BE:r3 = 3xxx LE:r3 = xxx3 */
1760 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
1763 mov r2, r2, lsl #8 /* r2 = 012. */
1764 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1766 mov r2, r2, lsr #24 /* r2 = ...2 */
1767 orr r2, r2, r3, lsl #8 /* r2 = xx32 */
1769 strh r2, [r0, #0x02]
1774 * 1010: dst is 16-bit aligned, src is 16-bit aligned
1777 ldrh r3, [r1, #0x02]
1779 strh r3, [r0, #0x02]
1784 * 1011: dst is 16-bit aligned, src is 8-bit aligned
1786 ldr r3, [r1, #1] /* BE:r3 = 123x LE:r3 = x321 */
1787 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1788 mov r1, r3, lsr #8 /* BE:r1 = .123 LE:r1 = .x32 */
1789 strh r1, [r0, #0x02]
1791 mov r3, r3, lsr #24 /* r3 = ...1 */
1792 orr r3, r3, r2, lsl #8 /* r3 = xx01 */
1794 mov r3, r3, lsl #8 /* r3 = 321. */
1795 orr r3, r3, r2, lsr #24 /* r3 = 3210 */
1802 * 1100: dst is 8-bit aligned, src is 32-bit aligned
1804 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
1806 strb r2, [r0, #0x03]
1809 strh r3, [r0, #0x01]
1815 strh r3, [r0, #0x01]
1816 strb r1, [r0, #0x03]
1822 * 1101: dst is 8-bit aligned, src is 8-bit aligned
1825 ldrh r3, [r1, #0x01]
1826 ldrb r1, [r1, #0x03]
1828 strh r3, [r0, #0x01]
1829 strb r1, [r0, #0x03]
1834 * 1110: dst is 8-bit aligned, src is 16-bit aligned
1837 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1838 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1839 strb r3, [r0, #0x03]
1840 mov r3, r3, lsr #8 /* r3 = ...2 */
1841 orr r3, r3, r2, lsl #8 /* r3 = ..12 */
1842 strh r3, [r0, #0x01]
1843 mov r2, r2, lsr #8 /* r2 = ...0 */
1846 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1847 ldrh r3, [r1, #0x02] /* BE:r3 = ..23 LE:r3 = ..32 */
1849 mov r2, r2, lsr #8 /* r2 = ...1 */
1850 orr r2, r2, r3, lsl #8 /* r2 = .321 */
1851 strh r2, [r0, #0x01]
1852 mov r3, r3, lsr #8 /* r3 = ...3 */
1853 strb r3, [r0, #0x03]
1859 * 1111: dst is 8-bit aligned, src is 8-bit aligned
1862 ldrh r3, [r1, #0x01]
1863 ldrb r1, [r1, #0x03]
1865 strh r3, [r0, #0x01]
1866 strb r1, [r0, #0x03]
1871 /******************************************************************************
1872 * Special case for 6 byte copies
1874 #define LMEMCPY_6_LOG2 6 /* 64 bytes */
1875 #define LMEMCPY_6_PAD .align LMEMCPY_6_LOG2
1879 orr r2, r2, r0, lsl #2
1882 addne pc, r3, r2, lsl #LMEMCPY_6_LOG2
1885 * 0000: dst is 32-bit aligned, src is 32-bit aligned
1888 ldrh r3, [r1, #0x04]
1890 strh r3, [r0, #0x04]
1895 * 0001: dst is 32-bit aligned, src is 8-bit aligned
1897 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
1898 ldr r3, [r1, #0x03] /* BE:r3 = 345x LE:r3 = x543 */
1900 mov r2, r2, lsl #8 /* r2 = 012. */
1901 orr r2, r2, r3, lsr #24 /* r2 = 0123 */
1903 mov r2, r2, lsr #8 /* r2 = .210 */
1904 orr r2, r2, r3, lsl #24 /* r2 = 3210 */
1906 mov r3, r3, lsr #8 /* BE:r3 = .345 LE:r3 = .x54 */
1908 strh r3, [r0, #0x04]
1913 * 0010: dst is 32-bit aligned, src is 16-bit aligned
1915 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
1916 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1918 mov r1, r3, lsr #16 /* r1 = ..23 */
1919 orr r1, r1, r2, lsl #16 /* r1 = 0123 */
1921 strh r3, [r0, #0x04]
1923 mov r1, r3, lsr #16 /* r1 = ..54 */
1924 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
1926 strh r1, [r0, #0x04]
1932 * 0011: dst is 32-bit aligned, src is 8-bit aligned
1934 ldr r2, [r1, #-3] /* BE:r2 = xxx0 LE:r2 = 0xxx */
1935 ldr r3, [r1, #1] /* BE:r3 = 1234 LE:r3 = 4321 */
1936 ldr r1, [r1, #5] /* BE:r1 = 5xxx LE:r1 = xxx5 */
1938 mov r2, r2, lsl #24 /* r2 = 0... */
1939 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
1940 mov r3, r3, lsl #8 /* r3 = 234. */
1941 orr r1, r3, r1, lsr #24 /* r1 = 2345 */
1943 mov r2, r2, lsr #24 /* r2 = ...0 */
1944 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
1945 mov r1, r1, lsl #8 /* r1 = xx5. */
1946 orr r1, r1, r3, lsr #24 /* r1 = xx54 */
1949 strh r1, [r0, #0x04]
1954 * 0100: dst is 8-bit aligned, src is 32-bit aligned
1956 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
1957 ldrh r2, [r1, #0x04] /* BE:r2 = ..45 LE:r2 = ..54 */
1958 mov r1, r3, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
1959 strh r1, [r0, #0x01]
1961 mov r1, r3, lsr #24 /* r1 = ...0 */
1963 mov r3, r3, lsl #8 /* r3 = 123. */
1964 orr r3, r3, r2, lsr #8 /* r3 = 1234 */
1967 mov r3, r3, lsr #24 /* r3 = ...3 */
1968 orr r3, r3, r2, lsl #8 /* r3 = .543 */
1969 mov r2, r2, lsr #8 /* r2 = ...5 */
1971 strh r3, [r0, #0x03]
1972 strb r2, [r0, #0x05]
1977 * 0101: dst is 8-bit aligned, src is 8-bit aligned
1980 ldrh r3, [r1, #0x01]
1981 ldrh ip, [r1, #0x03]
1982 ldrb r1, [r1, #0x05]
1984 strh r3, [r0, #0x01]
1985 strh ip, [r0, #0x03]
1986 strb r1, [r0, #0x05]
1991 * 0110: dst is 8-bit aligned, src is 16-bit aligned
1993 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
1994 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
1996 mov r3, r2, lsr #8 /* r3 = ...0 */
1998 strb r1, [r0, #0x05]
1999 mov r3, r1, lsr #8 /* r3 = .234 */
2000 strh r3, [r0, #0x03]
2001 mov r3, r2, lsl #8 /* r3 = .01. */
2002 orr r3, r3, r1, lsr #24 /* r3 = .012 */
2003 strh r3, [r0, #0x01]
2007 strb r3, [r0, #0x05]
2008 mov r3, r1, lsr #8 /* r3 = .543 */
2009 strh r3, [r0, #0x03]
2010 mov r3, r2, lsr #8 /* r3 = ...1 */
2011 orr r3, r3, r1, lsl #8 /* r3 = 4321 */
2012 strh r3, [r0, #0x01]
2018 * 0111: dst is 8-bit aligned, src is 8-bit aligned
2021 ldrh r3, [r1, #0x01]
2022 ldrh ip, [r1, #0x03]
2023 ldrb r1, [r1, #0x05]
2025 strh r3, [r0, #0x01]
2026 strh ip, [r0, #0x03]
2027 strb r1, [r0, #0x05]
2032 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2035 ldr r2, [r1] /* r2 = 0123 */
2036 ldrh r3, [r1, #0x04] /* r3 = ..45 */
2037 mov r1, r2, lsr #16 /* r1 = ..01 */
2038 orr r3, r3, r2, lsl#16 /* r3 = 2345 */
2042 ldrh r2, [r1, #0x04] /* r2 = ..54 */
2043 ldr r3, [r1] /* r3 = 3210 */
2044 mov r2, r2, lsl #16 /* r2 = 54.. */
2045 orr r2, r2, r3, lsr #16 /* r2 = 5432 */
2053 * 1001: dst is 16-bit aligned, src is 8-bit aligned
2055 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2056 ldr r2, [r1, #3] /* BE:r2 = 345x LE:r2 = x543 */
2057 mov r1, r3, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
2059 mov r2, r2, lsr #8 /* r2 = .345 */
2060 orr r2, r2, r3, lsl #24 /* r2 = 2345 */
2062 mov r2, r2, lsl #8 /* r2 = 543. */
2063 orr r2, r2, r3, lsr #24 /* r2 = 5432 */
2071 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2081 * 1011: dst is 16-bit aligned, src is 8-bit aligned
2083 ldrb r3, [r1] /* r3 = ...0 */
2084 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2085 ldrb r1, [r1, #0x05] /* r1 = ...5 */
2087 mov r3, r3, lsl #8 /* r3 = ..0. */
2088 orr r3, r3, r2, lsr #24 /* r3 = ..01 */
2089 orr r1, r1, r2, lsl #8 /* r1 = 2345 */
2091 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2092 mov r1, r1, lsl #24 /* r1 = 5... */
2093 orr r1, r1, r2, lsr #8 /* r1 = 5432 */
2101 * 1100: dst is 8-bit aligned, src is 32-bit aligned
2103 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2104 ldrh r1, [r1, #0x04] /* BE:r1 = ..45 LE:r1 = ..54 */
2106 mov r3, r2, lsr #24 /* r3 = ...0 */
2108 mov r2, r2, lsl #8 /* r2 = 123. */
2109 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
2112 mov r2, r2, lsr #8 /* r2 = .321 */
2113 orr r2, r2, r1, lsl #24 /* r2 = 4321 */
2114 mov r1, r1, lsr #8 /* r1 = ...5 */
2117 strb r1, [r0, #0x05]
2122 * 1101: dst is 8-bit aligned, src is 8-bit aligned
2125 ldrh r3, [r1, #0x01]
2126 ldrh ip, [r1, #0x03]
2127 ldrb r1, [r1, #0x05]
2129 strh r3, [r0, #0x01]
2130 strh ip, [r0, #0x03]
2131 strb r1, [r0, #0x05]
2136 * 1110: dst is 8-bit aligned, src is 16-bit aligned
2138 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2139 ldr r1, [r1, #0x02] /* BE:r1 = 2345 LE:r1 = 5432 */
2141 mov r3, r2, lsr #8 /* r3 = ...0 */
2143 mov r2, r2, lsl #24 /* r2 = 1... */
2144 orr r2, r2, r1, lsr #8 /* r2 = 1234 */
2147 mov r2, r2, lsr #8 /* r2 = ...1 */
2148 orr r2, r2, r1, lsl #8 /* r2 = 4321 */
2149 mov r1, r1, lsr #24 /* r1 = ...5 */
2152 strb r1, [r0, #0x05]
2157 * 1111: dst is 8-bit aligned, src is 8-bit aligned
2161 ldrb r1, [r1, #0x05]
2164 strb r1, [r0, #0x05]
2169 /******************************************************************************
2170 * Special case for 8 byte copies
2172 #define LMEMCPY_8_LOG2 6 /* 64 bytes */
2173 #define LMEMCPY_8_PAD .align LMEMCPY_8_LOG2
2177 orr r2, r2, r0, lsl #2
2180 addne pc, r3, r2, lsl #LMEMCPY_8_LOG2
2183 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2193 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2195 ldr r3, [r1, #-1] /* BE:r3 = x012 LE:r3 = 210x */
2196 ldr r2, [r1, #0x03] /* BE:r2 = 3456 LE:r2 = 6543 */
2197 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2199 mov r3, r3, lsl #8 /* r3 = 012. */
2200 orr r3, r3, r2, lsr #24 /* r3 = 0123 */
2201 orr r2, r1, r2, lsl #8 /* r2 = 4567 */
2203 mov r3, r3, lsr #8 /* r3 = .210 */
2204 orr r3, r3, r2, lsl #24 /* r3 = 3210 */
2205 mov r1, r1, lsl #24 /* r1 = 7... */
2206 orr r2, r1, r2, lsr #8 /* r2 = 7654 */
2214 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2216 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2217 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2218 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2220 mov r2, r2, lsl #16 /* r2 = 01.. */
2221 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2222 orr r3, r1, r3, lsl #16 /* r3 = 4567 */
2224 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2225 mov r3, r3, lsr #16 /* r3 = ..54 */
2226 orr r3, r3, r1, lsl #16 /* r3 = 7654 */
2234 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2236 ldrb r3, [r1] /* r3 = ...0 */
2237 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2238 ldr r1, [r1, #0x05] /* BE:r1 = 567x LE:r1 = x765 */
2240 mov r3, r3, lsl #24 /* r3 = 0... */
2241 orr r3, r3, r2, lsr #8 /* r3 = 0123 */
2242 mov r2, r2, lsl #24 /* r2 = 4... */
2243 orr r2, r2, r1, lsr #8 /* r2 = 4567 */
2245 orr r3, r3, r2, lsl #8 /* r3 = 3210 */
2246 mov r2, r2, lsr #24 /* r2 = ...4 */
2247 orr r2, r2, r1, lsl #8 /* r2 = 7654 */
2255 * 0100: dst is 8-bit aligned, src is 32-bit aligned
2257 ldr r3, [r1] /* BE:r3 = 0123 LE:r3 = 3210 */
2258 ldr r2, [r1, #0x04] /* BE:r2 = 4567 LE:r2 = 7654 */
2260 mov r1, r3, lsr #24 /* r1 = ...0 */
2262 mov r1, r3, lsr #8 /* r1 = .012 */
2263 strb r2, [r0, #0x07]
2264 mov r3, r3, lsl #24 /* r3 = 3... */
2265 orr r3, r3, r2, lsr #8 /* r3 = 3456 */
2268 mov r1, r2, lsr #24 /* r1 = ...7 */
2269 strb r1, [r0, #0x07]
2270 mov r1, r3, lsr #8 /* r1 = .321 */
2271 mov r3, r3, lsr #24 /* r3 = ...3 */
2272 orr r3, r3, r2, lsl #8 /* r3 = 6543 */
2274 strh r1, [r0, #0x01]
2280 * 0101: dst is 8-bit aligned, src is 8-bit aligned
2283 ldrh r3, [r1, #0x01]
2285 ldrb r1, [r1, #0x07]
2287 strh r3, [r0, #0x01]
2289 strb r1, [r0, #0x07]
2294 * 0110: dst is 8-bit aligned, src is 16-bit aligned
2296 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2297 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2298 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2300 mov ip, r2, lsr #8 /* ip = ...0 */
2302 mov ip, r2, lsl #8 /* ip = .01. */
2303 orr ip, ip, r3, lsr #24 /* ip = .012 */
2304 strb r1, [r0, #0x07]
2305 mov r3, r3, lsl #8 /* r3 = 345. */
2306 orr r3, r3, r1, lsr #8 /* r3 = 3456 */
2308 strb r2, [r0] /* 0 */
2309 mov ip, r1, lsr #8 /* ip = ...7 */
2310 strb ip, [r0, #0x07] /* 7 */
2311 mov ip, r2, lsr #8 /* ip = ...1 */
2312 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2313 mov r3, r3, lsr #8 /* r3 = .543 */
2314 orr r3, r3, r1, lsl #24 /* r3 = 6543 */
2316 strh ip, [r0, #0x01]
2322 * 0111: dst is 8-bit aligned, src is 8-bit aligned
2324 ldrb r3, [r1] /* r3 = ...0 */
2325 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2326 ldrh r2, [r1, #0x05] /* BE:r2 = ..56 LE:r2 = ..65 */
2327 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2329 mov r3, ip, lsr #16 /* BE:r3 = ..12 LE:r3 = ..43 */
2331 strh r3, [r0, #0x01]
2332 orr r2, r2, ip, lsl #16 /* r2 = 3456 */
2334 strh ip, [r0, #0x01]
2335 orr r2, r3, r2, lsl #16 /* r2 = 6543 */
2338 strb r1, [r0, #0x07]
2343 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2345 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2346 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2347 mov r1, r2, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
2350 mov r1, r3, lsr #16 /* r1 = ..45 */
2351 orr r2, r1 ,r2, lsl #16 /* r2 = 2345 */
2354 orr r2, r1, r3, lsl #16 /* r2 = 5432 */
2355 mov r3, r3, lsr #16 /* r3 = ..76 */
2358 strh r3, [r0, #0x06]
2363 * 1001: dst is 16-bit aligned, src is 8-bit aligned
2365 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
2366 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2367 ldrb ip, [r1, #0x07] /* ip = ...7 */
2368 mov r1, r2, lsr #8 /* BE:r1 = .x01 LE:r1 = .210 */
2371 mov r1, r2, lsl #24 /* r1 = 2... */
2372 orr r1, r1, r3, lsr #8 /* r1 = 2345 */
2373 orr r3, ip, r3, lsl #8 /* r3 = 4567 */
2375 mov r1, r2, lsr #24 /* r1 = ...2 */
2376 orr r1, r1, r3, lsl #8 /* r1 = 5432 */
2377 mov r3, r3, lsr #24 /* r3 = ...6 */
2378 orr r3, r3, ip, lsl #8 /* r3 = ..76 */
2381 strh r3, [r0, #0x06]
2386 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2390 ldrh r3, [r1, #0x06]
2393 strh r3, [r0, #0x06]
2398 * 1011: dst is 16-bit aligned, src is 8-bit aligned
2400 ldr r3, [r1, #0x05] /* BE:r3 = 567x LE:r3 = x765 */
2401 ldr r2, [r1, #0x01] /* BE:r2 = 1234 LE:r2 = 4321 */
2402 ldrb ip, [r1] /* ip = ...0 */
2403 mov r1, r3, lsr #8 /* BE:r1 = .567 LE:r1 = .x76 */
2404 strh r1, [r0, #0x06]
2406 mov r3, r3, lsr #24 /* r3 = ...5 */
2407 orr r3, r3, r2, lsl #8 /* r3 = 2345 */
2408 mov r2, r2, lsr #24 /* r2 = ...1 */
2409 orr r2, r2, ip, lsl #8 /* r2 = ..01 */
2411 mov r3, r3, lsl #24 /* r3 = 5... */
2412 orr r3, r3, r2, lsr #8 /* r3 = 5432 */
2413 orr r2, ip, r2, lsl #8 /* r2 = 3210 */
2421 * 1100: dst is 8-bit aligned, src is 32-bit aligned
2423 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2424 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2425 mov r1, r3, lsr #8 /* BE:r1 = .456 LE:r1 = .765 */
2426 strh r1, [r0, #0x05]
2428 strb r3, [r0, #0x07]
2429 mov r1, r2, lsr #24 /* r1 = ...0 */
2431 mov r2, r2, lsl #8 /* r2 = 123. */
2432 orr r2, r2, r3, lsr #24 /* r2 = 1234 */
2436 mov r1, r3, lsr #24 /* r1 = ...7 */
2437 strb r1, [r0, #0x07]
2438 mov r2, r2, lsr #8 /* r2 = .321 */
2439 orr r2, r2, r3, lsl #24 /* r2 = 4321 */
2446 * 1101: dst is 8-bit aligned, src is 8-bit aligned
2448 ldrb r3, [r1] /* r3 = ...0 */
2449 ldrh r2, [r1, #0x01] /* BE:r2 = ..12 LE:r2 = ..21 */
2450 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2451 ldrb r1, [r1, #0x07] /* r1 = ...7 */
2453 mov r3, ip, lsr #16 /* BE:r3 = ..34 LE:r3 = ..65 */
2455 strh ip, [r0, #0x05]
2456 orr r2, r3, r2, lsl #16 /* r2 = 1234 */
2458 strh r3, [r0, #0x05]
2459 orr r2, r2, ip, lsl #16 /* r2 = 4321 */
2462 strb r1, [r0, #0x07]
2467 * 1110: dst is 8-bit aligned, src is 16-bit aligned
2469 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2470 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2471 ldrh r1, [r1, #0x06] /* BE:r1 = ..67 LE:r1 = ..76 */
2473 mov ip, r2, lsr #8 /* ip = ...0 */
2475 mov ip, r2, lsl #24 /* ip = 1... */
2476 orr ip, ip, r3, lsr #8 /* ip = 1234 */
2477 strb r1, [r0, #0x07]
2478 mov r1, r1, lsr #8 /* r1 = ...6 */
2479 orr r1, r1, r3, lsl #8 /* r1 = 3456 */
2482 mov ip, r2, lsr #8 /* ip = ...1 */
2483 orr ip, ip, r3, lsl #8 /* ip = 4321 */
2484 mov r2, r1, lsr #8 /* r2 = ...7 */
2485 strb r2, [r0, #0x07]
2486 mov r1, r1, lsl #8 /* r1 = .76. */
2487 orr r1, r1, r3, lsr #24 /* r1 = .765 */
2490 strh r1, [r0, #0x05]
2495 * 1111: dst is 8-bit aligned, src is 8-bit aligned
2499 ldrh r3, [r1, #0x05]
2500 ldrb r1, [r1, #0x07]
2503 strh r3, [r0, #0x05]
2504 strb r1, [r0, #0x07]
2508 /******************************************************************************
2509 * Special case for 12 byte copies
2511 #define LMEMCPY_C_LOG2 7 /* 128 bytes */
2512 #define LMEMCPY_C_PAD .align LMEMCPY_C_LOG2
2516 orr r2, r2, r0, lsl #2
2519 addne pc, r3, r2, lsl #LMEMCPY_C_LOG2
2522 * 0000: dst is 32-bit aligned, src is 32-bit aligned
2534 * 0001: dst is 32-bit aligned, src is 8-bit aligned
2536 ldrb r2, [r1, #0xb] /* r2 = ...B */
2537 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
2538 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2539 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2541 orr r2, r2, ip, lsl #8 /* r2 = 89AB */
2543 mov r2, ip, lsr #24 /* r2 = ...7 */
2544 orr r2, r2, r3, lsl #8 /* r2 = 4567 */
2545 mov r1, r1, lsl #8 /* r1 = 012. */
2546 orr r1, r1, r3, lsr #24 /* r1 = 0123 */
2548 mov r2, r2, lsl #24 /* r2 = B... */
2549 orr r2, r2, ip, lsr #8 /* r2 = BA98 */
2551 mov r2, ip, lsl #24 /* r2 = 7... */
2552 orr r2, r2, r3, lsr #8 /* r2 = 7654 */
2553 mov r1, r1, lsr #8 /* r1 = .210 */
2554 orr r1, r1, r3, lsl #24 /* r1 = 3210 */
2562 * 0010: dst is 32-bit aligned, src is 16-bit aligned
2564 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2565 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2566 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
2567 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
2569 mov r2, r2, lsl #16 /* r2 = 01.. */
2570 orr r2, r2, r3, lsr #16 /* r2 = 0123 */
2572 mov r3, r3, lsl #16 /* r3 = 45.. */
2573 orr r3, r3, ip, lsr #16 /* r3 = 4567 */
2574 orr r1, r1, ip, lsl #16 /* r1 = 89AB */
2576 orr r2, r2, r3, lsl #16 /* r2 = 3210 */
2578 mov r3, r3, lsr #16 /* r3 = ..54 */
2579 orr r3, r3, ip, lsl #16 /* r3 = 7654 */
2580 mov r1, r1, lsl #16 /* r1 = BA.. */
2581 orr r1, r1, ip, lsr #16 /* r1 = BA98 */
2589 * 0011: dst is 32-bit aligned, src is 8-bit aligned
2591 ldrb r2, [r1] /* r2 = ...0 */
2592 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
2593 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
2594 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
2596 mov r2, r2, lsl #24 /* r2 = 0... */
2597 orr r2, r2, r3, lsr #8 /* r2 = 0123 */
2599 mov r3, r3, lsl #24 /* r3 = 4... */
2600 orr r3, r3, ip, lsr #8 /* r3 = 4567 */
2601 mov r1, r1, lsr #8 /* r1 = .9AB */
2602 orr r1, r1, ip, lsl #24 /* r1 = 89AB */
2604 orr r2, r2, r3, lsl #8 /* r2 = 3210 */
2606 mov r3, r3, lsr #24 /* r3 = ...4 */
2607 orr r3, r3, ip, lsl #8 /* r3 = 7654 */
2608 mov r1, r1, lsl #8 /* r1 = BA9. */
2609 orr r1, r1, ip, lsr #24 /* r1 = BA98 */
2617 * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
2619 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2620 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2621 ldr ip, [r1, #0x08] /* BE:ip = 89AB LE:ip = BA98 */
2622 mov r1, r2, lsr #8 /* BE:r1 = .012 LE:r1 = .321 */
2623 strh r1, [r0, #0x01]
2625 mov r1, r2, lsr #24 /* r1 = ...0 */
2627 mov r1, r2, lsl #24 /* r1 = 3... */
2628 orr r2, r1, r3, lsr #8 /* r2 = 3456 */
2629 mov r1, r3, lsl #24 /* r1 = 7... */
2630 orr r1, r1, ip, lsr #8 /* r1 = 789A */
2633 mov r1, r2, lsr #24 /* r1 = ...3 */
2634 orr r2, r1, r3, lsl #8 /* r2 = 6543 */
2635 mov r1, r3, lsr #24 /* r1 = ...7 */
2636 orr r1, r1, ip, lsl #8 /* r1 = A987 */
2637 mov ip, ip, lsr #24 /* ip = ...B */
2641 strb ip, [r0, #0x0b]
2646 * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
2649 ldrh r3, [r1, #0x01]
2653 ldrb r1, [r1, #0x0b]
2654 strh r3, [r0, #0x01]
2657 strb r1, [r0, #0x0b]
2662 * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
2664 ldrh r2, [r1] /* BE:r2 = ..01 LE:r2 = ..10 */
2665 ldr r3, [r1, #0x02] /* BE:r3 = 2345 LE:r3 = 5432 */
2666 ldr ip, [r1, #0x06] /* BE:ip = 6789 LE:ip = 9876 */
2667 ldrh r1, [r1, #0x0a] /* BE:r1 = ..AB LE:r1 = ..BA */
2669 mov r2, r2, ror #8 /* r2 = 1..0 */
2671 mov r2, r2, lsr #16 /* r2 = ..1. */
2672 orr r2, r2, r3, lsr #24 /* r2 = ..12 */
2673 strh r2, [r0, #0x01]
2674 mov r2, r3, lsl #8 /* r2 = 345. */
2675 orr r3, r2, ip, lsr #24 /* r3 = 3456 */
2676 mov r2, ip, lsl #8 /* r2 = 789. */
2677 orr r2, r2, r1, lsr #8 /* r2 = 789A */
2680 mov r2, r2, lsr #8 /* r2 = ...1 */
2681 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2682 strh r2, [r0, #0x01]
2683 mov r2, r3, lsr #8 /* r2 = .543 */
2684 orr r3, r2, ip, lsl #24 /* r3 = 6543 */
2685 mov r2, ip, lsr #8 /* r2 = .987 */
2686 orr r2, r2, r1, lsl #24 /* r2 = A987 */
2687 mov r1, r1, lsr #8 /* r1 = ...B */
2691 strb r1, [r0, #0x0b]
2696 * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
2699 ldr r3, [r1, #0x01] /* BE:r3 = 1234 LE:r3 = 4321 */
2700 ldr ip, [r1, #0x05] /* BE:ip = 5678 LE:ip = 8765 */
2701 ldr r1, [r1, #0x09] /* BE:r1 = 9ABx LE:r1 = xBA9 */
2704 mov r2, r3, lsr #16 /* r2 = ..12 */
2705 strh r2, [r0, #0x01]
2706 mov r3, r3, lsl #16 /* r3 = 34.. */
2707 orr r3, r3, ip, lsr #16 /* r3 = 3456 */
2708 mov ip, ip, lsl #16 /* ip = 78.. */
2709 orr ip, ip, r1, lsr #16 /* ip = 789A */
2710 mov r1, r1, lsr #8 /* r1 = .9AB */
2712 strh r3, [r0, #0x01]
2713 mov r3, r3, lsr #16 /* r3 = ..43 */
2714 orr r3, r3, ip, lsl #16 /* r3 = 6543 */
2715 mov ip, ip, lsr #16 /* ip = ..87 */
2716 orr ip, ip, r1, lsl #16 /* ip = A987 */
2717 mov r1, r1, lsr #16 /* r1 = ..xB */
2721 strb r1, [r0, #0x0b]
2726 * 1000: dst is 16-bit aligned, src is 32-bit aligned
2728 ldr ip, [r1] /* BE:ip = 0123 LE:ip = 3210 */
2729 ldr r3, [r1, #0x04] /* BE:r3 = 4567 LE:r3 = 7654 */
2730 ldr r2, [r1, #0x08] /* BE:r2 = 89AB LE:r2 = BA98 */
2731 mov r1, ip, lsr #16 /* BE:r1 = ..01 LE:r1 = ..32 */
2734 mov r1, ip, lsl #16 /* r1 = 23.. */
2735 orr r1, r1, r3, lsr #16 /* r1 = 2345 */
2736 mov r3, r3, lsl #16 /* r3 = 67.. */
2737 orr r3, r3, r2, lsr #16 /* r3 = 6789 */
2740 orr r1, r1, r3, lsl #16 /* r1 = 5432 */
2741 mov r3, r3, lsr #16 /* r3 = ..76 */
2742 orr r3, r3, r2, lsl #16 /* r3 = 9876 */
2743 mov r2, r2, lsr #16 /* r2 = ..BA */
2747 strh r2, [r0, #0x0a]
2752 * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
2754 ldr r2, [r1, #-1] /* BE:r2 = x012 LE:r2 = 210x */
2755 ldr r3, [r1, #0x03] /* BE:r3 = 3456 LE:r3 = 6543 */
2756 mov ip, r2, lsr #8 /* BE:ip = .x01 LE:ip = .210 */
2758 ldr ip, [r1, #0x07] /* BE:ip = 789A LE:ip = A987 */
2759 ldrb r1, [r1, #0x0b] /* r1 = ...B */
2761 mov r2, r2, lsl #24 /* r2 = 2... */
2762 orr r2, r2, r3, lsr #8 /* r2 = 2345 */
2763 mov r3, r3, lsl #24 /* r3 = 6... */
2764 orr r3, r3, ip, lsr #8 /* r3 = 6789 */
2765 orr r1, r1, ip, lsl #8 /* r1 = 89AB */
2767 mov r2, r2, lsr #24 /* r2 = ...2 */
2768 orr r2, r2, r3, lsl #8 /* r2 = 5432 */
2769 mov r3, r3, lsr #24 /* r3 = ...6 */
2770 orr r3, r3, ip, lsl #8 /* r3 = 9876 */
2771 mov r1, r1, lsl #8 /* r1 = ..B. */
2772 orr r1, r1, ip, lsr #24 /* r1 = ..BA */
2776 strh r1, [r0, #0x0a]
2781 * 1010: dst is 16-bit aligned, src is 16-bit aligned
2786 ldrh r1, [r1, #0x0a]
2790 strh r1, [r0, #0x0a]
2795 * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
2797 ldr r2, [r1, #0x09] /* BE:r2 = 9ABx LE:r2 = xBA9 */
2798 ldr r3, [r1, #0x05] /* BE:r3 = 5678 LE:r3 = 8765 */
2799 mov ip, r2, lsr #8 /* BE:ip = .9AB LE:ip = .xBA */
2800 strh ip, [r0, #0x0a]
2801 ldr ip, [r1, #0x01] /* BE:ip = 1234 LE:ip = 4321 */
2802 ldrb r1, [r1] /* r1 = ...0 */
2804 mov r2, r2, lsr #24 /* r2 = ...9 */
2805 orr r2, r2, r3, lsl #8 /* r2 = 6789 */
2806 mov r3, r3, lsr #24 /* r3 = ...5 */
2807 orr r3, r3, ip, lsl #8 /* r3 = 2345 */
2808 mov r1, r1, lsl #8 /* r1 = ..0. */
2809 orr r1, r1, ip, lsr #24 /* r1 = ..01 */
2811 mov r2, r2, lsl #24 /* r2 = 9... */
2812 orr r2, r2, r3, lsr #8 /* r2 = 9876 */
2813 mov r3, r3, lsl #24 /* r3 = 5... */
2814 orr r3, r3, ip, lsr #8 /* r3 = 5432 */
2815 orr r1, r1, ip, lsl #8 /* r1 = 3210 */
2824 * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
2826 ldr r2, [r1] /* BE:r2 = 0123 LE:r2 = 3210 */
2827 ldr ip, [r1, #0x04] /* BE:ip = 4567 LE:ip = 7654 */
2828 ldr r1, [r1, #0x08] /* BE:r1 = 89AB LE:r1 = BA98 */
2830 mov r3, r2, lsr #24 /* r3 = ...0 */
2832 mov r2, r2, lsl #8 /* r2 = 123. */
2833 orr r2, r2, ip, lsr #24 /* r2 = 1234 */
2835 mov r2, ip, lsl #8 /* r2 = 567. */
2836 orr r2, r2, r1, lsr #24 /* r2 = 5678 */
2838 mov r2, r1, lsr #8 /* r2 = ..9A */
2839 strh r2, [r0, #0x09]
2840 strb r1, [r0, #0x0b]
2843 mov r3, r2, lsr #8 /* r3 = .321 */
2844 orr r3, r3, ip, lsl #24 /* r3 = 4321 */
2846 mov r3, ip, lsr #8 /* r3 = .765 */
2847 orr r3, r3, r1, lsl #24 /* r3 = 8765 */
2849 mov r1, r1, lsr #8 /* r1 = .BA9 */
2850 strh r1, [r0, #0x09]
2851 mov r1, r1, lsr #16 /* r1 = ...B */
2852 strb r1, [r0, #0x0b]
2858 * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2860 ldrb r2, [r1, #0x0b] /* r2 = ...B */
2861 ldr r3, [r1, #0x07] /* BE:r3 = 789A LE:r3 = A987 */
2862 ldr ip, [r1, #0x03] /* BE:ip = 3456 LE:ip = 6543 */
2863 ldr r1, [r1, #-1] /* BE:r1 = x012 LE:r1 = 210x */
2864 strb r2, [r0, #0x0b]
2866 strh r3, [r0, #0x09]
2867 mov r3, r3, lsr #16 /* r3 = ..78 */
2868 orr r3, r3, ip, lsl #16 /* r3 = 5678 */
2869 mov ip, ip, lsr #16 /* ip = ..34 */
2870 orr ip, ip, r1, lsl #16 /* ip = 1234 */
2871 mov r1, r1, lsr #16 /* r1 = ..x0 */
2873 mov r2, r3, lsr #16 /* r2 = ..A9 */
2874 strh r2, [r0, #0x09]
2875 mov r3, r3, lsl #16 /* r3 = 87.. */
2876 orr r3, r3, ip, lsr #16 /* r3 = 8765 */
2877 mov ip, ip, lsl #16 /* ip = 43.. */
2878 orr ip, ip, r1, lsr #16 /* ip = 4321 */
2879 mov r1, r1, lsr #8 /* r1 = .210 */
2888 * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
2891 ldrh r2, [r1, #0x0a] /* r2 = ..AB */
2892 ldr ip, [r1, #0x06] /* ip = 6789 */
2893 ldr r3, [r1, #0x02] /* r3 = 2345 */
2894 ldrh r1, [r1] /* r1 = ..01 */
2895 strb r2, [r0, #0x0b]
2896 mov r2, r2, lsr #8 /* r2 = ...A */
2897 orr r2, r2, ip, lsl #8 /* r2 = 789A */
2898 mov ip, ip, lsr #8 /* ip = .678 */
2899 orr ip, ip, r3, lsl #24 /* ip = 5678 */
2900 mov r3, r3, lsr #8 /* r3 = .234 */
2901 orr r3, r3, r1, lsl #24 /* r3 = 1234 */
2902 mov r1, r1, lsr #8 /* r1 = ...0 */
2906 strh r2, [r0, #0x09]
2908 ldrh r2, [r1] /* r2 = ..10 */
2909 ldr r3, [r1, #0x02] /* r3 = 5432 */
2910 ldr ip, [r1, #0x06] /* ip = 9876 */
2911 ldrh r1, [r1, #0x0a] /* r1 = ..BA */
2913 mov r2, r2, lsr #8 /* r2 = ...1 */
2914 orr r2, r2, r3, lsl #8 /* r2 = 4321 */
2915 mov r3, r3, lsr #24 /* r3 = ...5 */
2916 orr r3, r3, ip, lsl #8 /* r3 = 8765 */
2917 mov ip, ip, lsr #24 /* ip = ...9 */
2918 orr ip, ip, r1, lsl #8 /* ip = .BA9 */
2919 mov r1, r1, lsr #8 /* r1 = ...B */
2922 strh ip, [r0, #0x09]
2923 strb r1, [r0, #0x0b]
2929 * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2935 ldrh r2, [r1, #0x09]
2936 ldrb r1, [r1, #0x0b]
2939 strh r2, [r0, #0x09]
2940 strb r1, [r0, #0x0b]
2942 #endif /* _ARM_ARCH_5E */