sys/arm/arm/support.S

   1 /*-
   2  * Copyright (c) 2004 Olivier Houchard
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  24  * SUCH DAMAGE.
  25  */
  26 /*
  27  * Copyright 2003 Wasabi Systems, Inc.
  28  * All rights reserved.
  29  *
  30  * Written by Steve C. Woodford for Wasabi Systems, Inc.
  31  *
  32  * Redistribution and use in source and binary forms, with or without
  33  * modification, are permitted provided that the following conditions
  34  * are met:
  35  * 1. Redistributions of source code must retain the above copyright
  36  *    notice, this list of conditions and the following disclaimer.
  37  * 2. Redistributions in binary form must reproduce the above copyright
  38  *    notice, this list of conditions and the following disclaimer in the
  39  *    documentation and/or other materials provided with the distribution.
  40  * 3. All advertising materials mentioning features or use of this software
  41  *    must display the following acknowledgement:
  42  *      This product includes software developed for the NetBSD Project by
  43  *      Wasabi Systems, Inc.
  44  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
  45  *    or promote products derived from this software without specific prior
  46  *    written permission.
  47  *
  48  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  50  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  51  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
  52  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  53  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  54  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  55  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  56  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  57  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  58  * POSSIBILITY OF SUCH DAMAGE.
  59  */
  60 /*
  61  * Copyright (c) 1997 The NetBSD Foundation, Inc.
  62  * All rights reserved.
  63  *
  64  * This code is derived from software contributed to The NetBSD Foundation
  65  * by Neil A. Carson and Mark Brinicombe
  66  *
  67  * Redistribution and use in source and binary forms, with or without
  68  * modification, are permitted provided that the following conditions
  69  * are met:
  70  * 1. Redistributions of source code must retain the above copyright
  71  *    notice, this list of conditions and the following disclaimer.
  72  * 2. Redistributions in binary form must reproduce the above copyright
  73  *    notice, this list of conditions and the following disclaimer in the
  74  *    documentation and/or other materials provided with the distribution.
  75  *
  76  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  77  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  78  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  79  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  80  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  81  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  82  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  83  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  84  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  85  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  86  * POSSIBILITY OF SUCH DAMAGE.
  87  */
  88
  89 #include <machine/asm.h>
  90 __FBSDID("$FreeBSD$");
  91
  92 #include "assym.inc"
  93
  94         .syntax unified
  95
   96 /*
   97  * memset: Sets a block of memory to the specified value
   98  *
   99  * On entry:
  100  *   r0 - dest address
  101  *   r1 - byte to write
  102  *   r2 - number of bytes to write
  103  *
  104  * On exit:
  105  *   r0 - dest address
       *
       * Register use inside the routine:
       *   ip - running store pointer (r0 itself is never written, which is
       *        how the dest address survives to the return)
       *   r1 - bytes still to be written (copied from r2 at entry)
       *   r3 - fill value, widened from one byte to a 32-bit pattern
       *   r2 - scratch: alignment bits first, later a copy of r3 so that
       *        strd can store the r2/r3 pair (8 bytes per instruction)
       *
       * Most stores are predicated on flags left by an earlier
       * subs/cmp/tst, so the instruction order is significant.
  106  */
  107 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
  108 ENTRY(memset)
  109         and     r3, r1, #0xff           /* We deal with bytes */
  110         mov     r1, r2                  /* r1 = byte count from here on */
  111 do_memset:
              /*
               * At this label: r0 = dest, r1 = byte count, r3 = fill byte.
               * NOTE(review): do_memset is not a .L-local label, so it is
               * presumably an alternate entry point for code outside this
               * routine -- confirm before renaming or removing it.
               */
  112         cmp     r1, #0x04               /* Do we have less than 4 bytes */
  113         mov     ip, r0                  /* mov leaves the cmp flags alone */
  114         blt     .Lmemset_lessthanfour
  115
  116         /* Ok first we will word align the address */
  117         ands    r2, ip, #0x03           /* Get the bottom two bits */
  118         bne     .Lmemset_wordunaligned  /* The address is not word aligned */
  119
  120         /* We are now word aligned */
  121 .Lmemset_wordaligned:
              /*
               * Replicate the byte across r3 and, if bit 2 of the pointer is
               * set, store one word so that ip becomes 8-byte aligned.  The
               * tst sits between the two orr instructions; orr (no 's'
               * suffix) does not disturb the flags that subne/strne use.
               * The count may reach zero here; the word loop and the fix-up
               * after it cope with that.
               */
  122         orr     r3, r3, r3, lsl #8      /* Extend value to 16-bits */
  123         tst     ip, #0x04               /* Quad-align for armv5e */
  124         orr     r3, r3, r3, lsl #16     /* Extend value to 32-bits */
  125         subne   r1, r1, #0x04           /* Quad-align if necessary */
  126         strne   r3, [ip], #0x04
  127         cmp     r1, #0x10
  128         blt     .Lmemset_loop4          /* If less than 16 then use words */
  129         mov     r2, r3                  /* Duplicate data */
  130         cmp     r1, #0x80               /* If < 128 then skip the big loop */
  131         blt     .Lmemset_loop32
  132
  133         /* Do 128 bytes at a time */
              /*
               * The single subs decides the whole pass: all sixteen strdge
               * instructions execute only if at least 128 bytes remained.
               */
  134 .Lmemset_loop128:
  135         subs    r1, r1, #0x80
  136         strdge  r2, [ip], #0x08
  137         strdge  r2, [ip], #0x08
  138         strdge  r2, [ip], #0x08
  139         strdge  r2, [ip], #0x08
  140         strdge  r2, [ip], #0x08
  141         strdge  r2, [ip], #0x08
  142         strdge  r2, [ip], #0x08
  143         strdge  r2, [ip], #0x08
  144         strdge  r2, [ip], #0x08
  145         strdge  r2, [ip], #0x08
  146         strdge  r2, [ip], #0x08
  147         strdge  r2, [ip], #0x08
  148         strdge  r2, [ip], #0x08
  149         strdge  r2, [ip], #0x08
  150         strdge  r2, [ip], #0x08
  151         strdge  r2, [ip], #0x08
  152         bgt     .Lmemset_loop128
  153         RETeq                   /* Nothing left to write */
  154
  155         add     r1, r1, #0x80           /* Adjust for extra sub */
  156
  157         /* Do 32 bytes at a time */
  158 .Lmemset_loop32:
  159         subs    r1, r1, #0x20
  160         strdge  r2, [ip], #0x08
  161         strdge  r2, [ip], #0x08
  162         strdge  r2, [ip], #0x08
  163         strdge  r2, [ip], #0x08
  164         bgt     .Lmemset_loop32
  165         RETeq                   /* Nothing left to write */
  166
              /*
               * r1 is now (remaining - 32), i.e. negative.  Adding 16 gives
               * (remaining - 16): if that is >= 0 another 16 bytes are
               * stored and r1 already holds what is left afterwards;
               * otherwise the addlt below restores the true remainder.
               */
  167         adds    r1, r1, #0x10           /* Partially adjust for extra sub */
  168
  169         /* Deal with 16 bytes or more */
  170         strdge  r2, [ip], #0x08
  171         strdge  r2, [ip], #0x08
  172         RETeq                   /* Nothing left to write */
  173
  174         addlt   r1, r1, #0x10           /* Possibly adjust for extra sub */
  175
  176         /* Store whole words while at least 4 bytes remain */
  177 .Lmemset_loop4:
  178         subs    r1, r1, #0x04
  179         strge   r3, [ip], #0x04
  180         bgt     .Lmemset_loop4
  181         RETeq                   /* Nothing left to write */
  182
  183         /* Compensate for 64-bit alignment check */
              /* r1 is (remaining - 4) here; restore it to 0..3 */
  184         adds    r1, r1, #0x04
  185         RETeq
  186         cmp     r1, #2                  /* flags select 1, 2 (ge) or 3 (gt) bytes */
  187
  188         strb    r3, [ip], #0x01         /* Set 1 byte */
  189         strbge  r3, [ip], #0x01         /* Set another byte */
  190         strbgt  r3, [ip]                /* and a third */
  191         RET                     /* Exit */
  192
              /*
               * Entered with r2 = ip & 3 (non-zero) and r1 >= 4.  r2 becomes
               * the 1..3 bytes needed to reach word alignment; they are
               * written with byte stores and taken off the count.
               */
  193 .Lmemset_wordunaligned:
  194         rsb     r2, r2, #0x004
  195         strb    r3, [ip], #0x01         /* Set 1 byte */
  196         cmp     r2, #0x02
  197         strbge  r3, [ip], #0x01         /* Set another byte */
  198         sub     r1, r1, r2              /* no 's': cmp flags stay live */
  199         strbgt  r3, [ip], #0x01         /* and a third */
  200         cmp     r1, #0x04               /* At least 4 bytes left? */
  201         bge     .Lmemset_wordaligned    /* Yup */
  202
              /* Fewer than 4 bytes in total (or left over after aligning) */
  203 .Lmemset_lessthanfour:
  204         cmp     r1, #0x00
  205         RETeq                   /* Zero length so exit */
  206         strb    r3, [ip], #0x01         /* Set 1 byte */
  207         cmp     r1, #0x02
  208         strbge  r3, [ip], #0x01         /* Set another byte */
  209         strbgt  r3, [ip]                /* and a third */
  210         RET                     /* Exit */
  211 END(memset)
 212
  213 ENTRY(memcmp)
              /*
               * memcmp: compare two byte ranges
               *
               * On entry:
               *   r0 - first buffer (b1)
               *   r1 - second buffer (b2)
               *   r2 - number of bytes to compare
               *
               * On exit:
               *   r0 - 0 if every byte matches, otherwise b1[i] - b2[i] for
               *        the first differing pair (bytes are loaded with ldrb,
               *        i.e. zero-extended)
               *
               * Inside the routine ip walks b1, r1 walks b2 and r3 is scratch.
               * A length of exactly 6 is dispatched to a separate
               * straight-line sequence at the end.
               */
  214         mov     ip, r0
  215         cmp     r2, #0x06
  216         beq     .Lmemcmp_6bytes
  217         mov     r0, #0x00
  218
  219         /* Are both addresses aligned the same way? */
              /*
               * eorsne only executes when len != 0, so the RETeq below fires
               * either for len == 0 (Z from the cmp) or for identical
               * pointers (Z from the eors); r0 is already 0 in both cases.
               */
  220         cmp     r2, #0x00
  221         eorsne  r3, ip, r1
  222         RETeq                   /* len == 0, or same addresses! */
  223         tst     r3, #0x03
  224         subne   r2, r2, #0x01           /* bytewise2 expects r2 = count - 1 */
  225         bne     .Lmemcmp_bytewise2      /* Badly aligned. Do it the slow way */
  226
  227         /* Word-align the addresses, if necessary */
              /*
               * Computed jump into the three byte-compare blocks below.
               * r3 = ((r1 - 5) & 3) * 3, and the jump adds r3 * 8 = block
               * index * 24 bytes; each block is six instructions.  pc reads
               * as the addne address + 8, i.e. the instruction after the nop.
               *   r1 & 3 == 1: index 0, no jump, fall through 3 byte compares
               *   r1 & 3 == 2: index 1, 2 byte compares
               *   r1 & 3 == 3: index 2, 1 byte compare
               *   r1 & 3 == 0: index 3, straight to the word loop setup
               * The 'ne' on addne tests the flags from ands (add without
               * 's' does not change them).
               * NOTE(review): this relies on 4-byte instruction encodings
               * and on RETne/RETeq each being a single instruction; those
               * macros are defined outside this file -- confirm before
               * touching the blocks.
               */
  228         sub     r3, r1, #0x05
  229         ands    r3, r3, #0x03
  230         add     r3, r3, r3, lsl #1
  231         addne   pc, pc, r3, lsl #3
  232         nop
  233
  234         /* Compare up to 3 bytes */
  235         ldrb    r0, [ip], #0x01
  236         ldrb    r3, [r1], #0x01
  237         subs    r0, r0, r3
  238         RETne                           /* mismatch: r0 = difference */
  239         subs    r2, r2, #0x01
  240         RETeq                           /* length exhausted: r0 is 0 */
  241
  242         /* Compare up to 2 bytes */
  243         ldrb    r0, [ip], #0x01
  244         ldrb    r3, [r1], #0x01
  245         subs    r0, r0, r3
  246         RETne
  247         subs    r2, r2, #0x01
  248         RETeq
  249
  250         /* Compare 1 byte */
  251         ldrb    r0, [ip], #0x01
  252         ldrb    r3, [r1], #0x01
  253         subs    r0, r0, r3
  254         RETne
  255         subs    r2, r2, #0x01
  256         RETeq
  257
  258         /* Compare 4 bytes at a time, if possible */
              /*
               * r2 is kept 4 below the real remainder while in the word loop.
               * cmpcs only compares when the subs did not borrow; after a
               * borrow the subs result is non-zero, so beq falls out of the
               * loop either on a mismatching word or when fewer than 4
               * bytes follow the word just loaded.
               */
  259         subs    r2, r2, #0x04
  260         bcc     .Lmemcmp_bytewise       /* fewer than 4 bytes in total */
  261 .Lmemcmp_word_aligned:
  262         ldr     r0, [ip], #0x04
  263         ldr     r3, [r1], #0x04
  264         subs    r2, r2, #0x04
  265         cmpcs   r0, r3
  266         beq     .Lmemcmp_word_aligned
  267         sub     r0, r0, r3              /* only used as a zero/non-zero test */
  268
  269         /* Correct for extra subtraction, and check if done */
  270         adds    r2, r2, #0x04
  271         cmpeq   r0, #0x00               /* If done, did all bytes match? */
  272         RETeq                   /* Yup. Just return */
  273
  274         /* Re-do the final word byte-wise */
  275         sub     ip, ip, #0x04
  276         sub     r1, r1, #0x04
  277
              /*
               * Byte loop.  On entry to bytewise2, r2 = (bytes to compare - 1).
               * The loop ends on the first mismatch or when the subs borrows;
               * r0 - r3 of the last pair loaded is the return value.
               */
  278 .Lmemcmp_bytewise:
  279         add     r2, r2, #0x03           /* (r2 + 4) bytes remain; store count - 1 */
  280 .Lmemcmp_bytewise2:
  281         ldrb    r0, [ip], #0x01
  282         ldrb    r3, [r1], #0x01
  283         subs    r2, r2, #0x01
  284         cmpcs   r0, r3
  285         beq     .Lmemcmp_bytewise2
  286         sub     r0, r0, r3
  287         RET
  288
  289         /*
  290          * 6 byte compares are very common, thanks to the network stack.
  291          * This code is hand-scheduled to reduce the number of stalls for
  292          * load results. Everything else being equal, this will be ~32%
  293          * faster than a byte-wise memcmp.
          *
          * Loads for the next byte pair are issued before the current
          * pair has been tested; the conditional (eq) loads are skipped
          * once a mismatch has been seen.  All accesses stay within
          * offsets 0..5 of either buffer.  b1#n / b2#n in the comments
          * below mean byte n of the first / second buffer.
  294          */
  295         .align  5                       /* 2^5 = 32-byte boundary */
  296 .Lmemcmp_6bytes:
  297         ldrb    r3, [r1, #0x00]         /* r3 = b2#0 */
  298         ldrb    r0, [ip, #0x00]         /* r0 = b1#0 */
  299         ldrb    r2, [r1, #0x01]         /* r2 = b2#1 */
  300         subs    r0, r0, r3              /* r0 = b1#0 - b2#0 */
  301         ldrbeq  r3, [ip, #0x01]         /* r3 = b1#1 */
  302         RETne                   /* Return if mismatch on #0 */
  303         subs    r0, r3, r2              /* r0 = b1#1 - b2#1 */
  304         ldrbeq  r3, [r1, #0x02]         /* r3 = b2#2 */
  305         ldrbeq  r0, [ip, #0x02]         /* r0 = b1#2 */
  306         RETne                   /* Return if mismatch on #1 */
  307         ldrb    r2, [r1, #0x03]         /* r2 = b2#3 */
  308         subs    r0, r0, r3              /* r0 = b1#2 - b2#2 */
  309         ldrbeq  r3, [ip, #0x03]         /* r3 = b1#3 */
  310         RETne                   /* Return if mismatch on #2 */
  311         subs    r0, r3, r2              /* r0 = b1#3 - b2#3 */
  312         ldrbeq  r3, [r1, #0x04]         /* r3 = b2#4 */
  313         ldrbeq  r0, [ip, #0x04]         /* r0 = b1#4 */
  314         RETne                   /* Return if mismatch on #3 */
  315         ldrb    r2, [r1, #0x05]         /* r2 = b2#5 */
  316         subs    r0, r0, r3              /* r0 = b1#4 - b2#4 */
  317         ldrbeq  r3, [ip, #0x05]         /* r3 = b1#5 */
  318         RETne                   /* Return if mismatch on #4 */
  319         sub     r0, r3, r2              /* r0 = b1#5 - b2#5 */
  320         RET
  321 END(memcmp)
 322
 323 ENTRY(memmove)
 324         /* Do the buffers overlap? */
 325         cmp     r0, r1
 326         RETeq           /* Bail now if src/dst are the same */
 327         subcc   r3, r0, r1      /* if (dst > src) r3 = dst - src */
 328         subcs   r3, r1, r0      /* if (src > dsr) r3 = src - dst */
 329         cmp     r3, r2          /* if (r3 < len) we have an overlap */
 330         bcc     PIC_SYM(_C_LABEL(memcpy), PLT)
 331
 332         /* Determine copy direction */
 333         cmp     r1, r0
 334         bcc     .Lmemmove_backwards
 335
 336         moveq   r0, #0                  /* Quick abort for len=0 */
 337         RETeq
 338
 339         stmdb   sp!, {r0, lr}           /* memmove() returns dest addr */
 340         subs    r2, r2, #4
 341         blt     .Lmemmove_fl4           /* less than 4 bytes */
 342         ands    r12, r0, #3
 343         bne     .Lmemmove_fdestul       /* oh unaligned destination addr */
 344         ands    r12, r1, #3
 345         bne     .Lmemmove_fsrcul                /* oh unaligned source addr */
 346
 347 .Lmemmove_ft8:
 348         /* We have aligned source and destination */
 349         subs    r2, r2, #8
 350         blt     .Lmemmove_fl12          /* less than 12 bytes (4 from above) */
 351         subs    r2, r2, #0x14
 352         blt     .Lmemmove_fl32          /* less than 32 bytes (12 from above) */
 353         stmdb   sp!, {r4}               /* borrow r4 */
 354
 355         /* blat 32 bytes at a time */
 356         /* XXX for really big copies perhaps we should use more registers */
 357 .Lmemmove_floop32:
 358         ldmia   r1!, {r3, r4, r12, lr}
 359         stmia   r0!, {r3, r4, r12, lr}
 360         ldmia   r1!, {r3, r4, r12, lr}
 361         stmia   r0!, {r3, r4, r12, lr}
 362         subs    r2, r2, #0x20
 363         bge     .Lmemmove_floop32
 364
 365         cmn     r2, #0x10
 366         ldmiage r1!, {r3, r4, r12, lr}  /* blat a remaining 16 bytes */
 367         stmiage r0!, {r3, r4, r12, lr}
 368         subge   r2, r2, #0x10
 369         ldmia   sp!, {r4}               /* return r4 */
 370
 371 .Lmemmove_fl32:
 372         adds    r2, r2, #0x14
 373
 374         /* blat 12 bytes at a time */
 375 .Lmemmove_floop12:
 376         ldmiage r1!, {r3, r12, lr}
 377         stmiage r0!, {r3, r12, lr}
 378         subsge  r2, r2, #0x0c
 379         bge     .Lmemmove_floop12
 380
 381 .Lmemmove_fl12:
 382         adds    r2, r2, #8
 383         blt     .Lmemmove_fl4
 384
 385         subs    r2, r2, #4
 386         ldrlt   r3, [r1], #4
 387         strlt   r3, [r0], #4
 388         ldmiage r1!, {r3, r12}
 389         stmiage r0!, {r3, r12}
 390         subge   r2, r2, #4
 391
 392 .Lmemmove_fl4:
 393         /* less than 4 bytes to go */
 394         adds    r2, r2, #4
 395         ldmiaeq sp!, {r0, pc}           /* done */
 396
 397         /* copy the crud byte at a time */
 398         cmp     r2, #2
 399         ldrb    r3, [r1], #1
 400         strb    r3, [r0], #1
 401         ldrbge  r3, [r1], #1
 402         strbge  r3, [r0], #1
 403         ldrbgt  r3, [r1], #1
 404         strbgt  r3, [r0], #1
 405         ldmia   sp!, {r0, pc}
 406
 407         /* erg - unaligned destination */
 408 .Lmemmove_fdestul:
 409         rsb     r12, r12, #4
 410         cmp     r12, #2
 411
 412         /* align destination with byte copies */
 413         ldrb    r3, [r1], #1
 414         strb    r3, [r0], #1
 415         ldrbge  r3, [r1], #1
 416         strbge  r3, [r0], #1
 417         ldrbgt  r3, [r1], #1
 418         strbgt  r3, [r0], #1
 419         subs    r2, r2, r12
 420         blt     .Lmemmove_fl4           /* less the 4 bytes */
 421
 422         ands    r12, r1, #3
 423         beq     .Lmemmove_ft8           /* we have an aligned source */
 424
 425         /* erg - unaligned source */
 426         /* This is where it gets nasty ... */
 427 .Lmemmove_fsrcul:
 428         bic     r1, r1, #3
 429         ldr     lr, [r1], #4
 430         cmp     r12, #2
 431         bgt     .Lmemmove_fsrcul3
 432         beq     .Lmemmove_fsrcul2
 433         cmp     r2, #0x0c
 434         blt     .Lmemmove_fsrcul1loop4
 435         sub     r2, r2, #0x0c
 436         stmdb   sp!, {r4, r5}
 437
 438 .Lmemmove_fsrcul1loop16:
 439         mov     r3, lr, lsr #8
 440         ldmia   r1!, {r4, r5, r12, lr}
 441         orr     r3, r3, r4, lsl #24
 442         mov     r4, r4, lsr #8
 443         orr     r4, r4, r5, lsl #24
 444         mov     r5, r5, lsr #8
 445         orr     r5, r5, r12, lsl #24
 446         mov     r12, r12, lsr #8
 447         orr     r12, r12, lr, lsl #24
 448         stmia   r0!, {r3-r5, r12}
 449         subs    r2, r2, #0x10
 450         bge     .Lmemmove_fsrcul1loop16
 451         ldmia   sp!, {r4, r5}
 452         adds    r2, r2, #0x0c
 453         blt     .Lmemmove_fsrcul1l4
 454
 455 .Lmemmove_fsrcul1loop4:
 456         mov     r12, lr, lsr #8
 457         ldr     lr, [r1], #4
 458         orr     r12, r12, lr, lsl #24
 459         str     r12, [r0], #4
 460         subs    r2, r2, #4
 461         bge     .Lmemmove_fsrcul1loop4
 462
 463 .Lmemmove_fsrcul1l4:
 464         sub     r1, r1, #3
 465         b       .Lmemmove_fl4
 466
 467 .Lmemmove_fsrcul2:
 468         cmp     r2, #0x0c
 469         blt     .Lmemmove_fsrcul2loop4
 470         sub     r2, r2, #0x0c
 471         stmdb   sp!, {r4, r5}
 472
 473 .Lmemmove_fsrcul2loop16:
 474         mov     r3, lr, lsr #16
 475         ldmia   r1!, {r4, r5, r12, lr}
 476         orr     r3, r3, r4, lsl #16
 477         mov     r4, r4, lsr #16
 478         orr     r4, r4, r5, lsl #16
 479         mov     r5, r5, lsr #16
 480         orr     r5, r5, r12, lsl #16
 481         mov     r12, r12, lsr #16
 482         orr     r12, r12, lr, lsl #16
 483         stmia   r0!, {r3-r5, r12}
 484         subs    r2, r2, #0x10
 485         bge     .Lmemmove_fsrcul2loop16
 486         ldmia   sp!, {r4, r5}
 487         adds    r2, r2, #0x0c
 488         blt     .Lmemmove_fsrcul2l4
 489
 490 .Lmemmove_fsrcul2loop4:
 491         mov     r12, lr, lsr #16
 492         ldr     lr, [r1], #4
 493         orr     r12, r12, lr, lsl #16
 494         str     r12, [r0], #4
 495         subs    r2, r2, #4
 496         bge     .Lmemmove_fsrcul2loop4
 497
 498 .Lmemmove_fsrcul2l4:
 499         sub     r1, r1, #2
 500         b       .Lmemmove_fl4
 501
 502 .Lmemmove_fsrcul3:
 503         cmp     r2, #0x0c
 504         blt     .Lmemmove_fsrcul3loop4
 505         sub     r2, r2, #0x0c
 506         stmdb   sp!, {r4, r5}
 507
 508 .Lmemmove_fsrcul3loop16:
 509         mov     r3, lr, lsr #24
 510         ldmia   r1!, {r4, r5, r12, lr}
 511         orr     r3, r3, r4, lsl #8
 512         mov     r4, r4, lsr #24
 513         orr     r4, r4, r5, lsl #8
 514         mov     r5, r5, lsr #24
 515         orr     r5, r5, r12, lsl #8
 516         mov     r12, r12, lsr #24
 517         orr     r12, r12, lr, lsl #8
 518         stmia   r0!, {r3-r5, r12}
 519         subs    r2, r2, #0x10
 520         bge     .Lmemmove_fsrcul3loop16
 521         ldmia   sp!, {r4, r5}
 522         adds    r2, r2, #0x0c
 523         blt     .Lmemmove_fsrcul3l4
 524
 525 .Lmemmove_fsrcul3loop4:
 526         mov     r12, lr, lsr #24
 527         ldr     lr, [r1], #4
 528         orr     r12, r12, lr, lsl #8
 529         str     r12, [r0], #4
 530         subs    r2, r2, #4
 531         bge     .Lmemmove_fsrcul3loop4
 532
 533 .Lmemmove_fsrcul3l4:
 534         sub     r1, r1, #1
 535         b       .Lmemmove_fl4
 536
 537 .Lmemmove_backwards:
 538         add     r1, r1, r2
 539         add     r0, r0, r2
 540         subs    r2, r2, #4
 541         blt     .Lmemmove_bl4           /* less than 4 bytes */
 542         ands    r12, r0, #3
 543         bne     .Lmemmove_bdestul       /* oh unaligned destination addr */
 544         ands    r12, r1, #3
 545         bne     .Lmemmove_bsrcul                /* oh unaligned source addr */
 546
 547 .Lmemmove_bt8:
 548         /* We have aligned source and destination */
 549         subs    r2, r2, #8
 550         blt     .Lmemmove_bl12          /* less than 12 bytes (4 from above) */
 551         stmdb   sp!, {r4, lr}
 552         subs    r2, r2, #0x14           /* less than 32 bytes (12 from above) */
 553         blt     .Lmemmove_bl32
 554
 555         /* blat 32 bytes at a time */
 556         /* XXX for really big copies perhaps we should use more registers */
 557 .Lmemmove_bloop32:
 558         ldmdb   r1!, {r3, r4, r12, lr}
 559         stmdb   r0!, {r3, r4, r12, lr}
 560         ldmdb   r1!, {r3, r4, r12, lr}
 561         stmdb   r0!, {r3, r4, r12, lr}
 562         subs    r2, r2, #0x20
 563         bge     .Lmemmove_bloop32
 564
 565 .Lmemmove_bl32:
 566         cmn     r2, #0x10
 567         ldmdbge r1!, {r3, r4, r12, lr}  /* blat a remaining 16 bytes */
 568         stmdbge r0!, {r3, r4, r12, lr}
 569         subge   r2, r2, #0x10
 570         adds    r2, r2, #0x14
 571         ldmdbge r1!, {r3, r12, lr}      /* blat a remaining 12 bytes */
 572         stmdbge r0!, {r3, r12, lr}
 573         subge   r2, r2, #0x0c
 574         ldmia   sp!, {r4, lr}
 575
 576 .Lmemmove_bl12:
 577         adds    r2, r2, #8
 578         blt     .Lmemmove_bl4
 579         subs    r2, r2, #4
 580         ldrlt   r3, [r1, #-4]!
 581         strlt   r3, [r0, #-4]!
 582         ldmdbge r1!, {r3, r12}
 583         stmdbge r0!, {r3, r12}
 584         subge   r2, r2, #4
 585
 586 .Lmemmove_bl4:
 587         /* less than 4 bytes to go */
 588         adds    r2, r2, #4
 589         RETeq                   /* done */
 590
 591         /* copy the crud byte at a time */
 592         cmp     r2, #2
 593         ldrb    r3, [r1, #-1]!
 594         strb    r3, [r0, #-1]!
 595         ldrbge  r3, [r1, #-1]!
 596         strbge  r3, [r0, #-1]!
 597         ldrbgt  r3, [r1, #-1]!
 598         strbgt  r3, [r0, #-1]!
 599         RET
 600
 601         /* erg - unaligned destination */
 602 .Lmemmove_bdestul:
 603         cmp     r12, #2
 604
 605         /* align destination with byte copies */
 606         ldrb    r3, [r1, #-1]!
 607         strb    r3, [r0, #-1]!
 608         ldrbge  r3, [r1, #-1]!
 609         strbge  r3, [r0, #-1]!
 610         ldrbgt  r3, [r1, #-1]!
 611         strbgt  r3, [r0, #-1]!
 612         subs    r2, r2, r12
 613         blt     .Lmemmove_bl4           /* less than 4 bytes to go */
 614         ands    r12, r1, #3
 615         beq     .Lmemmove_bt8           /* we have an aligned source */
 616
 617         /* erg - unaligned source */
 618         /* This is where it gets nasty ... */
 619 .Lmemmove_bsrcul:
 620         bic     r1, r1, #3
 621         ldr     r3, [r1, #0]
 622         cmp     r12, #2
 623         blt     .Lmemmove_bsrcul1
 624         beq     .Lmemmove_bsrcul2
 625         cmp     r2, #0x0c
 626         blt     .Lmemmove_bsrcul3loop4
 627         sub     r2, r2, #0x0c
 628         stmdb   sp!, {r4, r5, lr}
 629
 630 .Lmemmove_bsrcul3loop16:
 631         mov     lr, r3, lsl #8
 632         ldmdb   r1!, {r3-r5, r12}
 633         orr     lr, lr, r12, lsr #24
 634         mov     r12, r12, lsl #8
 635         orr     r12, r12, r5, lsr #24
 636         mov     r5, r5, lsl #8
 637         orr     r5, r5, r4, lsr #24
 638         mov     r4, r4, lsl #8
 639         orr     r4, r4, r3, lsr #24
 640         stmdb   r0!, {r4, r5, r12, lr}
 641         subs    r2, r2, #0x10
 642         bge     .Lmemmove_bsrcul3loop16
 643         ldmia   sp!, {r4, r5, lr}
 644         adds    r2, r2, #0x0c
 645         blt     .Lmemmove_bsrcul3l4
 646
 647 .Lmemmove_bsrcul3loop4:
 648         mov     r12, r3, lsl #8
 649         ldr     r3, [r1, #-4]!
 650         orr     r12, r12, r3, lsr #24
 651         str     r12, [r0, #-4]!
 652         subs    r2, r2, #4
 653         bge     .Lmemmove_bsrcul3loop4
 654
 655 .Lmemmove_bsrcul3l4:
 656         add     r1, r1, #3
 657         b       .Lmemmove_bl4
 658
 659 .Lmemmove_bsrcul2:
 660         cmp     r2, #0x0c
 661         blt     .Lmemmove_bsrcul2loop4
 662         sub     r2, r2, #0x0c
 663         stmdb   sp!, {r4, r5, lr}
 664
 665 .Lmemmove_bsrcul2loop16:
 666         mov     lr, r3, lsl #16
 667         ldmdb   r1!, {r3-r5, r12}
 668         orr     lr, lr, r12, lsr #16
 669         mov     r12, r12, lsl #16
 670         orr     r12, r12, r5, lsr #16
 671         mov     r5, r5, lsl #16
 672         orr     r5, r5, r4, lsr #16
 673         mov     r4, r4, lsl #16
 674         orr     r4, r4, r3, lsr #16
 675         stmdb   r0!, {r4, r5, r12, lr}
 676         subs    r2, r2, #0x10
 677         bge     .Lmemmove_bsrcul2loop16
 678         ldmia   sp!, {r4, r5, lr}
 679         adds    r2, r2, #0x0c
 680         blt     .Lmemmove_bsrcul2l4
 681
 682 .Lmemmove_bsrcul2loop4:
 683         mov     r12, r3, lsl #16
 684         ldr     r3, [r1, #-4]!
 685         orr     r12, r12, r3, lsr #16
 686         str     r12, [r0, #-4]!
 687         subs    r2, r2, #4
 688         bge     .Lmemmove_bsrcul2loop4
 689
 690 .Lmemmove_bsrcul2l4:
 691         add     r1, r1, #2
 692         b       .Lmemmove_bl4
 693
 694 .Lmemmove_bsrcul1:
 695         cmp     r2, #0x0c
 696         blt     .Lmemmove_bsrcul1loop4
 697         sub     r2, r2, #0x0c
 698         stmdb   sp!, {r4, r5, lr}
 699
 700 .Lmemmove_bsrcul1loop32:
 701         mov     lr, r3, lsl #24
 702         ldmdb   r1!, {r3-r5, r12}
 703         orr     lr, lr, r12, lsr #8
 704         mov     r12, r12, lsl #24
 705         orr     r12, r12, r5, lsr #8
 706         mov     r5, r5, lsl #24
 707         orr     r5, r5, r4, lsr #8
 708         mov     r4, r4, lsl #24
 709         orr     r4, r4, r3, lsr #8
 710         stmdb   r0!, {r4, r5, r12, lr}
 711         subs    r2, r2, #0x10
 712         bge     .Lmemmove_bsrcul1loop32
 713         ldmia   sp!, {r4, r5, lr}
 714         adds    r2, r2, #0x0c
 715         blt     .Lmemmove_bsrcul1l4
 716
 717 .Lmemmove_bsrcul1loop4:
 718         mov     r12, r3, lsl #24
 719         ldr     r3, [r1, #-4]!
 720         orr     r12, r12, r3, lsr #8
 721         str     r12, [r0, #-4]!
 722         subs    r2, r2, #4
 723         bge     .Lmemmove_bsrcul1loop4
 724
 725 .Lmemmove_bsrcul1l4:
 726         add     r1, r1, #1
 727         b       .Lmemmove_bl4
 728 END(memmove)
 729
 730 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
 731 ENTRY(memcpy)
 732         pld     [r1]
 733         cmp     r2, #0x0c
 734         ble     .Lmemcpy_short          /* <= 12 bytes */
 735 #ifdef FLASHADDR
 736 #if FLASHADDR > PHYSADDR
 737         ldr     r3, =FLASHADDR
 738         cmp     r3, pc
 739         bls     .Lnormal
 740 #else
 741         ldr     r3, =FLASHADDR
 742         cmp     r3, pc
 743         bhi     .Lnormal
 744 #endif
 745 #endif
 746         mov     r3, r0                  /* We must not clobber r0 */
 747
 748         /* Word-align the destination buffer */
 749         ands    ip, r3, #0x03           /* Already word aligned? */
 750         beq     .Lmemcpy_wordaligned    /* Yup */
 751         cmp     ip, #0x02
 752         ldrb    ip, [r1], #0x01
 753         sub     r2, r2, #0x01
 754         strb    ip, [r3], #0x01
 755         ldrble  ip, [r1], #0x01
 756         suble   r2, r2, #0x01
 757         strble  ip, [r3], #0x01
 758         ldrblt  ip, [r1], #0x01
 759         sublt   r2, r2, #0x01
 760         strblt  ip, [r3], #0x01
 761
 762         /* Destination buffer is now word aligned */
 763 .Lmemcpy_wordaligned:
 764         ands    ip, r1, #0x03           /* Is src also word-aligned? */
 765         bne     .Lmemcpy_bad_align      /* Nope. Things just got bad */
 766
 767         /* Quad-align the destination buffer */
 768         tst     r3, #0x07               /* Already quad aligned? */
 769         ldrne   ip, [r1], #0x04
 770         stmfd   sp!, {r4-r9}            /* Free up some registers */
 771         subne   r2, r2, #0x04
 772         strne   ip, [r3], #0x04
 773
 774         /* Destination buffer quad aligned, source is at least word aligned */
 775         subs    r2, r2, #0x80           /* r2 = count - 128; kept biased by -128 in the loop */
 776         blt     .Lmemcpy_w_lessthan128  /* Fewer than 128 bytes: skip the big loop */
 777
 778         /* Copy 128 bytes at a time; LD:/ST: are the block offsets each access covers */
 779 .Lmemcpy_w_loop128:
 780         ldr     r4, [r1], #0x04         /* LD:00-03 */
 781         ldr     r5, [r1], #0x04         /* LD:04-07 */
 782         pld     [r1, #0x18]             /* Prefetch 0x20 (r1 is 0x08 into the block here) */
 783         ldr     r6, [r1], #0x04         /* LD:08-0b */
 784         ldr     r7, [r1], #0x04         /* LD:0c-0f */
 785         ldr     r8, [r1], #0x04         /* LD:10-13 */
 786         ldr     r9, [r1], #0x04         /* LD:14-17 */
 787         strd    r4, [r3], #0x08         /* ST:00-07 (strd stores the pair r4, r5) */
 788         ldr     r4, [r1], #0x04         /* LD:18-1b */
 789         ldr     r5, [r1], #0x04         /* LD:1c-1f */
 790         strd    r6, [r3], #0x08         /* ST:08-0f */
 791         ldr     r6, [r1], #0x04         /* LD:20-23 */
 792         ldr     r7, [r1], #0x04         /* LD:24-27 */
 793         pld     [r1, #0x18]             /* Prefetch 0x40 */
 794         strd    r8, [r3], #0x08         /* ST:10-17 */
 795         ldr     r8, [r1], #0x04         /* LD:28-2b */
 796         ldr     r9, [r1], #0x04         /* LD:2c-2f */
 797         strd    r4, [r3], #0x08         /* ST:18-1f */
 798         ldr     r4, [r1], #0x04         /* LD:30-33 */
 799         ldr     r5, [r1], #0x04         /* LD:34-37 */
 800         strd    r6, [r3], #0x08         /* ST:20-27 */
 801         ldr     r6, [r1], #0x04         /* LD:38-3b */
 802         ldr     r7, [r1], #0x04         /* LD:3c-3f */
 803         strd    r8, [r3], #0x08         /* ST:28-2f */
 804         ldr     r8, [r1], #0x04         /* LD:40-43 */
 805         ldr     r9, [r1], #0x04         /* LD:44-47 */
 806         pld     [r1, #0x18]             /* Prefetch 0x60 */
 807         strd    r4, [r3], #0x08         /* ST:30-37 */
 808         ldr     r4, [r1], #0x04         /* LD:48-4b */
 809         ldr     r5, [r1], #0x04         /* LD:4c-4f */
 810         strd    r6, [r3], #0x08         /* ST:38-3f */
 811         ldr     r6, [r1], #0x04         /* LD:50-53 */
 812         ldr     r7, [r1], #0x04         /* LD:54-57 */
 813         strd    r8, [r3], #0x08         /* ST:40-47 */
 814         ldr     r8, [r1], #0x04         /* LD:58-5b */
 815         ldr     r9, [r1], #0x04         /* LD:5c-5f */
 816         strd    r4, [r3], #0x08         /* ST:48-4f */
 817         ldr     r4, [r1], #0x04         /* LD:60-63 */
 818         ldr     r5, [r1], #0x04         /* LD:64-67 */
 819         pld     [r1, #0x18]             /* Prefetch 0x80 */
 820         strd    r6, [r3], #0x08         /* ST:50-57 */
 821         ldr     r6, [r1], #0x04         /* LD:68-6b */
 822         ldr     r7, [r1], #0x04         /* LD:6c-6f */
 823         strd    r8, [r3], #0x08         /* ST:58-5f */
 824         ldr     r8, [r1], #0x04         /* LD:70-73 */
 825         ldr     r9, [r1], #0x04         /* LD:74-77 */
 826         strd    r4, [r3], #0x08         /* ST:60-67 */
 827         ldr     r4, [r1], #0x04         /* LD:78-7b */
 828         ldr     r5, [r1], #0x04         /* LD:7c-7f */
 829         strd    r6, [r3], #0x08         /* ST:68-6f */
 830         strd    r8, [r3], #0x08         /* ST:70-77 */
 831         subs    r2, r2, #0x80           /* Another full 128 bytes left? */
 832         strd    r4, [r3], #0x08         /* ST:78-7f */
 833         bge     .Lmemcpy_w_loop128      /* Uses the subs flags; strd does not change them */
 834
 835 .Lmemcpy_w_lessthan128:
 836         adds    r2, r2, #0x80           /* Adjust for extra sub */
 837         ldmfdeq sp!, {r4-r9}            /* Nothing left: restore the saved registers */
 838         RETeq                   /* Return now if done */
 839         subs    r2, r2, #0x20           /* r2 = count - 32; kept biased by -32 in the loop */
 840         blt     .Lmemcpy_w_lessthan32   /* Fewer than 32 bytes: skip this loop */
 841
 842         /* Copy 32 bytes at a time */
 843 .Lmemcpy_w_loop32:
 844         ldr     r4, [r1], #0x04         /* LD:00-03 */
 845         ldr     r5, [r1], #0x04         /* LD:04-07 */
 846         pld     [r1, #0x18]             /* Prefetch 0x20 (start of the next 32 bytes) */
 847         ldr     r6, [r1], #0x04         /* LD:08-0b */
 848         ldr     r7, [r1], #0x04         /* LD:0c-0f */
 849         ldr     r8, [r1], #0x04         /* LD:10-13 */
 850         ldr     r9, [r1], #0x04         /* LD:14-17 */
 851         strd    r4, [r3], #0x08         /* ST:00-07 */
 852         ldr     r4, [r1], #0x04         /* LD:18-1b */
 853         ldr     r5, [r1], #0x04         /* LD:1c-1f */
 854         strd    r6, [r3], #0x08         /* ST:08-0f */
 855         strd    r8, [r3], #0x08         /* ST:10-17 */
 856         subs    r2, r2, #0x20           /* Another full 32 bytes left? */
 857         strd    r4, [r3], #0x08         /* ST:18-1f */
 858         bge     .Lmemcpy_w_loop32       /* Uses the subs flags; strd does not change them */
 859
 860 .Lmemcpy_w_lessthan32:
 861         adds    r2, r2, #0x20           /* Adjust for extra sub */
 862         ldmfdeq sp!, {r4-r9}            /* Nothing left: restore the saved registers */
 863         RETeq                   /* Return now if done */
 864
 865         and     r4, r2, #0x18           /* r4 = bytes left in whole 8-byte units (0, 8, 16 or 24) */
 866         rsbs    r4, r4, #0x18           /* r4 = 24 - r4 = bytes of copying to skip below; Z set if none */
 867         addne   pc, pc, r4, lsl #1      /* Each 8-byte stanza is 4 insns (16 bytes); pc reads as . + 8 */
 868         nop                             /* Filler so that . + 8 above is the first stanza */
 869
 870         /* At least 24 bytes remaining */
 871         ldr     r4, [r1], #0x04
 872         ldr     r5, [r1], #0x04
 873         sub     r2, r2, #0x08
 874         strd    r4, [r3], #0x08
 875
 876         /* At least 16 bytes remaining */
 877         ldr     r4, [r1], #0x04
 878         ldr     r5, [r1], #0x04
 879         sub     r2, r2, #0x08
 880         strd    r4, [r3], #0x08
 881
 882         /* At least 8 bytes remaining */
 883         ldr     r4, [r1], #0x04
 884         ldr     r5, [r1], #0x04
 885         subs    r2, r2, #0x08           /* Z set if nothing remains after this stanza */
 886         strd    r4, [r3], #0x08
 887
 888         /* Less than 8 bytes remaining (also the target when all three stanzas are skipped, Z clear) */
 889         ldmfd   sp!, {r4-r9}            /* Restore the saved registers; flags are not changed */
 890         RETeq                   /* Return now if done */
 891         subs    r2, r2, #0x04           /* Is there a whole word left? */
 892         ldrge   ip, [r1], #0x04         /* Yes: copy it */
 893         strge   ip, [r3], #0x04
 894         RETeq                   /* Return now if done */
 895         addlt   r2, r2, #0x04           /* No word copied: undo the sub; either way r2 is now 1..3 */
 896         ldrb    ip, [r1], #0x01         /* First trailing byte */
 897         cmp     r2, #0x02
 898         ldrbge  r2, [r1], #0x01         /* Second byte if r2 >= 2 (r2 is reused as data) */
 899         strb    ip, [r3], #0x01
 900         ldrbgt  ip, [r1]                /* Third byte if r2 was 3 */
 901         strbge  r2, [r3], #0x01
 902         strbgt  ip, [r3]
 903         RET
 904 /* Place a literal pool here for the above ldr instructions to use */
 905 .ltorg
 906
 907
 908 /*
 909  * At this point, it has not been possible to word align both buffers.
 910  * The destination buffer is word aligned, but the source buffer is not.
 911  */
 912 .Lmemcpy_bad_align:
 913         stmfd   sp!, {r4-r7}            /* Free up some registers */
 914         bic     r1, r1, #0x03           /* Round the source pointer down to a word boundary */
 915         cmp     ip, #2                  /* NOTE(review): ip presumably holds src & 3, set before this point - confirm */
 916         ldr     ip, [r1], #0x04         /* ip = first aligned source word; ldr leaves the flags alone */
 917         bgt     .Lmemcpy_bad3           /* ip > 2 */
 918         beq     .Lmemcpy_bad2           /* ip == 2 */
 919         b       .Lmemcpy_bad1           /* otherwise */
 920
 921 .Lmemcpy_bad1_loop16:               /* Copy 16 bytes; ip holds the carried-over source word */
 922         mov     r4, ip, lsr #8          /* Drop the already-consumed low byte of the carried word */
 923         ldr     r5, [r1], #0x04
 924         pld     [r1, #0x018]
 925         ldr     r6, [r1], #0x04
 926         ldr     r7, [r1], #0x04
 927         ldr     ip, [r1], #0x04         /* ip carries over into the next iteration */
 928         orr     r4, r4, r5, lsl #24     /* Top byte comes from the low byte of the next word */
 929         mov     r5, r5, lsr #8
 930         orr     r5, r5, r6, lsl #24
 931         mov     r6, r6, lsr #8
 932         orr     r6, r6, r7, lsl #24
 933         mov     r7, r7, lsr #8
 934         orr     r7, r7, ip, lsl #24
 935         str     r4, [r3], #0x04
 936         str     r5, [r3], #0x04
 937         str     r6, [r3], #0x04
 938         str     r7, [r3], #0x04
 939 .Lmemcpy_bad1:                      /* Entry point from .Lmemcpy_bad_align */
 940         subs    r2, r2, #0x10           /* At least 16 bytes left? */
 941         bge     .Lmemcpy_bad1_loop16
 942
 943         adds    r2, r2, #0x10           /* Adjust for extra sub */
 944         ldmfdeq sp!, {r4-r7}
 945         RETeq                   /* Return now if done */
 946         subs    r2, r2, #0x04           /* At least 4 bytes left? */
 947         sublt   r1, r1, #0x03           /* No: back r1 up over the 3 unconsumed bytes held in ip */
 948         blt     .Lmemcpy_bad_done
 949
 950 .Lmemcpy_bad1_loop4:                /* Copy one word per iteration */
 951         mov     r4, ip, lsr #8
 952         ldr     ip, [r1], #0x04
 953         subs    r2, r2, #0x04           /* Another whole word after this one? */
 954         orr     r4, r4, ip, lsl #24
 955         str     r4, [r3], #0x04
 956         bge     .Lmemcpy_bad1_loop4
 957         sub     r1, r1, #0x03           /* Back r1 up over the 3 unconsumed bytes held in ip */
 958         b       .Lmemcpy_bad_done
 959
 960 .Lmemcpy_bad2_loop16:               /* Copy 16 bytes; ip holds the carried-over source word */
 961         mov     r4, ip, lsr #16         /* Drop the already-consumed low halfword of the carried word */
 962         ldr     r5, [r1], #0x04
 963         pld     [r1, #0x018]
 964         ldr     r6, [r1], #0x04
 965         ldr     r7, [r1], #0x04
 966         ldr     ip, [r1], #0x04         /* ip carries over into the next iteration */
 967         orr     r4, r4, r5, lsl #16     /* Top halfword comes from the low halfword of the next word */
 968         mov     r5, r5, lsr #16
 969         orr     r5, r5, r6, lsl #16
 970         mov     r6, r6, lsr #16
 971         orr     r6, r6, r7, lsl #16
 972         mov     r7, r7, lsr #16
 973         orr     r7, r7, ip, lsl #16
 974         str     r4, [r3], #0x04
 975         str     r5, [r3], #0x04
 976         str     r6, [r3], #0x04
 977         str     r7, [r3], #0x04
 978 .Lmemcpy_bad2:                      /* Entry point from .Lmemcpy_bad_align */
 979         subs    r2, r2, #0x10           /* At least 16 bytes left? */
 980         bge     .Lmemcpy_bad2_loop16
 981
 982         adds    r2, r2, #0x10           /* Adjust for extra sub */
 983         ldmfdeq sp!, {r4-r7}
 984         RETeq                   /* Return now if done */
 985         subs    r2, r2, #0x04           /* At least 4 bytes left? */
 986         sublt   r1, r1, #0x02           /* No: back r1 up over the 2 unconsumed bytes held in ip */
 987         blt     .Lmemcpy_bad_done
 988
 989 .Lmemcpy_bad2_loop4:                /* Copy one word per iteration */
 990         mov     r4, ip, lsr #16
 991         ldr     ip, [r1], #0x04
 992         subs    r2, r2, #0x04           /* Another whole word after this one? */
 993         orr     r4, r4, ip, lsl #16
 994         str     r4, [r3], #0x04
 995         bge     .Lmemcpy_bad2_loop4
 996         sub     r1, r1, #0x02           /* Back r1 up over the 2 unconsumed bytes held in ip */
 997         b       .Lmemcpy_bad_done
 998
 999 .Lmemcpy_bad3_loop16:               /* Copy 16 bytes; ip holds the carried-over source word */
1000         mov     r4, ip, lsr #24         /* Keep only the one unconsumed (top) byte of the carried word */
1001         ldr     r5, [r1], #0x04
1002         pld     [r1, #0x018]
1003         ldr     r6, [r1], #0x04
1004         ldr     r7, [r1], #0x04
1005         ldr     ip, [r1], #0x04         /* ip carries over into the next iteration */
1006         orr     r4, r4, r5, lsl #8      /* Upper three bytes come from the next word */
1007         mov     r5, r5, lsr #24
1008         orr     r5, r5, r6, lsl #8
1009         mov     r6, r6, lsr #24
1010         orr     r6, r6, r7, lsl #8
1011         mov     r7, r7, lsr #24
1012         orr     r7, r7, ip, lsl #8
1013         str     r4, [r3], #0x04
1014         str     r5, [r3], #0x04
1015         str     r6, [r3], #0x04
1016         str     r7, [r3], #0x04
1017 .Lmemcpy_bad3:                      /* Entry point from .Lmemcpy_bad_align */
1018         subs    r2, r2, #0x10           /* At least 16 bytes left? */
1019         bge     .Lmemcpy_bad3_loop16
1020
1021         adds    r2, r2, #0x10           /* Adjust for extra sub */
1022         ldmfdeq sp!, {r4-r7}
1023         RETeq                   /* Return now if done */
1024         subs    r2, r2, #0x04           /* At least 4 bytes left? */
1025         sublt   r1, r1, #0x01           /* No: back r1 up over the 1 unconsumed byte held in ip */
1026         blt     .Lmemcpy_bad_done
1027
1028 .Lmemcpy_bad3_loop4:                /* Copy one word per iteration */
1029         mov     r4, ip, lsr #24
1030         ldr     ip, [r1], #0x04
1031         subs    r2, r2, #0x04           /* Another whole word after this one? */
1032         orr     r4, r4, ip, lsl #8
1033         str     r4, [r3], #0x04
1034         bge     .Lmemcpy_bad3_loop4
1035         sub     r1, r1, #0x01           /* Back r1 up over the 1 unconsumed byte; falls through to .Lmemcpy_bad_done */
1036
1037 .Lmemcpy_bad_done:                  /* Common tail of the bad1/bad2/bad3 paths: copy the last 0..3 bytes */
1038         ldmfd   sp!, {r4-r7}            /* Restore the registers saved at .Lmemcpy_bad_align */
1039         adds    r2, r2, #0x04           /* Adjust for extra sub; Z set if nothing is left */
1040         RETeq
1041         ldrb    ip, [r1], #0x01         /* First trailing byte */
1042         cmp     r2, #0x02
1043         ldrbge  r2, [r1], #0x01         /* Second byte if r2 >= 2 (r2 is reused as data) */
1044         strb    ip, [r3], #0x01
1045         ldrbgt  ip, [r1]                /* Third byte if r2 was 3 */
1046         strbge  r2, [r3], #0x01
1047         strbgt  ip, [r3]
1048         RET
1049
1050
1051 /*
1052  * Handle short copies (less than 16 bytes), possibly misaligned.
1053  * Some of these are *very* common, thanks to the network stack,
1054  * and so are handled specially.
1055  */
1056 .Lmemcpy_short:
1057         add     pc, pc, r2, lsl #2      /* Branch to table entry r2; the table only has entries 0x00-0x0c */
1058         nop                             /* Filler: pc above reads as . + 8, which is entry 0x00 */
1059         RET                     /* 0x00 */
1060         b       .Lmemcpy_bytewise       /* 0x01 */
1061         b       .Lmemcpy_bytewise       /* 0x02 */
1062         b       .Lmemcpy_bytewise       /* 0x03 */
1063         b       .Lmemcpy_4              /* 0x04 */
1064         b       .Lmemcpy_bytewise       /* 0x05 */
1065         b       .Lmemcpy_6              /* 0x06 */
1066         b       .Lmemcpy_bytewise       /* 0x07 */
1067         b       .Lmemcpy_8              /* 0x08 */
1068         b       .Lmemcpy_bytewise       /* 0x09 */
1069         b       .Lmemcpy_bytewise       /* 0x0a */
1070         b       .Lmemcpy_bytewise       /* 0x0b */
1071         b       .Lmemcpy_c              /* 0x0c */
1072 .Lmemcpy_bytewise:                  /* Copy r2 bytes one at a time; r2 must be non-zero on entry */
1073         mov     r3, r0                  /* We must not clobber r0 */
1074         ldrb    ip, [r1], #0x01         /* Pre-load the first byte */
1075 1:      subs    r2, r2, #0x01           /* Count the byte about to be stored */
1076         strb    ip, [r3], #0x01
1077         ldrbne  ip, [r1], #0x01         /* Fetch the next byte only if one remains */
1078         bne     1b
1079         RET
1080
1081 /******************************************************************************
1082  * Special case for 4 byte copies, dispatched on (dst & 3) << 2 | (src & 3)
1083  */
1084 #define LMEMCPY_4_LOG2  6       /* 64 bytes */
1085 #define LMEMCPY_4_PAD   .align LMEMCPY_4_LOG2
1086         LMEMCPY_4_PAD
1087 .Lmemcpy_4:
1088         and     r2, r1, #0x03           /* r2 = src & 3 */
1089         orr     r2, r2, r0, lsl #2      /* ... | dst << 2 */
1090         ands    r2, r2, #0x0f           /* r2 = (dst & 3) << 2 | (src & 3); Z set if both are word aligned */
1091         sub     r3, pc, #0x14           /* r3 = .Lmemcpy_4 (pc reads as . + 8 = .Lmemcpy_4 + 0x14) */
1092         addne   pc, r3, r2, lsl #LMEMCPY_4_LOG2 /* Jump to handler r2; handlers are 64 bytes apart */
1093
1094 /*
1095  * 0000: dst is 32-bit aligned, src is 32-bit aligned (fall-through from the dispatch)
1096  */
1097         ldr     r2, [r1]
1098         str     r2, [r0]
1099         RET
1100         LMEMCPY_4_PAD
1101
1102 /*
1103  * 0001: dst is 32-bit aligned, src is 8-bit aligned
1104  */
1105         ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
1106         ldr     r2, [r1, #3]            /* BE:r2 = 3xxx  LE:r2 = xxx3 */
1107         mov     r3, r3, lsr #8          /* r3 = .210 */
1108         orr     r3, r3, r2, lsl #24     /* r3 = 3210 */
1109         str     r3, [r0]
1110         RET
1111         LMEMCPY_4_PAD
1112
1113 /*
1114  * 0010: dst is 32-bit aligned, src is 16-bit aligned
1115  */
1116         ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
1117         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1118         orr     r3, r2, r3, lsl #16     /* r3 = 3210 */
1119         str     r3, [r0]
1120         RET
1121         LMEMCPY_4_PAD
1122
1123 /*
1124  * 0011: dst is 32-bit aligned, src is 8-bit aligned
1125  */
1126         ldr     r3, [r1, #-3]           /* BE:r3 = xxx0  LE:r3 = 0xxx */
1127         ldr     r2, [r1, #1]            /* BE:r2 = 123x  LE:r2 = x321 */
1128         mov     r3, r3, lsr #24         /* r3 = ...0 */
1129         orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
1130         str     r3, [r0]
1131         RET
1132         LMEMCPY_4_PAD
1133
1134 /*
1135  * 0100: dst is 8-bit aligned, src is 32-bit aligned
1136  */
1137         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1138         strb    r2, [r0]
1139         mov     r3, r2, lsr #8          /* r3 = .321 */
1140         mov     r1, r2, lsr #24         /* r1 = ...3 */
1141         strb    r1, [r0, #0x03]
1142         strh    r3, [r0, #0x01]         /* Stores the low halfword of r3 (bytes 1-2) */
1143         RET
1144         LMEMCPY_4_PAD
1145
1146 /*
1147  * 0101: dst is 8-bit aligned, src is 8-bit aligned
1148  */
1149         ldrb    r2, [r1]                /* Byte 0 */
1150         ldrh    r3, [r1, #0x01]         /* Bytes 1-2 */
1151         ldrb    r1, [r1, #0x03]         /* Byte 3 */
1152         strb    r2, [r0]
1153         strh    r3, [r0, #0x01]
1154         strb    r1, [r0, #0x03]
1155         RET
1156         LMEMCPY_4_PAD
1157
1158 /*
1159  * 0110: dst is 8-bit aligned, src is 16-bit aligned
1160  */
1161         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1162         ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
1163         strb    r2, [r0]
1164         mov     r2, r2, lsr #8          /* r2 = ...1 */
1165         orr     r2, r2, r3, lsl #8      /* r2 = .321 */
1166         mov     r3, r3, lsr #8          /* r3 = ...3 */
1167         strh    r2, [r0, #0x01]
1168         strb    r3, [r0, #0x03]
1169         RET
1170         LMEMCPY_4_PAD
1171
1172 /*
1173  * 0111: dst is 8-bit aligned, src is 8-bit aligned
1174  */
1175         ldrb    r2, [r1]                /* Byte 0 */
1176         ldrh    r3, [r1, #0x01]         /* Bytes 1-2 */
1177         ldrb    r1, [r1, #0x03]         /* Byte 3 */
1178         strb    r2, [r0]
1179         strh    r3, [r0, #0x01]
1180         strb    r1, [r0, #0x03]
1181         RET
1182         LMEMCPY_4_PAD
1183
1184 /*
1185  * 1000: dst is 16-bit aligned, src is 32-bit aligned
1186  */
1187         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1188         strh    r2, [r0]
1189         mov     r3, r2, lsr #16         /* r3 = ..32 */
1190         strh    r3, [r0, #0x02]
1191         RET
1192         LMEMCPY_4_PAD
1193
1194 /*
1195  * 1001: dst is 16-bit aligned, src is 8-bit aligned
1196  */
1197         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
1198         ldr     r3, [r1, #3]            /* BE:r3 = 3xxx  LE:r3 = xxx3 */
1199         mov     r1, r2, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
1200         strh    r1, [r0]
1201         mov     r2, r2, lsr #24         /* r2 = ...2 */
1202         orr     r2, r2, r3, lsl #8      /* r2 = xx32 */
1203         strh    r2, [r0, #0x02]
1204         RET
1205         LMEMCPY_4_PAD
1206
1207 /*
1208  * 1010: dst is 16-bit aligned, src is 16-bit aligned
1209  */
1210         ldrh    r2, [r1]                /* Bytes 0-1 */
1211         ldrh    r3, [r1, #0x02]         /* Bytes 2-3 */
1212         strh    r2, [r0]
1213         strh    r3, [r0, #0x02]
1214         RET
1215         LMEMCPY_4_PAD
1216
1217 /*
1218  * 1011: dst is 16-bit aligned, src is 8-bit aligned
1219  */
1220         ldr     r3, [r1, #1]            /* BE:r3 = 123x  LE:r3 = x321 */
1221         ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
1222         mov     r1, r3, lsr #8          /* BE:r1 = .123  LE:r1 = .x32 */
1223         strh    r1, [r0, #0x02]
1224         mov     r3, r3, lsl #8          /* r3 = 321. */
1225         orr     r3, r3, r2, lsr #24     /* r3 = 3210 */
1226         strh    r3, [r0]
1227         RET
1228         LMEMCPY_4_PAD
1229
1230 /*
1231  * 1100: dst is 8-bit aligned, src is 32-bit aligned
1232  */
1233         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1234         strb    r2, [r0]
1235         mov     r3, r2, lsr #8          /* r3 = .321 */
1236         mov     r1, r2, lsr #24         /* r1 = ...3 */
1237         strh    r3, [r0, #0x01]         /* Stores the low halfword of r3 (bytes 1-2) */
1238         strb    r1, [r0, #0x03]
1239         RET
1240         LMEMCPY_4_PAD
1241
1242 /*
1243  * 1101: dst is 8-bit aligned, src is 8-bit aligned
1244  */
1245         ldrb    r2, [r1]                /* Byte 0 */
1246         ldrh    r3, [r1, #0x01]         /* Bytes 1-2 */
1247         ldrb    r1, [r1, #0x03]         /* Byte 3 */
1248         strb    r2, [r0]
1249         strh    r3, [r0, #0x01]
1250         strb    r1, [r0, #0x03]
1251         RET
1252         LMEMCPY_4_PAD
1253
1254 /*
1255  * 1110: dst is 8-bit aligned, src is 16-bit aligned
1256  */
1257         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1258         ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
1259         strb    r2, [r0]
1260         mov     r2, r2, lsr #8          /* r2 = ...1 */
1261         orr     r2, r2, r3, lsl #8      /* r2 = .321 */
1262         strh    r2, [r0, #0x01]
1263         mov     r3, r3, lsr #8          /* r3 = ...3 */
1264         strb    r3, [r0, #0x03]
1265         RET
1266         LMEMCPY_4_PAD
1267
1268 /*
1269  * 1111: dst is 8-bit aligned, src is 8-bit aligned
1270  */
1271         ldrb    r2, [r1]                /* Byte 0 */
1272         ldrh    r3, [r1, #0x01]         /* Bytes 1-2 */
1273         ldrb    r1, [r1, #0x03]         /* Byte 3 */
1274         strb    r2, [r0]
1275         strh    r3, [r0, #0x01]
1276         strb    r1, [r0, #0x03]
1277         RET
1278         LMEMCPY_4_PAD
1279
1280
1281 /******************************************************************************
1282  * Special case for 6 byte copies, dispatched on (dst & 3) << 2 | (src & 3)
1283  */
1284 #define LMEMCPY_6_LOG2  6       /* 64 bytes */
1285 #define LMEMCPY_6_PAD   .align LMEMCPY_6_LOG2
1286         LMEMCPY_6_PAD
1287 .Lmemcpy_6:
1288         and     r2, r1, #0x03           /* r2 = src & 3 */
1289         orr     r2, r2, r0, lsl #2      /* ... | dst << 2 */
1290         ands    r2, r2, #0x0f           /* r2 = (dst & 3) << 2 | (src & 3); Z set if both are word aligned */
1291         sub     r3, pc, #0x14           /* r3 = .Lmemcpy_6 (pc reads as . + 8 = .Lmemcpy_6 + 0x14) */
1292         addne   pc, r3, r2, lsl #LMEMCPY_6_LOG2 /* Jump to handler r2; handlers are 64 bytes apart */
1293
1294 /*
1295  * 0000: dst is 32-bit aligned, src is 32-bit aligned (fall-through from the dispatch)
1296  */
1297         ldr     r2, [r1]                /* Bytes 0-3 */
1298         ldrh    r3, [r1, #0x04]         /* Bytes 4-5 */
1299         str     r2, [r0]
1300         strh    r3, [r0, #0x04]
1301         RET
1302         LMEMCPY_6_PAD
1303
1304 /*
1305  * 0001: dst is 32-bit aligned, src is 8-bit aligned
1306  */
1307         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
1308         ldr     r3, [r1, #0x03]         /* BE:r3 = 345x  LE:r3 = x543 */
1309         mov     r2, r2, lsr #8          /* r2 = .210 */
1310         orr     r2, r2, r3, lsl #24     /* r2 = 3210 */
1311         mov     r3, r3, lsr #8          /* BE:r3 = .345  LE:r3 = .x54 */
1312         str     r2, [r0]
1313         strh    r3, [r0, #0x04]
1314         RET
1315         LMEMCPY_6_PAD
1316
1317 /*
1318  * 0010: dst is 32-bit aligned, src is 16-bit aligned
1319  */
1320         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1321         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1322         mov     r1, r3, lsr #16         /* r1 = ..54 */
1323         orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
1324         str     r2, [r0]
1325         strh    r1, [r0, #0x04]
1326         RET
1327         LMEMCPY_6_PAD
1328
1329 /*
1330  * 0011: dst is 32-bit aligned, src is 8-bit aligned
1331  */
1332         ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
1333         ldr     r3, [r1, #1]            /* BE:r3 = 1234  LE:r3 = 4321 */
1334         ldr     r1, [r1, #5]            /* BE:r1 = 5xxx  LE:r1 = xxx5 */
1335         mov     r2, r2, lsr #24         /* r2 = ...0 */
1336         orr     r2, r2, r3, lsl #8      /* r2 = 3210 */
1337         mov     r1, r1, lsl #8          /* r1 = xx5. */
1338         orr     r1, r1, r3, lsr #24     /* r1 = xx54 */
1339         str     r2, [r0]
1340         strh    r1, [r0, #0x04]
1341         RET
1342         LMEMCPY_6_PAD
1343
1344 /*
1345  * 0100: dst is 8-bit aligned, src is 32-bit aligned
1346  */
1347         ldr     r3, [r1]                /* BE:r3 = 0123  LE:r3 = 3210 */
1348         ldrh    r2, [r1, #0x04]         /* BE:r2 = ..45  LE:r2 = ..54 */
1349         mov     r1, r3, lsr #8          /* BE:r1 = .012  LE:r1 = .321 */
1350         strh    r1, [r0, #0x01]
1351         strb    r3, [r0]
1352         mov     r3, r3, lsr #24         /* r3 = ...3 */
1353         orr     r3, r3, r2, lsl #8      /* r3 = .543 */
1354         mov     r2, r2, lsr #8          /* r2 = ...5 */
1355         strh    r3, [r0, #0x03]
1356         strb    r2, [r0, #0x05]
1357         RET
1358         LMEMCPY_6_PAD
1359
1360 /*
1361  * 0101: dst is 8-bit aligned, src is 8-bit aligned
1362  */
1363         ldrb    r2, [r1]                /* Byte 0 */
1364         ldrh    r3, [r1, #0x01]         /* Bytes 1-2 */
1365         ldrh    ip, [r1, #0x03]         /* Bytes 3-4 */
1366         ldrb    r1, [r1, #0x05]         /* Byte 5 */
1367         strb    r2, [r0]
1368         strh    r3, [r0, #0x01]
1369         strh    ip, [r0, #0x03]
1370         strb    r1, [r0, #0x05]
1371         RET
1372         LMEMCPY_6_PAD
1373
1374 /*
1375  * 0110: dst is 8-bit aligned, src is 16-bit aligned
1376  */
1377         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1378         ldr     r1, [r1, #0x02]         /* BE:r1 = 2345  LE:r1 = 5432 */
1379         strb    r2, [r0]
1380         mov     r3, r1, lsr #24         /* r3 = ...5 */
1381         strb    r3, [r0, #0x05]
1382         mov     r3, r1, lsr #8          /* r3 = .543 */
1383         strh    r3, [r0, #0x03]
1384         mov     r3, r2, lsr #8          /* r3 = ...1 */
1385         orr     r3, r3, r1, lsl #8      /* r3 = 4321 */
1386         strh    r3, [r0, #0x01]
1387         RET
1388         LMEMCPY_6_PAD
1389
1390 /*
1391  * 0111: dst is 8-bit aligned, src is 8-bit aligned
1392  */
1393         ldrb    r2, [r1]                /* Byte 0 */
1394         ldrh    r3, [r1, #0x01]         /* Bytes 1-2 */
1395         ldrh    ip, [r1, #0x03]         /* Bytes 3-4 */
1396         ldrb    r1, [r1, #0x05]         /* Byte 5 */
1397         strb    r2, [r0]
1398         strh    r3, [r0, #0x01]
1399         strh    ip, [r0, #0x03]
1400         strb    r1, [r0, #0x05]
1401         RET
1402         LMEMCPY_6_PAD
1403
1404 /*
1405  * 1000: dst is 16-bit aligned, src is 32-bit aligned
1406  */
1407         ldrh    r2, [r1, #0x04]         /* r2 = ..54 */
1408         ldr     r3, [r1]                /* r3 = 3210 */
1409         mov     r2, r2, lsl #16         /* r2 = 54.. */
1410         orr     r2, r2, r3, lsr #16     /* r2 = 5432 */
1411         strh    r3, [r0]
1412         str     r2, [r0, #0x02]
1413         RET
1414         LMEMCPY_6_PAD
1415
1416 /*
1417  * 1001: dst is 16-bit aligned, src is 8-bit aligned
1418  */
1419         ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
1420         ldr     r2, [r1, #3]            /* BE:r2 = 345x  LE:r2 = x543 */
1421         mov     r1, r3, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
1422         mov     r2, r2, lsl #8          /* r2 = 543. */
1423         orr     r2, r2, r3, lsr #24     /* r2 = 5432 */
1424         strh    r1, [r0]
1425         str     r2, [r0, #0x02]
1426         RET
1427         LMEMCPY_6_PAD
1428
1429 /*
1430  * 1010: dst is 16-bit aligned, src is 16-bit aligned
1431  */
1432         ldrh    r2, [r1]                /* Bytes 0-1 */
1433         ldr     r3, [r1, #0x02]         /* Bytes 2-5 */
1434         strh    r2, [r0]
1435         str     r3, [r0, #0x02]
1436         RET
1437         LMEMCPY_6_PAD
1438
1439 /*
1440  * 1011: dst is 16-bit aligned, src is 8-bit aligned
1441  */
1442         ldrb    r3, [r1]                /* r3 = ...0 */
1443         ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
1444         ldrb    r1, [r1, #0x05]         /* r1 = ...5 */
1445         orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
1446         mov     r1, r1, lsl #24         /* r1 = 5... */
1447         orr     r1, r1, r2, lsr #8      /* r1 = 5432 */
1448         strh    r3, [r0]
1449         str     r1, [r0, #0x02]
1450         RET
1451         LMEMCPY_6_PAD
1452
1453 /*
1454  * 1100: dst is 8-bit aligned, src is 32-bit aligned
1455  */
1456         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1457         ldrh    r1, [r1, #0x04]         /* BE:r1 = ..45  LE:r1 = ..54 */
1458         strb    r2, [r0]
1459         mov     r2, r2, lsr #8          /* r2 = .321 */
1460         orr     r2, r2, r1, lsl #24     /* r2 = 4321 */
1461         mov     r1, r1, lsr #8          /* r1 = ...5 */
1462         str     r2, [r0, #0x01]
1463         strb    r1, [r0, #0x05]
1464         RET
1465         LMEMCPY_6_PAD
1466
1467 /*
1468  * 1101: dst is 8-bit aligned, src is 8-bit aligned
1469  */
1470         ldrb    r2, [r1]                /* Byte 0 */
1471         ldrh    r3, [r1, #0x01]         /* Bytes 1-2 */
1472         ldrh    ip, [r1, #0x03]         /* Bytes 3-4 */
1473         ldrb    r1, [r1, #0x05]         /* Byte 5 */
1474         strb    r2, [r0]
1475         strh    r3, [r0, #0x01]
1476         strh    ip, [r0, #0x03]
1477         strb    r1, [r0, #0x05]
1478         RET
1479         LMEMCPY_6_PAD
1480
1481 /*
1482  * 1110: dst is 8-bit aligned, src is 16-bit aligned
1483  */
1484         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1485         ldr     r1, [r1, #0x02]         /* BE:r1 = 2345  LE:r1 = 5432 */
1486         strb    r2, [r0]
1487         mov     r2, r2, lsr #8          /* r2 = ...1 */
1488         orr     r2, r2, r1, lsl #8      /* r2 = 4321 */
1489         mov     r1, r1, lsr #24         /* r1 = ...5 */
1490         str     r2, [r0, #0x01]
1491         strb    r1, [r0, #0x05]
1492         RET
1493         LMEMCPY_6_PAD
1494
1495 /*
1496  * 1111: dst is 8-bit aligned, src is 8-bit aligned
1497  */
1498         ldrb    r2, [r1]                /* Byte 0 */
1499         ldr     r3, [r1, #0x01]         /* Bytes 1-4 */
1500         ldrb    r1, [r1, #0x05]         /* Byte 5 */
1501         strb    r2, [r0]
1502         str     r3, [r0, #0x01]
1503         strb    r1, [r0, #0x05]
1504         RET
1505         LMEMCPY_6_PAD
1506
1507
1508 /******************************************************************************
1509  * Special case for 8 byte copies
1510  */
1511 #define LMEMCPY_8_LOG2  6       /* 64 bytes */
1512 #define LMEMCPY_8_PAD   .align LMEMCPY_8_LOG2
1513         LMEMCPY_8_PAD
1514 .Lmemcpy_8:
1515         and     r2, r1, #0x03
1516         orr     r2, r2, r0, lsl #2
1517         ands    r2, r2, #0x0f
1518         sub     r3, pc, #0x14
1519         addne   pc, r3, r2, lsl #LMEMCPY_8_LOG2
1520
1521 /*
1522  * 0000: dst is 32-bit aligned, src is 32-bit aligned
1523  */
1524         ldr     r2, [r1]
1525         ldr     r3, [r1, #0x04]
1526         str     r2, [r0]
1527         str     r3, [r0, #0x04]
1528         RET
1529         LMEMCPY_8_PAD
1530
1531 /*
1532  * 0001: dst is 32-bit aligned, src is 8-bit aligned
1533  */
1534         ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
1535         ldr     r2, [r1, #0x03]         /* BE:r2 = 3456  LE:r2 = 6543 */
1536         ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
1537         mov     r3, r3, lsr #8          /* r3 = .210 */
1538         orr     r3, r3, r2, lsl #24     /* r3 = 3210 */
1539         mov     r1, r1, lsl #24         /* r1 = 7... */
1540         orr     r2, r1, r2, lsr #8      /* r2 = 7654 */
1541         str     r3, [r0]
1542         str     r2, [r0, #0x04]
1543         RET
1544         LMEMCPY_8_PAD
1545
1546 /*
1547  * 0010: dst is 32-bit aligned, src is 16-bit aligned
1548  */
1549         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1550         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1551         ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
1552         orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
1553         mov     r3, r3, lsr #16         /* r3 = ..54 */
1554         orr     r3, r3, r1, lsl #16     /* r3 = 7654 */
1555         str     r2, [r0]
1556         str     r3, [r0, #0x04]
1557         RET
1558         LMEMCPY_8_PAD
1559
1560 /*
1561  * 0011: dst is 32-bit aligned, src is 8-bit aligned
1562  */
1563         ldrb    r3, [r1]                /* r3 = ...0 */
1564         ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
1565         ldr     r1, [r1, #0x05]         /* BE:r1 = 567x  LE:r1 = x765 */
1566         orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
1567         mov     r2, r2, lsr #24         /* r2 = ...4 */
1568         orr     r2, r2, r1, lsl #8      /* r2 = 7654 */
1569         str     r3, [r0]
1570         str     r2, [r0, #0x04]
1571         RET
1572         LMEMCPY_8_PAD
1573
1574 /*
1575  * 0100: dst is 8-bit aligned, src is 32-bit aligned
1576  */
1577         ldr     r3, [r1]                /* BE:r3 = 0123  LE:r3 = 3210 */
1578         ldr     r2, [r1, #0x04]         /* BE:r2 = 4567  LE:r2 = 7654 */
1579         strb    r3, [r0]
1580         mov     r1, r2, lsr #24         /* r1 = ...7 */
1581         strb    r1, [r0, #0x07]
1582         mov     r1, r3, lsr #8          /* r1 = .321 */
1583         mov     r3, r3, lsr #24         /* r3 = ...3 */
1584         orr     r3, r3, r2, lsl #8      /* r3 = 6543 */
1585         strh    r1, [r0, #0x01]
1586         str     r3, [r0, #0x03]
1587         RET
1588         LMEMCPY_8_PAD
1589
1590 /*
1591  * 0101: dst is 8-bit aligned, src is 8-bit aligned
1592  */
1593         ldrb    r2, [r1]
1594         ldrh    r3, [r1, #0x01]
1595         ldr     ip, [r1, #0x03]
1596         ldrb    r1, [r1, #0x07]
1597         strb    r2, [r0]
1598         strh    r3, [r0, #0x01]
1599         str     ip, [r0, #0x03]
1600         strb    r1, [r0, #0x07]
1601         RET
1602         LMEMCPY_8_PAD
1603
1604 /*
1605  * 0110: dst is 8-bit aligned, src is 16-bit aligned
1606  */
1607         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1608         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1609         ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
1610         strb    r2, [r0]                /* 0 */
1611         mov     ip, r1, lsr #8          /* ip = ...7 */
1612         strb    ip, [r0, #0x07]         /* 7 */
1613         mov     ip, r2, lsr #8          /* ip = ...1 */
1614         orr     ip, ip, r3, lsl #8      /* ip = 4321 */
1615         mov     r3, r3, lsr #8          /* r3 = .543 */
1616         orr     r3, r3, r1, lsl #24     /* r3 = 6543 */
1617         strh    ip, [r0, #0x01]
1618         str     r3, [r0, #0x03]
1619         RET
1620         LMEMCPY_8_PAD
1621
1622 /*
1623  * 0111: dst is 8-bit aligned, src is 8-bit aligned
1624  */
1625         ldrb    r3, [r1]                /* r3 = ...0 */
1626         ldr     ip, [r1, #0x01]         /* BE:ip = 1234  LE:ip = 4321 */
1627         ldrh    r2, [r1, #0x05]         /* BE:r2 = ..56  LE:r2 = ..65 */
1628         ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
1629         strb    r3, [r0]
1630         mov     r3, ip, lsr #16         /* BE:r3 = ..12  LE:r3 = ..43 */
1631         strh    ip, [r0, #0x01]
1632         orr     r2, r3, r2, lsl #16     /* r2 = 6543 */
1633         str     r2, [r0, #0x03]
1634         strb    r1, [r0, #0x07]
1635         RET
1636         LMEMCPY_8_PAD
1637
1638 /*
1639  * 1000: dst is 16-bit aligned, src is 32-bit aligned
1640  */
1641         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1642         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1643         mov     r1, r2, lsr #16         /* BE:r1 = ..01  LE:r1 = ..32 */
1644         strh    r2, [r0]
1645         orr     r2, r1, r3, lsl #16     /* r2 = 5432 */
1646         mov     r3, r3, lsr #16         /* r3 = ..76 */
1647         str     r2, [r0, #0x02]
1648         strh    r3, [r0, #0x06]
1649         RET
1650         LMEMCPY_8_PAD
1651
1652 /*
1653  * 1001: dst is 16-bit aligned, src is 8-bit aligned
1654  */
1655         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
1656         ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
1657         ldrb    ip, [r1, #0x07]         /* ip = ...7 */
1658         mov     r1, r2, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
1659         strh    r1, [r0]
1660         mov     r1, r2, lsr #24         /* r1 = ...2 */
1661         orr     r1, r1, r3, lsl #8      /* r1 = 5432 */
1662         mov     r3, r3, lsr #24         /* r3 = ...6 */
1663         orr     r3, r3, ip, lsl #8      /* r3 = ..76 */
1664         str     r1, [r0, #0x02]
1665         strh    r3, [r0, #0x06]
1666         RET
1667         LMEMCPY_8_PAD
1668
1669 /*
1670  * 1010: dst is 16-bit aligned, src is 16-bit aligned
1671  */
1672         ldrh    r2, [r1]
1673         ldr     ip, [r1, #0x02]
1674         ldrh    r3, [r1, #0x06]
1675         strh    r2, [r0]
1676         str     ip, [r0, #0x02]
1677         strh    r3, [r0, #0x06]
1678         RET
1679         LMEMCPY_8_PAD
1680
1681 /*
1682  * 1011: dst is 16-bit aligned, src is 8-bit aligned
1683  */
1684         ldr     r3, [r1, #0x05]         /* BE:r3 = 567x  LE:r3 = x765 */
1685         ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
1686         ldrb    ip, [r1]                /* ip = ...0 */
1687         mov     r1, r3, lsr #8          /* BE:r1 = .567  LE:r1 = .x76 */
1688         strh    r1, [r0, #0x06]
1689         mov     r3, r3, lsl #24         /* r3 = 5... */
1690         orr     r3, r3, r2, lsr #8      /* r3 = 5432 */
1691         orr     r2, ip, r2, lsl #8      /* r2 = 3210 */
1692         str     r3, [r0, #0x02]
1693         strh    r2, [r0]
1694         RET
1695         LMEMCPY_8_PAD
1696
1697 /*
1698  * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
1699  */
1700         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1701         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1702         mov     r1, r3, lsr #8          /* BE:r1 = .456  LE:r1 = .765 */
1703         strh    r1, [r0, #0x05]
1704         strb    r2, [r0]
1705         mov     r1, r3, lsr #24         /* r1 = ...7 */
1706         strb    r1, [r0, #0x07]
1707         mov     r2, r2, lsr #8          /* r2 = .321 */
1708         orr     r2, r2, r3, lsl #24     /* r2 = 4321 */
1709         str     r2, [r0, #0x01]
1710         RET
1711         LMEMCPY_8_PAD
1712
1713 /*
1714  * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
1715  */
1716         ldrb    r3, [r1]                /* r3 = ...0 */
1717         ldrh    r2, [r1, #0x01]         /* BE:r2 = ..12  LE:r2 = ..21 */
1718         ldr     ip, [r1, #0x03]         /* BE:ip = 3456  LE:ip = 6543 */
1719         ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
1720         strb    r3, [r0]
1721         mov     r3, ip, lsr #16         /* BE:r3 = ..34  LE:r3 = ..65 */
1722         strh    r3, [r0, #0x05]
1723         orr     r2, r2, ip, lsl #16     /* r2 = 4321 */
1724         str     r2, [r0, #0x01]
1725         strb    r1, [r0, #0x07]
1726         RET
1727         LMEMCPY_8_PAD
1728
1729 /*
1730  * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
1731  */
1732         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1733         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1734         ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
1735         strb    r2, [r0]
1736         mov     ip, r2, lsr #8          /* ip = ...1 */
1737         orr     ip, ip, r3, lsl #8      /* ip = 4321 */
1738         mov     r2, r1, lsr #8          /* r2 = ...7 */
1739         strb    r2, [r0, #0x07]
1740         mov     r1, r1, lsl #8          /* r1 = .76. */
1741         orr     r1, r1, r3, lsr #24     /* r1 = .765 */
1742         str     ip, [r0, #0x01]
1743         strh    r1, [r0, #0x05]
1744         RET
1745         LMEMCPY_8_PAD
1746
1747 /*
1748  * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
1749  */
1750         ldrb    r2, [r1]
1751         ldr     ip, [r1, #0x01]
1752         ldrh    r3, [r1, #0x05]
1753         ldrb    r1, [r1, #0x07]
1754         strb    r2, [r0]
1755         str     ip, [r0, #0x01]
1756         strh    r3, [r0, #0x05]
1757         strb    r1, [r0, #0x07]
1758         RET
1759         LMEMCPY_8_PAD
1760
1761 /******************************************************************************
1762  * Special case for 12 byte copies
1763  */
1764 #define LMEMCPY_C_LOG2  7       /* log2 of the per-case slot size: 1 << 7 = 128 bytes */
1765 #define LMEMCPY_C_PAD   .align LMEMCPY_C_LOG2
1766         LMEMCPY_C_PAD
1767 .Lmemcpy_c:
1768         and     r2, r1, #0x03           /* r2 = src & 3 */
1769         orr     r2, r2, r0, lsl #2      /* r2 |= dst << 2 */
1770         ands    r2, r2, #0x0f           /* r2 = ((dst & 3) << 2) | (src & 3); Z set when both are 32-bit aligned */
1771         sub     r3, pc, #0x14           /* r3 = .Lmemcpy_c (pc reads as this insn + 8 = .Lmemcpy_c + 0x14, given 4-byte ARM insns) */
1772         addne   pc, r3, r2, lsl #LMEMCPY_C_LOG2 /* r2 != 0: jump to the slot for case r2; r2 == 0 falls through to case 0000 */
1773
1774 /*
1775  * 0000: dst is 32-bit aligned, src is 32-bit aligned
1776  */
1777         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1778         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1779         ldr     r1, [r1, #0x08]         /* BE:r1 = 89AB  LE:r1 = BA98 (src pointer is dead after this) */
1780         str     r2, [r0]                /* both sides word aligned: plain word copies */
1781         str     r3, [r0, #0x04]
1782         str     r1, [r0, #0x08]
1783         RET
1784         LMEMCPY_C_PAD
1785
1786 /*
1787  * 0001: dst is 32-bit aligned, src is 8-bit aligned
1788  */
1789         ldrb    r2, [r1, #0xb]          /* r2 = ...B */
1790         ldr     ip, [r1, #0x07]         /* BE:ip = 789A  LE:ip = A987 */
1791         ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
1792         ldr     r1, [r1, #-1]           /* BE:r1 = x012  LE:r1 = 210x */
1793         mov     r2, r2, lsl #24         /* r2 = B... */
1794         orr     r2, r2, ip, lsr #8      /* r2 = BA98 */
1795         str     r2, [r0, #0x08]
1796         mov     r2, ip, lsl #24         /* r2 = 7... */
1797         orr     r2, r2, r3, lsr #8      /* r2 = 7654 */
1798         mov     r1, r1, lsr #8          /* r1 = .210 */
1799         orr     r1, r1, r3, lsl #24     /* r1 = 3210 */
1800         str     r2, [r0, #0x04]
1801         str     r1, [r0]
1802         RET
1803         LMEMCPY_C_PAD
1804
1805 /*
1806  * 0010: dst is 32-bit aligned, src is 16-bit aligned
1807  */
1808         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1809         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1810         ldr     ip, [r1, #0x06]         /* BE:ip = 6789  LE:ip = 9876 */
1811         ldrh    r1, [r1, #0x0a]         /* BE:r1 = ..AB  LE:r1 = ..BA */
1812         orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
1813         str     r2, [r0]
1814         mov     r3, r3, lsr #16         /* r3 = ..54 */
1815         orr     r3, r3, ip, lsl #16     /* r3 = 7654 */
1816         mov     r1, r1, lsl #16         /* r1 = BA.. */
1817         orr     r1, r1, ip, lsr #16     /* r1 = BA98 */
1818         str     r3, [r0, #0x04]
1819         str     r1, [r0, #0x08]
1820         RET
1821         LMEMCPY_C_PAD
1822
1823 /*
1824  * 0011: dst is 32-bit aligned, src is 8-bit aligned
1825  */
1826         ldrb    r2, [r1]                /* r2 = ...0 */
1827         ldr     r3, [r1, #0x01]         /* BE:r3 = 1234  LE:r3 = 4321 */
1828         ldr     ip, [r1, #0x05]         /* BE:ip = 5678  LE:ip = 8765 */
1829         ldr     r1, [r1, #0x09]         /* BE:r1 = 9ABx  LE:r1 = xBA9 */
1830         orr     r2, r2, r3, lsl #8      /* r2 = 3210 */
1831         str     r2, [r0]
1832         mov     r3, r3, lsr #24         /* r3 = ...4 */
1833         orr     r3, r3, ip, lsl #8      /* r3 = 7654 */
1834         mov     r1, r1, lsl #8          /* r1 = BA9. */
1835         orr     r1, r1, ip, lsr #24     /* r1 = BA98 */
1836         str     r3, [r0, #0x04]
1837         str     r1, [r0, #0x08]
1838         RET
1839         LMEMCPY_C_PAD
1840
1841 /*
1842  * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
1843  */
1844         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1845         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1846         ldr     ip, [r1, #0x08]         /* BE:ip = 89AB  LE:ip = BA98 */
1847         mov     r1, r2, lsr #8          /* BE:r1 = .012  LE:r1 = .321 */
1848         strh    r1, [r0, #0x01]         /* LE: dst[1..2] = 21 */
1849         strb    r2, [r0]                /* LE: dst[0] = 0 */
1850         mov     r1, r2, lsr #24         /* r1 = ...3 */
1851         orr     r2, r1, r3, lsl #8      /* r2 = 6543 */
1852         mov     r1, r3, lsr #24         /* r1 = ...7 */
1853         orr     r1, r1, ip, lsl #8      /* r1 = A987 */
1854         mov     ip, ip, lsr #24         /* ip = ...B */
1855         str     r2, [r0, #0x03]         /* dst + 3 is word aligned when dst & 3 == 1 */
1856         str     r1, [r0, #0x07]
1857         strb    ip, [r0, #0x0b]
1858         RET
1859         LMEMCPY_C_PAD
1860
1861 /*
1862  * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
1863  */
1864         ldrb    r2, [r1]                /* r2 = ...0 */
1865         ldrh    r3, [r1, #0x01]         /* BE:r3 = ..12  LE:r3 = ..21 */
1866         ldr     ip, [r1, #0x03]         /* BE:ip = 3456  LE:ip = 6543 */
1867         strb    r2, [r0]                /* r2 is stored now so it can be reused below */
1868         ldr     r2, [r1, #0x07]         /* BE:r2 = 789A  LE:r2 = A987 */
1869         ldrb    r1, [r1, #0x0b]         /* r1 = ...B */
1870         strh    r3, [r0, #0x01]         /* same offsets as the loads: no shifting needed */
1871         str     ip, [r0, #0x03]
1872         str     r2, [r0, #0x07]
1873         strb    r1, [r0, #0x0b]
1874         RET
1875         LMEMCPY_C_PAD
1876
1877 /*
1878  * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
1879  */
1880         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1881         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1882         ldr     ip, [r1, #0x06]         /* BE:ip = 6789  LE:ip = 9876 */
1883         ldrh    r1, [r1, #0x0a]         /* BE:r1 = ..AB  LE:r1 = ..BA */
1884         strb    r2, [r0]
1885         mov     r2, r2, lsr #8          /* r2 = ...1 */
1886         orr     r2, r2, r3, lsl #8      /* r2 = 4321 */
1887         strh    r2, [r0, #0x01]
1888         mov     r2, r3, lsr #8          /* r2 = .543 */
1889         orr     r3, r2, ip, lsl #24     /* r3 = 6543 */
1890         mov     r2, ip, lsr #8          /* r2 = .987 */
1891         orr     r2, r2, r1, lsl #24     /* r2 = A987 */
1892         mov     r1, r1, lsr #8          /* r1 = ...B */
1893         str     r3, [r0, #0x03]
1894         str     r2, [r0, #0x07]
1895         strb    r1, [r0, #0x0b]
1896         RET
1897         LMEMCPY_C_PAD
1898
1899 /*
1900  * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
1901  */
1902         ldrb    r2, [r1]
1903         ldr     r3, [r1, #0x01]         /* BE:r3 = 1234  LE:r3 = 4321 */
1904         ldr     ip, [r1, #0x05]         /* BE:ip = 5678  LE:ip = 8765 */
1905         ldr     r1, [r1, #0x09]         /* BE:r1 = 9ABx  LE:r1 = xBA9 */
1906         strb    r2, [r0]
1907         strh    r3, [r0, #0x01]
1908         mov     r3, r3, lsr #16         /* r3 = ..43 */
1909         orr     r3, r3, ip, lsl #16     /* r3 = 6543 */
1910         mov     ip, ip, lsr #16         /* ip = ..87 */
1911         orr     ip, ip, r1, lsl #16     /* ip = A987 */
1912         mov     r1, r1, lsr #16         /* r1 = ..xB */
1913         str     r3, [r0, #0x03]
1914         str     ip, [r0, #0x07]
1915         strb    r1, [r0, #0x0b]
1916         RET
1917         LMEMCPY_C_PAD
1918
1919 /*
1920  * 1000: dst is 16-bit aligned, src is 32-bit aligned
1921  */
1922         ldr     ip, [r1]                /* BE:ip = 0123  LE:ip = 3210 */
1923         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1924         ldr     r2, [r1, #0x08]         /* BE:r2 = 89AB  LE:r2 = BA98 */
1925         mov     r1, ip, lsr #16         /* BE:r1 = ..01  LE:r1 = ..32 */
1926         strh    ip, [r0]
1927         orr     r1, r1, r3, lsl #16     /* r1 = 5432 */
1928         mov     r3, r3, lsr #16         /* r3 = ..76 */
1929         orr     r3, r3, r2, lsl #16     /* r3 = 9876 */
1930         mov     r2, r2, lsr #16         /* r2 = ..BA */
1931         str     r1, [r0, #0x02]
1932         str     r3, [r0, #0x06]
1933         strh    r2, [r0, #0x0a]
1934         RET
1935         LMEMCPY_C_PAD
1936
1937 /*
1938  * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
1939  */
1940         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
1941         ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
1942         mov     ip, r2, lsr #8          /* BE:ip = .x01  LE:ip = .210 */
1943         strh    ip, [r0]
1944         ldr     ip, [r1, #0x07]         /* BE:ip = 789A  LE:ip = A987 */
1945         ldrb    r1, [r1, #0x0b]         /* r1 = ...B */
1946         mov     r2, r2, lsr #24         /* r2 = ...2 */
1947         orr     r2, r2, r3, lsl #8      /* r2 = 5432 */
1948         mov     r3, r3, lsr #24         /* r3 = ...6 */
1949         orr     r3, r3, ip, lsl #8      /* r3 = 9876 */
1950         mov     r1, r1, lsl #8          /* r1 = ..B. */
1951         orr     r1, r1, ip, lsr #24     /* r1 = ..BA */
1952         str     r2, [r0, #0x02]
1953         str     r3, [r0, #0x06]
1954         strh    r1, [r0, #0x0a]
1955         RET
1956         LMEMCPY_C_PAD
1957
1958 /*
1959  * 1010: dst is 16-bit aligned, src is 16-bit aligned
1960  */
1961         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1962         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1963         ldr     ip, [r1, #0x06]         /* BE:ip = 6789  LE:ip = 9876 */
1964         ldrh    r1, [r1, #0x0a]         /* BE:r1 = ..AB  LE:r1 = ..BA */
1965         strh    r2, [r0]                /* same offsets as the loads: no shifting needed */
1966         str     r3, [r0, #0x02]
1967         str     ip, [r0, #0x06]
1968         strh    r1, [r0, #0x0a]
1969         RET
1970         LMEMCPY_C_PAD
1971
1972 /*
1973  * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
1974  */
1975         ldr     r2, [r1, #0x09]         /* BE:r2 = 9ABx  LE:r2 = xBA9 */
1976         ldr     r3, [r1, #0x05]         /* BE:r3 = 5678  LE:r3 = 8765 */
1977         mov     ip, r2, lsr #8          /* BE:ip = .9AB  LE:ip = .xBA */
1978         strh    ip, [r0, #0x0a]
1979         ldr     ip, [r1, #0x01]         /* BE:ip = 1234  LE:ip = 4321 */
1980         ldrb    r1, [r1]                /* r1 = ...0 */
1981         mov     r2, r2, lsl #24         /* r2 = 9... */
1982         orr     r2, r2, r3, lsr #8      /* r2 = 9876 */
1983         mov     r3, r3, lsl #24         /* r3 = 5... */
1984         orr     r3, r3, ip, lsr #8      /* r3 = 5432 */
1985         orr     r1, r1, ip, lsl #8      /* r1 = 3210 */
1986         str     r2, [r0, #0x06]
1987         str     r3, [r0, #0x02]
1988         strh    r1, [r0]
1989         RET
1990         LMEMCPY_C_PAD
1991
1992 /*
1993  * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
1994  */
1995         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1996         ldr     ip, [r1, #0x04]         /* BE:ip = 4567  LE:ip = 7654 */
1997         ldr     r1, [r1, #0x08]         /* BE:r1 = 89AB  LE:r1 = BA98 */
1998         strb    r2, [r0]
1999         mov     r3, r2, lsr #8          /* r3 = .321 */
2000         orr     r3, r3, ip, lsl #24     /* r3 = 4321 */
2001         str     r3, [r0, #0x01]
2002         mov     r3, ip, lsr #8          /* r3 = .765 */
2003         orr     r3, r3, r1, lsl #24     /* r3 = 8765 */
2004         str     r3, [r0, #0x05]
2005         mov     r1, r1, lsr #8          /* r1 = .BA9 */
2006         strh    r1, [r0, #0x09]
2007         mov     r1, r1, lsr #16         /* r1 = ...B */
2008         strb    r1, [r0, #0x0b]
2009         RET
2010         LMEMCPY_C_PAD
2011
2012 /*
2013  * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2014  */
2015         ldrb    r2, [r1, #0x0b]         /* r2 = ...B */
2016         ldr     r3, [r1, #0x07]         /* BE:r3 = 789A  LE:r3 = A987 */
2017         ldr     ip, [r1, #0x03]         /* BE:ip = 3456  LE:ip = 6543 */
2018         ldr     r1, [r1, #-1]           /* BE:r1 = x012  LE:r1 = 210x */
2019         strb    r2, [r0, #0x0b]
2020         mov     r2, r3, lsr #16         /* r2 = ..A9 */
2021         strh    r2, [r0, #0x09]
2022         mov     r3, r3, lsl #16         /* r3 = 87.. */
2023         orr     r3, r3, ip, lsr #16     /* r3 = 8765 */
2024         mov     ip, ip, lsl #16         /* ip = 43.. */
2025         orr     ip, ip, r1, lsr #16     /* ip = 4321 */
2026         mov     r1, r1, lsr #8          /* r1 = .210 */
2027         str     r3, [r0, #0x05]
2028         str     ip, [r0, #0x01]
2029         strb    r1, [r0]
2030         RET
2031         LMEMCPY_C_PAD
2032
2033 /*
2034  * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
2035  */
2036         ldrh    r2, [r1]                /* r2 = ..10 */
2037         ldr     r3, [r1, #0x02]         /* r3 = 5432 */
2038         ldr     ip, [r1, #0x06]         /* ip = 9876 */
2039         ldrh    r1, [r1, #0x0a]         /* r1 = ..BA */
2040         strb    r2, [r0]
2041         mov     r2, r2, lsr #8          /* r2 = ...1 */
2042         orr     r2, r2, r3, lsl #8      /* r2 = 4321 */
2043         mov     r3, r3, lsr #24         /* r3 = ...5 */
2044         orr     r3, r3, ip, lsl #8      /* r3 = 8765 */
2045         mov     ip, ip, lsr #24         /* ip = ...9 */
2046         orr     ip, ip, r1, lsl #8      /* ip = .BA9 */
2047         mov     r1, r1, lsr #8          /* r1 = ...B */
2048         str     r2, [r0, #0x01]
2049         str     r3, [r0, #0x05]
2050         strh    ip, [r0, #0x09]
2051         strb    r1, [r0, #0x0b]
2052         RET
2053         LMEMCPY_C_PAD
2054
2055 /*
2056  * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2057  */
2058         ldrb    r2, [r1]                /* r2 = ...0 */
2059         ldr     r3, [r1, #0x01]         /* BE:r3 = 1234  LE:r3 = 4321 */
2060         ldr     ip, [r1, #0x05]         /* BE:ip = 5678  LE:ip = 8765 */
2061         strb    r2, [r0]                /* r2 is stored now so it can be reused below */
2062         ldrh    r2, [r1, #0x09]         /* BE:r2 = ..9A  LE:r2 = ..A9 */
2063         ldrb    r1, [r1, #0x0b]         /* r1 = ...B */
2064         str     r3, [r0, #0x01]         /* same offsets as the loads: no shifting needed */
2065         str     ip, [r0, #0x05]
2066         strh    r2, [r0, #0x09]
2067         strb    r1, [r0, #0x0b]
2068         RET
2069 END(memcpy)