sys/arm/arm/support.S

   1 /*-
   2  * Copyright (c) 2004 Olivier Houchard
   3  * All rights reserved.
   4  *
   5  * Redistribution and use in source and binary forms, with or without
   6  * modification, are permitted provided that the following conditions
   7  * are met:
   8  * 1. Redistributions of source code must retain the above copyright
   9  *    notice, this list of conditions and the following disclaimer.
  10  * 2. Redistributions in binary form must reproduce the above copyright
  11  *    notice, this list of conditions and the following disclaimer in the
  12  *    documentation and/or other materials provided with the distribution.
  13  *
  14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
  18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  24  * SUCH DAMAGE.
  25  */
  26 /*
  27  * Copyright 2003 Wasabi Systems, Inc.
  28  * All rights reserved.
  29  *
  30  * Written by Steve C. Woodford for Wasabi Systems, Inc.
  31  *
  32  * Redistribution and use in source and binary forms, with or without
  33  * modification, are permitted provided that the following conditions
  34  * are met:
  35  * 1. Redistributions of source code must retain the above copyright
  36  *    notice, this list of conditions and the following disclaimer.
  37  * 2. Redistributions in binary form must reproduce the above copyright
  38  *    notice, this list of conditions and the following disclaimer in the
  39  *    documentation and/or other materials provided with the distribution.
  40  * 3. All advertising materials mentioning features or use of this software
  41  *    must display the following acknowledgement:
  42  *      This product includes software developed for the NetBSD Project by
  43  *      Wasabi Systems, Inc.
  44  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
  45  *    or promote products derived from this software without specific prior
  46  *    written permission.
  47  *
  48  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
  49  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  50  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  51  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
  52  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  53  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  54  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  55  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  56  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  57  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  58  * POSSIBILITY OF SUCH DAMAGE.
  59  */
  60 /*
  61  * Copyright (c) 1997 The NetBSD Foundation, Inc.
  62  * All rights reserved.
  63  *
  64  * This code is derived from software contributed to The NetBSD Foundation
  65  * by Neil A. Carson and Mark Brinicombe
  66  *
  67  * Redistribution and use in source and binary forms, with or without
  68  * modification, are permitted provided that the following conditions
  69  * are met:
  70  * 1. Redistributions of source code must retain the above copyright
  71  *    notice, this list of conditions and the following disclaimer.
  72  * 2. Redistributions in binary form must reproduce the above copyright
  73  *    notice, this list of conditions and the following disclaimer in the
  74  *    documentation and/or other materials provided with the distribution.
  75  *
  76  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
  77  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  78  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  79  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
  80  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  81  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  82  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  83  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  84  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  85  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  86  * POSSIBILITY OF SUCH DAMAGE.
  87  */
  88
  89 #include <machine/asm.h>
  90 __FBSDID("$FreeBSD$");
  91
  92 #include "assym.inc"
  93
  94         .syntax unified
  95
  96 /*
  97  * memset: Sets a block of memory to the specified value
  98  *
  99  * On entry:
 100  *   r0 - dest address
 101  *   r1 - byte to write
 102  *   r2 - number of bytes to write
 103  *
 104  * On exit:
 105  *   r0 - dest address
 106  */
 107 /* LINTSTUB: Func: void bzero(void *, size_t) */
 108 ENTRY(bzero)
 109         mov     r3, #0x00
 110         b       do_memset
 111 END(bzero)
 112 /* LINTSTUB: Func: void *memset(void *, int, size_t) */
 113 ENTRY(memset)
 114         and     r3, r1, #0xff           /* We deal with bytes */
 115         mov     r1, r2
 116 do_memset:
 117         cmp     r1, #0x04               /* Do we have less than 4 bytes */
 118         mov     ip, r0
 119         blt     .Lmemset_lessthanfour
 120
 121         /* Ok first we will word align the address */
 122         ands    r2, ip, #0x03           /* Get the bottom two bits */
 123         bne     .Lmemset_wordunaligned  /* The address is not word aligned */
 124
 125         /* We are now word aligned */
 126 .Lmemset_wordaligned:
 127         orr     r3, r3, r3, lsl #8      /* Extend value to 16-bits */
 128         tst     ip, #0x04               /* Quad-align for armv5e */
 129         orr     r3, r3, r3, lsl #16     /* Extend value to 32-bits */
 130         subne   r1, r1, #0x04           /* Quad-align if necessary */
 131         strne   r3, [ip], #0x04
 132         cmp     r1, #0x10
 133         blt     .Lmemset_loop4          /* If less than 16 then use words */
 134         mov     r2, r3                  /* Duplicate data */
 135         cmp     r1, #0x80               /* If < 128 then skip the big loop */
 136         blt     .Lmemset_loop32
 137
 138         /* Do 128 bytes at a time */
 139 .Lmemset_loop128:
 140         subs    r1, r1, #0x80
 141         strdge  r2, [ip], #0x08
 142         strdge  r2, [ip], #0x08
 143         strdge  r2, [ip], #0x08
 144         strdge  r2, [ip], #0x08
 145         strdge  r2, [ip], #0x08
 146         strdge  r2, [ip], #0x08
 147         strdge  r2, [ip], #0x08
 148         strdge  r2, [ip], #0x08
 149         strdge  r2, [ip], #0x08
 150         strdge  r2, [ip], #0x08
 151         strdge  r2, [ip], #0x08
 152         strdge  r2, [ip], #0x08
 153         strdge  r2, [ip], #0x08
 154         strdge  r2, [ip], #0x08
 155         strdge  r2, [ip], #0x08
 156         strdge  r2, [ip], #0x08
 157         bgt     .Lmemset_loop128
 158         RETeq                   /* Zero length so just exit */
 159
 160         add     r1, r1, #0x80           /* Adjust for extra sub */
 161
 162         /* Do 32 bytes at a time */
 163 .Lmemset_loop32:
 164         subs    r1, r1, #0x20
 165         strdge  r2, [ip], #0x08
 166         strdge  r2, [ip], #0x08
 167         strdge  r2, [ip], #0x08
 168         strdge  r2, [ip], #0x08
 169         bgt     .Lmemset_loop32
 170         RETeq                   /* Zero length so just exit */
 171
 172         adds    r1, r1, #0x10           /* Partially adjust for extra sub */
 173
 174         /* Deal with 16 bytes or more */
 175         strdge  r2, [ip], #0x08
 176         strdge  r2, [ip], #0x08
 177         RETeq                   /* Zero length so just exit */
 178
 179         addlt   r1, r1, #0x10           /* Possibly adjust for extra sub */
 180
 181         /* We have at least 4 bytes so copy as words */
 182 .Lmemset_loop4:
 183         subs    r1, r1, #0x04
 184         strge   r3, [ip], #0x04
 185         bgt     .Lmemset_loop4
 186         RETeq                   /* Zero length so just exit */
 187
 188         /* Compensate for 64-bit alignment check */
 189         adds    r1, r1, #0x04
 190         RETeq
 191         cmp     r1, #2
 192
 193         strb    r3, [ip], #0x01         /* Set 1 byte */
 194         strbge  r3, [ip], #0x01         /* Set another byte */
 195         strbgt  r3, [ip]                /* and a third */
 196         RET                     /* Exit */
 197
 198 .Lmemset_wordunaligned:
 199         rsb     r2, r2, #0x004
 200         strb    r3, [ip], #0x01         /* Set 1 byte */
 201         cmp     r2, #0x02
 202         strbge  r3, [ip], #0x01         /* Set another byte */
 203         sub     r1, r1, r2
 204         strbgt  r3, [ip], #0x01         /* and a third */
 205         cmp     r1, #0x04               /* More than 4 bytes left? */
 206         bge     .Lmemset_wordaligned    /* Yup */
 207
 208 .Lmemset_lessthanfour:
 209         cmp     r1, #0x00
 210         RETeq                   /* Zero length so exit */
 211         strb    r3, [ip], #0x01         /* Set 1 byte */
 212         cmp     r1, #0x02
 213         strbge  r3, [ip], #0x01         /* Set another byte */
 214         strbgt  r3, [ip]                /* and a third */
 215         RET                     /* Exit */
 216 EEND(memset)
 217 END(bzero)
 218
 219 ENTRY(bcmp)
 220         mov     ip, r0
 221         cmp     r2, #0x06
 222         beq     .Lmemcmp_6bytes
 223         mov     r0, #0x00
 224
 225         /* Are both addresses aligned the same way? */
 226         cmp     r2, #0x00
 227         eorsne  r3, ip, r1
 228         RETeq                   /* len == 0, or same addresses! */
 229         tst     r3, #0x03
 230         subne   r2, r2, #0x01
 231         bne     .Lmemcmp_bytewise2      /* Badly aligned. Do it the slow way */
 232
 233         /* Word-align the addresses, if necessary */
 234         sub     r3, r1, #0x05
 235         ands    r3, r3, #0x03
 236         add     r3, r3, r3, lsl #1
 237         addne   pc, pc, r3, lsl #3
 238         nop
 239
 240         /* Compare up to 3 bytes */
 241         ldrb    r0, [ip], #0x01
 242         ldrb    r3, [r1], #0x01
 243         subs    r0, r0, r3
 244         RETne
 245         subs    r2, r2, #0x01
 246         RETeq
 247
 248         /* Compare up to 2 bytes */
 249         ldrb    r0, [ip], #0x01
 250         ldrb    r3, [r1], #0x01
 251         subs    r0, r0, r3
 252         RETne
 253         subs    r2, r2, #0x01
 254         RETeq
 255
 256         /* Compare 1 byte */
 257         ldrb    r0, [ip], #0x01
 258         ldrb    r3, [r1], #0x01
 259         subs    r0, r0, r3
 260         RETne
 261         subs    r2, r2, #0x01
 262         RETeq
 263
 264         /* Compare 4 bytes at a time, if possible */
 265         subs    r2, r2, #0x04
 266         bcc     .Lmemcmp_bytewise
 267 .Lmemcmp_word_aligned:
 268         ldr     r0, [ip], #0x04
 269         ldr     r3, [r1], #0x04
 270         subs    r2, r2, #0x04
 271         cmpcs   r0, r3
 272         beq     .Lmemcmp_word_aligned
 273         sub     r0, r0, r3
 274
 275         /* Correct for extra subtraction, and check if done */
 276         adds    r2, r2, #0x04
 277         cmpeq   r0, #0x00               /* If done, did all bytes match? */
 278         RETeq                   /* Yup. Just return */
 279
 280         /* Re-do the final word byte-wise */
 281         sub     ip, ip, #0x04
 282         sub     r1, r1, #0x04
 283
 284 .Lmemcmp_bytewise:
 285         add     r2, r2, #0x03
 286 .Lmemcmp_bytewise2:
 287         ldrb    r0, [ip], #0x01
 288         ldrb    r3, [r1], #0x01
 289         subs    r2, r2, #0x01
 290         cmpcs   r0, r3
 291         beq     .Lmemcmp_bytewise2
 292         sub     r0, r0, r3
 293         RET
 294
 295         /*
 296          * 6 byte compares are very common, thanks to the network stack.
 297          * This code is hand-scheduled to reduce the number of stalls for
 298          * load results. Everything else being equal, this will be ~32%
 299          * faster than a byte-wise memcmp.
 300          */
 301         .align  5
 302 .Lmemcmp_6bytes:
 303         ldrb    r3, [r1, #0x00]         /* r3 = b2#0 */
 304         ldrb    r0, [ip, #0x00]         /* r0 = b1#0 */
 305         ldrb    r2, [r1, #0x01]         /* r2 = b2#1 */
 306         subs    r0, r0, r3              /* r0 = b1#0 - b2#0 */
 307         ldrbeq  r3, [ip, #0x01]         /* r3 = b1#1 */
 308         RETne                   /* Return if mismatch on #0 */
 309         subs    r0, r3, r2              /* r0 = b1#1 - b2#1 */
 310         ldrbeq  r3, [r1, #0x02]         /* r3 = b2#2 */
 311         ldrbeq  r0, [ip, #0x02]         /* r0 = b1#2 */
 312         RETne                   /* Return if mismatch on #1 */
 313         ldrb    r2, [r1, #0x03]         /* r2 = b2#3 */
 314         subs    r0, r0, r3              /* r0 = b1#2 - b2#2 */
 315         ldrbeq  r3, [ip, #0x03]         /* r3 = b1#3 */
 316         RETne                   /* Return if mismatch on #2 */
 317         subs    r0, r3, r2              /* r0 = b1#3 - b2#3 */
 318         ldrbeq  r3, [r1, #0x04]         /* r3 = b2#4 */
 319         ldrbeq  r0, [ip, #0x04]         /* r0 = b1#4 */
 320         RETne                   /* Return if mismatch on #3 */
 321         ldrb    r2, [r1, #0x05]         /* r2 = b2#5 */
 322         subs    r0, r0, r3              /* r0 = b1#4 - b2#4 */
 323         ldrbeq  r3, [ip, #0x05]         /* r3 = b1#5 */
 324         RETne                   /* Return if mismatch on #4 */
 325         sub     r0, r3, r2              /* r0 = b1#5 - b2#5 */
 326         RET
 327 END(bcmp)
 328
 329 ENTRY(bcopy)
 330         /* switch the source and destination registers */
 331         eor     r0, r1, r0
 332         eor     r1, r0, r1
 333         eor     r0, r1, r0
 334 EENTRY(memmove)
 335         /* Do the buffers overlap? */
 336         cmp     r0, r1
 337         RETeq           /* Bail now if src/dst are the same */
 338         subcc   r3, r0, r1      /* if (dst > src) r3 = dst - src */
 339         subcs   r3, r1, r0      /* if (src > dsr) r3 = src - dst */
 340         cmp     r3, r2          /* if (r3 < len) we have an overlap */
 341         bcc     PIC_SYM(_C_LABEL(memcpy), PLT)
 342
 343         /* Determine copy direction */
 344         cmp     r1, r0
 345         bcc     .Lmemmove_backwards
 346
 347         moveq   r0, #0                  /* Quick abort for len=0 */
 348         RETeq
 349
 350         stmdb   sp!, {r0, lr}           /* memmove() returns dest addr */
 351         subs    r2, r2, #4
 352         blt     .Lmemmove_fl4           /* less than 4 bytes */
 353         ands    r12, r0, #3
 354         bne     .Lmemmove_fdestul       /* oh unaligned destination addr */
 355         ands    r12, r1, #3
 356         bne     .Lmemmove_fsrcul                /* oh unaligned source addr */
 357
 358 .Lmemmove_ft8:
 359         /* We have aligned source and destination */
 360         subs    r2, r2, #8
 361         blt     .Lmemmove_fl12          /* less than 12 bytes (4 from above) */
 362         subs    r2, r2, #0x14
 363         blt     .Lmemmove_fl32          /* less than 32 bytes (12 from above) */
 364         stmdb   sp!, {r4}               /* borrow r4 */
 365
 366         /* blat 32 bytes at a time */
 367         /* XXX for really big copies perhaps we should use more registers */
 368 .Lmemmove_floop32:
 369         ldmia   r1!, {r3, r4, r12, lr}
 370         stmia   r0!, {r3, r4, r12, lr}
 371         ldmia   r1!, {r3, r4, r12, lr}
 372         stmia   r0!, {r3, r4, r12, lr}
 373         subs    r2, r2, #0x20
 374         bge     .Lmemmove_floop32
 375
 376         cmn     r2, #0x10
 377         ldmiage r1!, {r3, r4, r12, lr}  /* blat a remaining 16 bytes */
 378         stmiage r0!, {r3, r4, r12, lr}
 379         subge   r2, r2, #0x10
 380         ldmia   sp!, {r4}               /* return r4 */
 381
 382 .Lmemmove_fl32:
 383         adds    r2, r2, #0x14
 384
 385         /* blat 12 bytes at a time */
 386 .Lmemmove_floop12:
 387         ldmiage r1!, {r3, r12, lr}
 388         stmiage r0!, {r3, r12, lr}
 389         subsge  r2, r2, #0x0c
 390         bge     .Lmemmove_floop12
 391
 392 .Lmemmove_fl12:
 393         adds    r2, r2, #8
 394         blt     .Lmemmove_fl4
 395
 396         subs    r2, r2, #4
 397         ldrlt   r3, [r1], #4
 398         strlt   r3, [r0], #4
 399         ldmiage r1!, {r3, r12}
 400         stmiage r0!, {r3, r12}
 401         subge   r2, r2, #4
 402
 403 .Lmemmove_fl4:
 404         /* less than 4 bytes to go */
 405         adds    r2, r2, #4
 406         ldmiaeq sp!, {r0, pc}           /* done */
 407
 408         /* copy the crud byte at a time */
 409         cmp     r2, #2
 410         ldrb    r3, [r1], #1
 411         strb    r3, [r0], #1
 412         ldrbge  r3, [r1], #1
 413         strbge  r3, [r0], #1
 414         ldrbgt  r3, [r1], #1
 415         strbgt  r3, [r0], #1
 416         ldmia   sp!, {r0, pc}
 417
 418         /* erg - unaligned destination */
 419 .Lmemmove_fdestul:
 420         rsb     r12, r12, #4
 421         cmp     r12, #2
 422
 423         /* align destination with byte copies */
 424         ldrb    r3, [r1], #1
 425         strb    r3, [r0], #1
 426         ldrbge  r3, [r1], #1
 427         strbge  r3, [r0], #1
 428         ldrbgt  r3, [r1], #1
 429         strbgt  r3, [r0], #1
 430         subs    r2, r2, r12
 431         blt     .Lmemmove_fl4           /* less the 4 bytes */
 432
 433         ands    r12, r1, #3
 434         beq     .Lmemmove_ft8           /* we have an aligned source */
 435
 436         /* erg - unaligned source */
 437         /* This is where it gets nasty ... */
 438 .Lmemmove_fsrcul:
 439         bic     r1, r1, #3
 440         ldr     lr, [r1], #4
 441         cmp     r12, #2
 442         bgt     .Lmemmove_fsrcul3
 443         beq     .Lmemmove_fsrcul2
 444         cmp     r2, #0x0c
 445         blt     .Lmemmove_fsrcul1loop4
 446         sub     r2, r2, #0x0c
 447         stmdb   sp!, {r4, r5}
 448
 449 .Lmemmove_fsrcul1loop16:
 450         mov     r3, lr, lsr #8
 451         ldmia   r1!, {r4, r5, r12, lr}
 452         orr     r3, r3, r4, lsl #24
 453         mov     r4, r4, lsr #8
 454         orr     r4, r4, r5, lsl #24
 455         mov     r5, r5, lsr #8
 456         orr     r5, r5, r12, lsl #24
 457         mov     r12, r12, lsr #8
 458         orr     r12, r12, lr, lsl #24
 459         stmia   r0!, {r3-r5, r12}
 460         subs    r2, r2, #0x10
 461         bge     .Lmemmove_fsrcul1loop16
 462         ldmia   sp!, {r4, r5}
 463         adds    r2, r2, #0x0c
 464         blt     .Lmemmove_fsrcul1l4
 465
 466 .Lmemmove_fsrcul1loop4:
 467         mov     r12, lr, lsr #8
 468         ldr     lr, [r1], #4
 469         orr     r12, r12, lr, lsl #24
 470         str     r12, [r0], #4
 471         subs    r2, r2, #4
 472         bge     .Lmemmove_fsrcul1loop4
 473
 474 .Lmemmove_fsrcul1l4:
 475         sub     r1, r1, #3
 476         b       .Lmemmove_fl4
 477
 478 .Lmemmove_fsrcul2:
 479         cmp     r2, #0x0c
 480         blt     .Lmemmove_fsrcul2loop4
 481         sub     r2, r2, #0x0c
 482         stmdb   sp!, {r4, r5}
 483
 484 .Lmemmove_fsrcul2loop16:
 485         mov     r3, lr, lsr #16
 486         ldmia   r1!, {r4, r5, r12, lr}
 487         orr     r3, r3, r4, lsl #16
 488         mov     r4, r4, lsr #16
 489         orr     r4, r4, r5, lsl #16
 490         mov     r5, r5, lsr #16
 491         orr     r5, r5, r12, lsl #16
 492         mov     r12, r12, lsr #16
 493         orr     r12, r12, lr, lsl #16
 494         stmia   r0!, {r3-r5, r12}
 495         subs    r2, r2, #0x10
 496         bge     .Lmemmove_fsrcul2loop16
 497         ldmia   sp!, {r4, r5}
 498         adds    r2, r2, #0x0c
 499         blt     .Lmemmove_fsrcul2l4
 500
 501 .Lmemmove_fsrcul2loop4:
 502         mov     r12, lr, lsr #16
 503         ldr     lr, [r1], #4
 504         orr     r12, r12, lr, lsl #16
 505         str     r12, [r0], #4
 506         subs    r2, r2, #4
 507         bge     .Lmemmove_fsrcul2loop4
 508
 509 .Lmemmove_fsrcul2l4:
 510         sub     r1, r1, #2
 511         b       .Lmemmove_fl4
 512
 513 .Lmemmove_fsrcul3:
 514         cmp     r2, #0x0c
 515         blt     .Lmemmove_fsrcul3loop4
 516         sub     r2, r2, #0x0c
 517         stmdb   sp!, {r4, r5}
 518
 519 .Lmemmove_fsrcul3loop16:
 520         mov     r3, lr, lsr #24
 521         ldmia   r1!, {r4, r5, r12, lr}
 522         orr     r3, r3, r4, lsl #8
 523         mov     r4, r4, lsr #24
 524         orr     r4, r4, r5, lsl #8
 525         mov     r5, r5, lsr #24
 526         orr     r5, r5, r12, lsl #8
 527         mov     r12, r12, lsr #24
 528         orr     r12, r12, lr, lsl #8
 529         stmia   r0!, {r3-r5, r12}
 530         subs    r2, r2, #0x10
 531         bge     .Lmemmove_fsrcul3loop16
 532         ldmia   sp!, {r4, r5}
 533         adds    r2, r2, #0x0c
 534         blt     .Lmemmove_fsrcul3l4
 535
 536 .Lmemmove_fsrcul3loop4:
 537         mov     r12, lr, lsr #24
 538         ldr     lr, [r1], #4
 539         orr     r12, r12, lr, lsl #8
 540         str     r12, [r0], #4
 541         subs    r2, r2, #4
 542         bge     .Lmemmove_fsrcul3loop4
 543
 544 .Lmemmove_fsrcul3l4:
 545         sub     r1, r1, #1
 546         b       .Lmemmove_fl4
 547
 548 .Lmemmove_backwards:
 549         add     r1, r1, r2
 550         add     r0, r0, r2
 551         subs    r2, r2, #4
 552         blt     .Lmemmove_bl4           /* less than 4 bytes */
 553         ands    r12, r0, #3
 554         bne     .Lmemmove_bdestul       /* oh unaligned destination addr */
 555         ands    r12, r1, #3
 556         bne     .Lmemmove_bsrcul                /* oh unaligned source addr */
 557
 558 .Lmemmove_bt8:
 559         /* We have aligned source and destination */
 560         subs    r2, r2, #8
 561         blt     .Lmemmove_bl12          /* less than 12 bytes (4 from above) */
 562         stmdb   sp!, {r4, lr}
 563         subs    r2, r2, #0x14           /* less than 32 bytes (12 from above) */
 564         blt     .Lmemmove_bl32
 565
 566         /* blat 32 bytes at a time */
 567         /* XXX for really big copies perhaps we should use more registers */
 568 .Lmemmove_bloop32:
 569         ldmdb   r1!, {r3, r4, r12, lr}
 570         stmdb   r0!, {r3, r4, r12, lr}
 571         ldmdb   r1!, {r3, r4, r12, lr}
 572         stmdb   r0!, {r3, r4, r12, lr}
 573         subs    r2, r2, #0x20
 574         bge     .Lmemmove_bloop32
 575
 576 .Lmemmove_bl32:
 577         cmn     r2, #0x10
 578         ldmdbge r1!, {r3, r4, r12, lr}  /* blat a remaining 16 bytes */
 579         stmdbge r0!, {r3, r4, r12, lr}
 580         subge   r2, r2, #0x10
 581         adds    r2, r2, #0x14
 582         ldmdbge r1!, {r3, r12, lr}      /* blat a remaining 12 bytes */
 583         stmdbge r0!, {r3, r12, lr}
 584         subge   r2, r2, #0x0c
 585         ldmia   sp!, {r4, lr}
 586
 587 .Lmemmove_bl12:
 588         adds    r2, r2, #8
 589         blt     .Lmemmove_bl4
 590         subs    r2, r2, #4
 591         ldrlt   r3, [r1, #-4]!
 592         strlt   r3, [r0, #-4]!
 593         ldmdbge r1!, {r3, r12}
 594         stmdbge r0!, {r3, r12}
 595         subge   r2, r2, #4
 596
 597 .Lmemmove_bl4:
 598         /* less than 4 bytes to go */
 599         adds    r2, r2, #4
 600         RETeq                   /* done */
 601
 602         /* copy the crud byte at a time */
 603         cmp     r2, #2
 604         ldrb    r3, [r1, #-1]!
 605         strb    r3, [r0, #-1]!
 606         ldrbge  r3, [r1, #-1]!
 607         strbge  r3, [r0, #-1]!
 608         ldrbgt  r3, [r1, #-1]!
 609         strbgt  r3, [r0, #-1]!
 610         RET
 611
 612         /* erg - unaligned destination */
 613 .Lmemmove_bdestul:
 614         cmp     r12, #2
 615
 616         /* align destination with byte copies */
 617         ldrb    r3, [r1, #-1]!
 618         strb    r3, [r0, #-1]!
 619         ldrbge  r3, [r1, #-1]!
 620         strbge  r3, [r0, #-1]!
 621         ldrbgt  r3, [r1, #-1]!
 622         strbgt  r3, [r0, #-1]!
 623         subs    r2, r2, r12
 624         blt     .Lmemmove_bl4           /* less than 4 bytes to go */
 625         ands    r12, r1, #3
 626         beq     .Lmemmove_bt8           /* we have an aligned source */
 627
 628         /* erg - unaligned source */
 629         /* This is where it gets nasty ... */
 630 .Lmemmove_bsrcul:
 631         bic     r1, r1, #3
 632         ldr     r3, [r1, #0]
 633         cmp     r12, #2
 634         blt     .Lmemmove_bsrcul1
 635         beq     .Lmemmove_bsrcul2
 636         cmp     r2, #0x0c
 637         blt     .Lmemmove_bsrcul3loop4
 638         sub     r2, r2, #0x0c
 639         stmdb   sp!, {r4, r5, lr}
 640
 641 .Lmemmove_bsrcul3loop16:
 642         mov     lr, r3, lsl #8
 643         ldmdb   r1!, {r3-r5, r12}
 644         orr     lr, lr, r12, lsr #24
 645         mov     r12, r12, lsl #8
 646         orr     r12, r12, r5, lsr #24
 647         mov     r5, r5, lsl #8
 648         orr     r5, r5, r4, lsr #24
 649         mov     r4, r4, lsl #8
 650         orr     r4, r4, r3, lsr #24
 651         stmdb   r0!, {r4, r5, r12, lr}
 652         subs    r2, r2, #0x10
 653         bge     .Lmemmove_bsrcul3loop16
 654         ldmia   sp!, {r4, r5, lr}
 655         adds    r2, r2, #0x0c
 656         blt     .Lmemmove_bsrcul3l4
 657
 658 .Lmemmove_bsrcul3loop4:
 659         mov     r12, r3, lsl #8
 660         ldr     r3, [r1, #-4]!
 661         orr     r12, r12, r3, lsr #24
 662         str     r12, [r0, #-4]!
 663         subs    r2, r2, #4
 664         bge     .Lmemmove_bsrcul3loop4
 665
 666 .Lmemmove_bsrcul3l4:
 667         add     r1, r1, #3
 668         b       .Lmemmove_bl4
 669
 670 .Lmemmove_bsrcul2:
 671         cmp     r2, #0x0c
 672         blt     .Lmemmove_bsrcul2loop4
 673         sub     r2, r2, #0x0c
 674         stmdb   sp!, {r4, r5, lr}
 675
 676 .Lmemmove_bsrcul2loop16:
 677         mov     lr, r3, lsl #16
 678         ldmdb   r1!, {r3-r5, r12}
 679         orr     lr, lr, r12, lsr #16
 680         mov     r12, r12, lsl #16
 681         orr     r12, r12, r5, lsr #16
 682         mov     r5, r5, lsl #16
 683         orr     r5, r5, r4, lsr #16
 684         mov     r4, r4, lsl #16
 685         orr     r4, r4, r3, lsr #16
 686         stmdb   r0!, {r4, r5, r12, lr}
 687         subs    r2, r2, #0x10
 688         bge     .Lmemmove_bsrcul2loop16
 689         ldmia   sp!, {r4, r5, lr}
 690         adds    r2, r2, #0x0c
 691         blt     .Lmemmove_bsrcul2l4
 692
 693 .Lmemmove_bsrcul2loop4:
 694         mov     r12, r3, lsl #16
 695         ldr     r3, [r1, #-4]!
 696         orr     r12, r12, r3, lsr #16
 697         str     r12, [r0, #-4]!
 698         subs    r2, r2, #4
 699         bge     .Lmemmove_bsrcul2loop4
 700
 701 .Lmemmove_bsrcul2l4:
 702         add     r1, r1, #2
 703         b       .Lmemmove_bl4
 704
 705 .Lmemmove_bsrcul1:
 706         cmp     r2, #0x0c
 707         blt     .Lmemmove_bsrcul1loop4
 708         sub     r2, r2, #0x0c
 709         stmdb   sp!, {r4, r5, lr}
 710
 711 .Lmemmove_bsrcul1loop32:
 712         mov     lr, r3, lsl #24
 713         ldmdb   r1!, {r3-r5, r12}
 714         orr     lr, lr, r12, lsr #8
 715         mov     r12, r12, lsl #24
 716         orr     r12, r12, r5, lsr #8
 717         mov     r5, r5, lsl #24
 718         orr     r5, r5, r4, lsr #8
 719         mov     r4, r4, lsl #24
 720         orr     r4, r4, r3, lsr #8
 721         stmdb   r0!, {r4, r5, r12, lr}
 722         subs    r2, r2, #0x10
 723         bge     .Lmemmove_bsrcul1loop32
 724         ldmia   sp!, {r4, r5, lr}
 725         adds    r2, r2, #0x0c
 726         blt     .Lmemmove_bsrcul1l4
 727
 728 .Lmemmove_bsrcul1loop4:
 729         mov     r12, r3, lsl #24
 730         ldr     r3, [r1, #-4]!
 731         orr     r12, r12, r3, lsr #8
 732         str     r12, [r0, #-4]!
 733         subs    r2, r2, #4
 734         bge     .Lmemmove_bsrcul1loop4
 735
 736 .Lmemmove_bsrcul1l4:
 737         add     r1, r1, #1
 738         b       .Lmemmove_bl4
 739 EEND(memmove)
 740 END(bcopy)
 741
 742 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
 743 ENTRY(memcpy)
 744         pld     [r1]
 745         cmp     r2, #0x0c
 746         ble     .Lmemcpy_short          /* <= 12 bytes */
 747 #ifdef FLASHADDR
 748 #if FLASHADDR > PHYSADDR
 749         ldr     r3, =FLASHADDR
 750         cmp     r3, pc
 751         bls     .Lnormal
 752 #else
 753         ldr     r3, =FLASHADDR
 754         cmp     r3, pc
 755         bhi     .Lnormal
 756 #endif
 757 #endif
 758         mov     r3, r0                  /* We must not clobber r0 */
 759
 760         /* Word-align the destination buffer */
 761         ands    ip, r3, #0x03           /* Already word aligned? */
 762         beq     .Lmemcpy_wordaligned    /* Yup */
 763         cmp     ip, #0x02
 764         ldrb    ip, [r1], #0x01
 765         sub     r2, r2, #0x01
 766         strb    ip, [r3], #0x01
 767         ldrble  ip, [r1], #0x01
 768         suble   r2, r2, #0x01
 769         strble  ip, [r3], #0x01
 770         ldrblt  ip, [r1], #0x01
 771         sublt   r2, r2, #0x01
 772         strblt  ip, [r3], #0x01
 773
 774         /* Destination buffer is now word aligned */
 775 .Lmemcpy_wordaligned:
 776         ands    ip, r1, #0x03           /* Is src also word-aligned? */
 777         bne     .Lmemcpy_bad_align      /* Nope. Things just got bad */
 778
 779         /* Quad-align the destination buffer */
 780         tst     r3, #0x07               /* Already quad aligned? */
 781         ldrne   ip, [r1], #0x04
 782         stmfd   sp!, {r4-r9}            /* Free up some registers */
 783         subne   r2, r2, #0x04
 784         strne   ip, [r3], #0x04
 785
 786         /* Destination buffer quad aligned, source is at least word aligned */
 787         subs    r2, r2, #0x80
 788         blt     .Lmemcpy_w_lessthan128
 789
 790         /* Copy 128 bytes at a time */
 791 .Lmemcpy_w_loop128:
 792         ldr     r4, [r1], #0x04         /* LD:00-03 */
 793         ldr     r5, [r1], #0x04         /* LD:04-07 */
 794         pld     [r1, #0x18]             /* Prefetch 0x20 */
 795         ldr     r6, [r1], #0x04         /* LD:08-0b */
 796         ldr     r7, [r1], #0x04         /* LD:0c-0f */
 797         ldr     r8, [r1], #0x04         /* LD:10-13 */
 798         ldr     r9, [r1], #0x04         /* LD:14-17 */
 799         strd    r4, [r3], #0x08         /* ST:00-07 */
 800         ldr     r4, [r1], #0x04         /* LD:18-1b */
 801         ldr     r5, [r1], #0x04         /* LD:1c-1f */
 802         strd    r6, [r3], #0x08         /* ST:08-0f */
 803         ldr     r6, [r1], #0x04         /* LD:20-23 */
 804         ldr     r7, [r1], #0x04         /* LD:24-27 */
 805         pld     [r1, #0x18]             /* Prefetch 0x40 */
 806         strd    r8, [r3], #0x08         /* ST:10-17 */
 807         ldr     r8, [r1], #0x04         /* LD:28-2b */
 808         ldr     r9, [r1], #0x04         /* LD:2c-2f */
 809         strd    r4, [r3], #0x08         /* ST:18-1f */
 810         ldr     r4, [r1], #0x04         /* LD:30-33 */
 811         ldr     r5, [r1], #0x04         /* LD:34-37 */
 812         strd    r6, [r3], #0x08         /* ST:20-27 */
 813         ldr     r6, [r1], #0x04         /* LD:38-3b */
 814         ldr     r7, [r1], #0x04         /* LD:3c-3f */
 815         strd    r8, [r3], #0x08         /* ST:28-2f */
 816         ldr     r8, [r1], #0x04         /* LD:40-43 */
 817         ldr     r9, [r1], #0x04         /* LD:44-47 */
 818         pld     [r1, #0x18]             /* Prefetch 0x60 */
 819         strd    r4, [r3], #0x08         /* ST:30-37 */
 820         ldr     r4, [r1], #0x04         /* LD:48-4b */
 821         ldr     r5, [r1], #0x04         /* LD:4c-4f */
 822         strd    r6, [r3], #0x08         /* ST:38-3f */
 823         ldr     r6, [r1], #0x04         /* LD:50-53 */
 824         ldr     r7, [r1], #0x04         /* LD:54-57 */
 825         strd    r8, [r3], #0x08         /* ST:40-47 */
 826         ldr     r8, [r1], #0x04         /* LD:58-5b */
 827         ldr     r9, [r1], #0x04         /* LD:5c-5f */
 828         strd    r4, [r3], #0x08         /* ST:48-4f */
 829         ldr     r4, [r1], #0x04         /* LD:60-63 */
 830         ldr     r5, [r1], #0x04         /* LD:64-67 */
 831         pld     [r1, #0x18]             /* Prefetch 0x80 */
 832         strd    r6, [r3], #0x08         /* ST:50-57 */
 833         ldr     r6, [r1], #0x04         /* LD:68-6b */
 834         ldr     r7, [r1], #0x04         /* LD:6c-6f */
 835         strd    r8, [r3], #0x08         /* ST:58-5f */
 836         ldr     r8, [r1], #0x04         /* LD:70-73 */
 837         ldr     r9, [r1], #0x04         /* LD:74-77 */
 838         strd    r4, [r3], #0x08         /* ST:60-67 */
 839         ldr     r4, [r1], #0x04         /* LD:78-7b */
 840         ldr     r5, [r1], #0x04         /* LD:7c-7f */
 841         strd    r6, [r3], #0x08         /* ST:68-6f */
 842         strd    r8, [r3], #0x08         /* ST:70-77 */
 843         subs    r2, r2, #0x80
 844         strd    r4, [r3], #0x08         /* ST:78-7f */
 845         bge     .Lmemcpy_w_loop128
 846
 847 .Lmemcpy_w_lessthan128:
 848         adds    r2, r2, #0x80           /* Adjust for extra sub */
 849         ldmfdeq sp!, {r4-r9}
 850         RETeq                   /* Return now if done */
 851         subs    r2, r2, #0x20
 852         blt     .Lmemcpy_w_lessthan32
 853
 854         /* Copy 32 bytes at a time */
 855 .Lmemcpy_w_loop32:
 856         ldr     r4, [r1], #0x04
 857         ldr     r5, [r1], #0x04
 858         pld     [r1, #0x18]
 859         ldr     r6, [r1], #0x04
 860         ldr     r7, [r1], #0x04
 861         ldr     r8, [r1], #0x04
 862         ldr     r9, [r1], #0x04
 863         strd    r4, [r3], #0x08
 864         ldr     r4, [r1], #0x04
 865         ldr     r5, [r1], #0x04
 866         strd    r6, [r3], #0x08
 867         strd    r8, [r3], #0x08
 868         subs    r2, r2, #0x20
 869         strd    r4, [r3], #0x08
 870         bge     .Lmemcpy_w_loop32
 871
 872 .Lmemcpy_w_lessthan32:
 873         adds    r2, r2, #0x20           /* Adjust for extra sub */
 874         ldmfdeq sp!, {r4-r9}
 875         RETeq                   /* Return now if done */
 876
 877         and     r4, r2, #0x18
 878         rsbs    r4, r4, #0x18
 879         addne   pc, pc, r4, lsl #1
 880         nop
 881
 882         /* At least 24 bytes remaining */
 883         ldr     r4, [r1], #0x04
 884         ldr     r5, [r1], #0x04
 885         sub     r2, r2, #0x08
 886         strd    r4, [r3], #0x08
 887
 888         /* At least 16 bytes remaining */
 889         ldr     r4, [r1], #0x04
 890         ldr     r5, [r1], #0x04
 891         sub     r2, r2, #0x08
 892         strd    r4, [r3], #0x08
 893
 894         /* At least 8 bytes remaining */
 895         ldr     r4, [r1], #0x04
 896         ldr     r5, [r1], #0x04
 897         subs    r2, r2, #0x08
 898         strd    r4, [r3], #0x08
 899
 900         /* Less than 8 bytes remaining */
 901         ldmfd   sp!, {r4-r9}
 902         RETeq                   /* Return now if done */
 903         subs    r2, r2, #0x04
 904         ldrge   ip, [r1], #0x04
 905         strge   ip, [r3], #0x04
 906         RETeq                   /* Return now if done */
 907         addlt   r2, r2, #0x04
 908         ldrb    ip, [r1], #0x01
 909         cmp     r2, #0x02
 910         ldrbge  r2, [r1], #0x01
 911         strb    ip, [r3], #0x01
 912         ldrbgt  ip, [r1]
 913         strbge  r2, [r3], #0x01
 914         strbgt  ip, [r3]
 915         RET
 916 /* Place a literal pool here for the above ldr instructions to use */
 917 .ltorg
 918
 919
 920 /*
 921  * At this point, it has not been possible to word align both buffers.
 922  * The destination buffer is word aligned, but the source buffer is not.
 923  */
 924 .Lmemcpy_bad_align:
 925         stmfd   sp!, {r4-r7}
 926         bic     r1, r1, #0x03
 927         cmp     ip, #2
 928         ldr     ip, [r1], #0x04
 929         bgt     .Lmemcpy_bad3
 930         beq     .Lmemcpy_bad2
 931         b       .Lmemcpy_bad1
 932
 933 .Lmemcpy_bad1_loop16:
 934         mov     r4, ip, lsr #8
 935         ldr     r5, [r1], #0x04
 936         pld     [r1, #0x018]
 937         ldr     r6, [r1], #0x04
 938         ldr     r7, [r1], #0x04
 939         ldr     ip, [r1], #0x04
 940         orr     r4, r4, r5, lsl #24
 941         mov     r5, r5, lsr #8
 942         orr     r5, r5, r6, lsl #24
 943         mov     r6, r6, lsr #8
 944         orr     r6, r6, r7, lsl #24
 945         mov     r7, r7, lsr #8
 946         orr     r7, r7, ip, lsl #24
 947         str     r4, [r3], #0x04
 948         str     r5, [r3], #0x04
 949         str     r6, [r3], #0x04
 950         str     r7, [r3], #0x04
 951 .Lmemcpy_bad1:
 952         subs    r2, r2, #0x10
 953         bge     .Lmemcpy_bad1_loop16
 954
 955         adds    r2, r2, #0x10
 956         ldmfdeq sp!, {r4-r7}
 957         RETeq                   /* Return now if done */
 958         subs    r2, r2, #0x04
 959         sublt   r1, r1, #0x03
 960         blt     .Lmemcpy_bad_done
 961
 962 .Lmemcpy_bad1_loop4:
 963         mov     r4, ip, lsr #8
 964         ldr     ip, [r1], #0x04
 965         subs    r2, r2, #0x04
 966         orr     r4, r4, ip, lsl #24
 967         str     r4, [r3], #0x04
 968         bge     .Lmemcpy_bad1_loop4
 969         sub     r1, r1, #0x03
 970         b       .Lmemcpy_bad_done
 971
 972 .Lmemcpy_bad2_loop16:
 973         mov     r4, ip, lsr #16
 974         ldr     r5, [r1], #0x04
 975         pld     [r1, #0x018]
 976         ldr     r6, [r1], #0x04
 977         ldr     r7, [r1], #0x04
 978         ldr     ip, [r1], #0x04
 979         orr     r4, r4, r5, lsl #16
 980         mov     r5, r5, lsr #16
 981         orr     r5, r5, r6, lsl #16
 982         mov     r6, r6, lsr #16
 983         orr     r6, r6, r7, lsl #16
 984         mov     r7, r7, lsr #16
 985         orr     r7, r7, ip, lsl #16
 986         str     r4, [r3], #0x04
 987         str     r5, [r3], #0x04
 988         str     r6, [r3], #0x04
 989         str     r7, [r3], #0x04
 990 .Lmemcpy_bad2:
 991         subs    r2, r2, #0x10
 992         bge     .Lmemcpy_bad2_loop16
 993
 994         adds    r2, r2, #0x10
 995         ldmfdeq sp!, {r4-r7}
 996         RETeq                   /* Return now if done */
 997         subs    r2, r2, #0x04
 998         sublt   r1, r1, #0x02
 999         blt     .Lmemcpy_bad_done
1000
1001 .Lmemcpy_bad2_loop4:
1002         mov     r4, ip, lsr #16
1003         ldr     ip, [r1], #0x04
1004         subs    r2, r2, #0x04
1005         orr     r4, r4, ip, lsl #16
1006         str     r4, [r3], #0x04
1007         bge     .Lmemcpy_bad2_loop4
1008         sub     r1, r1, #0x02
1009         b       .Lmemcpy_bad_done
1010
1011 .Lmemcpy_bad3_loop16:
1012         mov     r4, ip, lsr #24
1013         ldr     r5, [r1], #0x04
1014         pld     [r1, #0x018]
1015         ldr     r6, [r1], #0x04
1016         ldr     r7, [r1], #0x04
1017         ldr     ip, [r1], #0x04
1018         orr     r4, r4, r5, lsl #8
1019         mov     r5, r5, lsr #24
1020         orr     r5, r5, r6, lsl #8
1021         mov     r6, r6, lsr #24
1022         orr     r6, r6, r7, lsl #8
1023         mov     r7, r7, lsr #24
1024         orr     r7, r7, ip, lsl #8
1025         str     r4, [r3], #0x04
1026         str     r5, [r3], #0x04
1027         str     r6, [r3], #0x04
1028         str     r7, [r3], #0x04
1029 .Lmemcpy_bad3:
1030         subs    r2, r2, #0x10
1031         bge     .Lmemcpy_bad3_loop16
1032
1033         adds    r2, r2, #0x10
1034         ldmfdeq sp!, {r4-r7}
1035         RETeq                   /* Return now if done */
1036         subs    r2, r2, #0x04
1037         sublt   r1, r1, #0x01
1038         blt     .Lmemcpy_bad_done
1039
1040 .Lmemcpy_bad3_loop4:
1041         mov     r4, ip, lsr #24
1042         ldr     ip, [r1], #0x04
1043         subs    r2, r2, #0x04
1044         orr     r4, r4, ip, lsl #8
1045         str     r4, [r3], #0x04
1046         bge     .Lmemcpy_bad3_loop4
1047         sub     r1, r1, #0x01
1048
1049 .Lmemcpy_bad_done:
1050         ldmfd   sp!, {r4-r7}
1051         adds    r2, r2, #0x04
1052         RETeq
1053         ldrb    ip, [r1], #0x01
1054         cmp     r2, #0x02
1055         ldrbge  r2, [r1], #0x01
1056         strb    ip, [r3], #0x01
1057         ldrbgt  ip, [r1]
1058         strbge  r2, [r3], #0x01
1059         strbgt  ip, [r3]
1060         RET
1061
1062
1063 /*
1064  * Handle short copies (less than 16 bytes), possibly misaligned.
1065  * Some of these are *very* common, thanks to the network stack,
1066  * and so are handled specially.
1067  */
1068 .Lmemcpy_short:
1069         add     pc, pc, r2, lsl #2
1070         nop
1071         RET                     /* 0x00 */
1072         b       .Lmemcpy_bytewise       /* 0x01 */
1073         b       .Lmemcpy_bytewise       /* 0x02 */
1074         b       .Lmemcpy_bytewise       /* 0x03 */
1075         b       .Lmemcpy_4              /* 0x04 */
1076         b       .Lmemcpy_bytewise       /* 0x05 */
1077         b       .Lmemcpy_6              /* 0x06 */
1078         b       .Lmemcpy_bytewise       /* 0x07 */
1079         b       .Lmemcpy_8              /* 0x08 */
1080         b       .Lmemcpy_bytewise       /* 0x09 */
1081         b       .Lmemcpy_bytewise       /* 0x0a */
1082         b       .Lmemcpy_bytewise       /* 0x0b */
1083         b       .Lmemcpy_c              /* 0x0c */
1084 .Lmemcpy_bytewise:
1085         mov     r3, r0                  /* We must not clobber r0 */
1086         ldrb    ip, [r1], #0x01
1087 1:      subs    r2, r2, #0x01
1088         strb    ip, [r3], #0x01
1089         ldrbne  ip, [r1], #0x01
1090         bne     1b
1091         RET
1092
1093 /******************************************************************************
1094  * Special case for 4 byte copies
1095  */
1096 #define LMEMCPY_4_LOG2  6       /* 64 bytes */
1097 #define LMEMCPY_4_PAD   .align LMEMCPY_4_LOG2
1098         LMEMCPY_4_PAD
1099 .Lmemcpy_4:
1100         and     r2, r1, #0x03
1101         orr     r2, r2, r0, lsl #2
1102         ands    r2, r2, #0x0f
1103         sub     r3, pc, #0x14
1104         addne   pc, r3, r2, lsl #LMEMCPY_4_LOG2
1105
1106 /*
1107  * 0000: dst is 32-bit aligned, src is 32-bit aligned
1108  */
1109         ldr     r2, [r1]
1110         str     r2, [r0]
1111         RET
1112         LMEMCPY_4_PAD
1113
1114 /*
1115  * 0001: dst is 32-bit aligned, src is 8-bit aligned
1116  */
1117         ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
1118         ldr     r2, [r1, #3]            /* BE:r2 = 3xxx  LE:r2 = xxx3 */
1119         mov     r3, r3, lsr #8          /* r3 = .210 */
1120         orr     r3, r3, r2, lsl #24     /* r3 = 3210 */
1121         str     r3, [r0]
1122         RET
1123         LMEMCPY_4_PAD
1124
1125 /*
1126  * 0010: dst is 32-bit aligned, src is 16-bit aligned
1127  */
1128         ldrh    r3, [r1, #0x02]
1129         ldrh    r2, [r1]
1130         orr     r3, r2, r3, lsl #16
1131         str     r3, [r0]
1132         RET
1133         LMEMCPY_4_PAD
1134
1135 /*
1136  * 0011: dst is 32-bit aligned, src is 8-bit aligned
1137  */
1138         ldr     r3, [r1, #-3]           /* BE:r3 = xxx0  LE:r3 = 0xxx */
1139         ldr     r2, [r1, #1]            /* BE:r2 = 123x  LE:r2 = x321 */
1140         mov     r3, r3, lsr #24         /* r3 = ...0 */
1141         orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
1142         str     r3, [r0]
1143         RET
1144         LMEMCPY_4_PAD
1145
1146 /*
1147  * 0100: dst is 8-bit aligned, src is 32-bit aligned
1148  */
1149         ldr     r2, [r1]
1150         strb    r2, [r0]
1151         mov     r3, r2, lsr #8
1152         mov     r1, r2, lsr #24
1153         strb    r1, [r0, #0x03]
1154         strh    r3, [r0, #0x01]
1155         RET
1156         LMEMCPY_4_PAD
1157
1158 /*
1159  * 0101: dst is 8-bit aligned, src is 8-bit aligned
1160  */
1161         ldrb    r2, [r1]
1162         ldrh    r3, [r1, #0x01]
1163         ldrb    r1, [r1, #0x03]
1164         strb    r2, [r0]
1165         strh    r3, [r0, #0x01]
1166         strb    r1, [r0, #0x03]
1167         RET
1168         LMEMCPY_4_PAD
1169
1170 /*
1171  * 0110: dst is 8-bit aligned, src is 16-bit aligned
1172  */
1173         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1174         ldrh    r3, [r1, #0x02]         /* LE:r3 = ..23  LE:r3 = ..32 */
1175         strb    r2, [r0]
1176         mov     r2, r2, lsr #8          /* r2 = ...1 */
1177         orr     r2, r2, r3, lsl #8      /* r2 = .321 */
1178         mov     r3, r3, lsr #8          /* r3 = ...3 */
1179         strh    r2, [r0, #0x01]
1180         strb    r3, [r0, #0x03]
1181         RET
1182         LMEMCPY_4_PAD
1183
1184 /*
1185  * 0111: dst is 8-bit aligned, src is 8-bit aligned
1186  */
1187         ldrb    r2, [r1]
1188         ldrh    r3, [r1, #0x01]
1189         ldrb    r1, [r1, #0x03]
1190         strb    r2, [r0]
1191         strh    r3, [r0, #0x01]
1192         strb    r1, [r0, #0x03]
1193         RET
1194         LMEMCPY_4_PAD
1195
1196 /*
1197  * 1000: dst is 16-bit aligned, src is 32-bit aligned
1198  */
1199         ldr     r2, [r1]
1200         strh    r2, [r0]
1201         mov     r3, r2, lsr #16
1202         strh    r3, [r0, #0x02]
1203         RET
1204         LMEMCPY_4_PAD
1205
1206 /*
1207  * 1001: dst is 16-bit aligned, src is 8-bit aligned
1208  */
1209         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
1210         ldr     r3, [r1, #3]            /* BE:r3 = 3xxx  LE:r3 = xxx3 */
1211         mov     r1, r2, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
1212         strh    r1, [r0]
1213         mov     r2, r2, lsr #24         /* r2 = ...2 */
1214         orr     r2, r2, r3, lsl #8      /* r2 = xx32 */
1215         strh    r2, [r0, #0x02]
1216         RET
1217         LMEMCPY_4_PAD
1218
1219 /*
1220  * 1010: dst is 16-bit aligned, src is 16-bit aligned
1221  */
1222         ldrh    r2, [r1]
1223         ldrh    r3, [r1, #0x02]
1224         strh    r2, [r0]
1225         strh    r3, [r0, #0x02]
1226         RET
1227         LMEMCPY_4_PAD
1228
1229 /*
1230  * 1011: dst is 16-bit aligned, src is 8-bit aligned
1231  */
1232         ldr     r3, [r1, #1]            /* BE:r3 = 123x  LE:r3 = x321 */
1233         ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
1234         mov     r1, r3, lsr #8          /* BE:r1 = .123  LE:r1 = .x32 */
1235         strh    r1, [r0, #0x02]
1236         mov     r3, r3, lsl #8          /* r3 = 321. */
1237         orr     r3, r3, r2, lsr #24     /* r3 = 3210 */
1238         strh    r3, [r0]
1239         RET
1240         LMEMCPY_4_PAD
1241
1242 /*
1243  * 1100: dst is 8-bit aligned, src is 32-bit aligned
1244  */
1245         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1246         strb    r2, [r0]
1247         mov     r3, r2, lsr #8
1248         mov     r1, r2, lsr #24
1249         strh    r3, [r0, #0x01]
1250         strb    r1, [r0, #0x03]
1251         RET
1252         LMEMCPY_4_PAD
1253
1254 /*
1255  * 1101: dst is 8-bit aligned, src is 8-bit aligned
1256  */
1257         ldrb    r2, [r1]
1258         ldrh    r3, [r1, #0x01]
1259         ldrb    r1, [r1, #0x03]
1260         strb    r2, [r0]
1261         strh    r3, [r0, #0x01]
1262         strb    r1, [r0, #0x03]
1263         RET
1264         LMEMCPY_4_PAD
1265
1266 /*
1267  * 1110: dst is 8-bit aligned, src is 16-bit aligned
1268  */
1269         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1270         ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
1271         strb    r2, [r0]
1272         mov     r2, r2, lsr #8          /* r2 = ...1 */
1273         orr     r2, r2, r3, lsl #8      /* r2 = .321 */
1274         strh    r2, [r0, #0x01]
1275         mov     r3, r3, lsr #8          /* r3 = ...3 */
1276         strb    r3, [r0, #0x03]
1277         RET
1278         LMEMCPY_4_PAD
1279
1280 /*
1281  * 1111: dst is 8-bit aligned, src is 8-bit aligned
1282  */
1283         ldrb    r2, [r1]
1284         ldrh    r3, [r1, #0x01]
1285         ldrb    r1, [r1, #0x03]
1286         strb    r2, [r0]
1287         strh    r3, [r0, #0x01]
1288         strb    r1, [r0, #0x03]
1289         RET
1290         LMEMCPY_4_PAD
1291
1292
1293 /******************************************************************************
1294  * Special case for 6 byte copies
1295  */
1296 #define LMEMCPY_6_LOG2  6       /* 64 bytes */
1297 #define LMEMCPY_6_PAD   .align LMEMCPY_6_LOG2
1298         LMEMCPY_6_PAD
1299 .Lmemcpy_6:
1300         and     r2, r1, #0x03
1301         orr     r2, r2, r0, lsl #2
1302         ands    r2, r2, #0x0f
1303         sub     r3, pc, #0x14
1304         addne   pc, r3, r2, lsl #LMEMCPY_6_LOG2
1305
1306 /*
1307  * 0000: dst is 32-bit aligned, src is 32-bit aligned
1308  */
1309         ldr     r2, [r1]
1310         ldrh    r3, [r1, #0x04]
1311         str     r2, [r0]
1312         strh    r3, [r0, #0x04]
1313         RET
1314         LMEMCPY_6_PAD
1315
1316 /*
1317  * 0001: dst is 32-bit aligned, src is 8-bit aligned
1318  */
1319         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
1320         ldr     r3, [r1, #0x03]         /* BE:r3 = 345x  LE:r3 = x543 */
1321         mov     r2, r2, lsr #8          /* r2 = .210 */
1322         orr     r2, r2, r3, lsl #24     /* r2 = 3210 */
1323         mov     r3, r3, lsr #8          /* BE:r3 = .345  LE:r3 = .x54 */
1324         str     r2, [r0]
1325         strh    r3, [r0, #0x04]
1326         RET
1327         LMEMCPY_6_PAD
1328
1329 /*
1330  * 0010: dst is 32-bit aligned, src is 16-bit aligned
1331  */
1332         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1333         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1334         mov     r1, r3, lsr #16         /* r1 = ..54 */
1335         orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
1336         str     r2, [r0]
1337         strh    r1, [r0, #0x04]
1338         RET
1339         LMEMCPY_6_PAD
1340
1341 /*
1342  * 0011: dst is 32-bit aligned, src is 8-bit aligned
1343  */
1344         ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
1345         ldr     r3, [r1, #1]            /* BE:r3 = 1234  LE:r3 = 4321 */
1346         ldr     r1, [r1, #5]            /* BE:r1 = 5xxx  LE:r3 = xxx5 */
1347         mov     r2, r2, lsr #24         /* r2 = ...0 */
1348         orr     r2, r2, r3, lsl #8      /* r2 = 3210 */
1349         mov     r1, r1, lsl #8          /* r1 = xx5. */
1350         orr     r1, r1, r3, lsr #24     /* r1 = xx54 */
1351         str     r2, [r0]
1352         strh    r1, [r0, #0x04]
1353         RET
1354         LMEMCPY_6_PAD
1355
1356 /*
1357  * 0100: dst is 8-bit aligned, src is 32-bit aligned
1358  */
1359         ldr     r3, [r1]                /* BE:r3 = 0123  LE:r3 = 3210 */
1360         ldrh    r2, [r1, #0x04]         /* BE:r2 = ..45  LE:r2 = ..54 */
1361         mov     r1, r3, lsr #8          /* BE:r1 = .012  LE:r1 = .321 */
1362         strh    r1, [r0, #0x01]
1363         strb    r3, [r0]
1364         mov     r3, r3, lsr #24         /* r3 = ...3 */
1365         orr     r3, r3, r2, lsl #8      /* r3 = .543 */
1366         mov     r2, r2, lsr #8          /* r2 = ...5 */
1367         strh    r3, [r0, #0x03]
1368         strb    r2, [r0, #0x05]
1369         RET
1370         LMEMCPY_6_PAD
1371
1372 /*
1373  * 0101: dst is 8-bit aligned, src is 8-bit aligned
1374  */
1375         ldrb    r2, [r1]
1376         ldrh    r3, [r1, #0x01]
1377         ldrh    ip, [r1, #0x03]
1378         ldrb    r1, [r1, #0x05]
1379         strb    r2, [r0]
1380         strh    r3, [r0, #0x01]
1381         strh    ip, [r0, #0x03]
1382         strb    r1, [r0, #0x05]
1383         RET
1384         LMEMCPY_6_PAD
1385
1386 /*
1387  * 0110: dst is 8-bit aligned, src is 16-bit aligned
1388  */
1389         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1390         ldr     r1, [r1, #0x02]         /* BE:r1 = 2345  LE:r1 = 5432 */
1391         strb    r2, [r0]
1392         mov     r3, r1, lsr #24
1393         strb    r3, [r0, #0x05]
1394         mov     r3, r1, lsr #8          /* r3 = .543 */
1395         strh    r3, [r0, #0x03]
1396         mov     r3, r2, lsr #8          /* r3 = ...1 */
1397         orr     r3, r3, r1, lsl #8      /* r3 = 4321 */
1398         strh    r3, [r0, #0x01]
1399         RET
1400         LMEMCPY_6_PAD
1401
1402 /*
1403  * 0111: dst is 8-bit aligned, src is 8-bit aligned
1404  */
1405         ldrb    r2, [r1]
1406         ldrh    r3, [r1, #0x01]
1407         ldrh    ip, [r1, #0x03]
1408         ldrb    r1, [r1, #0x05]
1409         strb    r2, [r0]
1410         strh    r3, [r0, #0x01]
1411         strh    ip, [r0, #0x03]
1412         strb    r1, [r0, #0x05]
1413         RET
1414         LMEMCPY_6_PAD
1415
1416 /*
1417  * 1000: dst is 16-bit aligned, src is 32-bit aligned
1418  */
1419         ldrh    r2, [r1, #0x04]         /* r2 = ..54 */
1420         ldr     r3, [r1]                /* r3 = 3210 */
1421         mov     r2, r2, lsl #16         /* r2 = 54.. */
1422         orr     r2, r2, r3, lsr #16     /* r2 = 5432 */
1423         strh    r3, [r0]
1424         str     r2, [r0, #0x02]
1425         RET
1426         LMEMCPY_6_PAD
1427
1428 /*
1429  * 1001: dst is 16-bit aligned, src is 8-bit aligned
1430  */
1431         ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
1432         ldr     r2, [r1, #3]            /* BE:r2 = 345x  LE:r2 = x543 */
1433         mov     r1, r3, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
1434         mov     r2, r2, lsl #8          /* r2 = 543. */
1435         orr     r2, r2, r3, lsr #24     /* r2 = 5432 */
1436         strh    r1, [r0]
1437         str     r2, [r0, #0x02]
1438         RET
1439         LMEMCPY_6_PAD
1440
1441 /*
1442  * 1010: dst is 16-bit aligned, src is 16-bit aligned
1443  */
1444         ldrh    r2, [r1]
1445         ldr     r3, [r1, #0x02]
1446         strh    r2, [r0]
1447         str     r3, [r0, #0x02]
1448         RET
1449         LMEMCPY_6_PAD
1450
1451 /*
1452  * 1011: dst is 16-bit aligned, src is 8-bit aligned
1453  */
1454         ldrb    r3, [r1]                /* r3 = ...0 */
1455         ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
1456         ldrb    r1, [r1, #0x05]         /* r1 = ...5 */
1457         orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
1458         mov     r1, r1, lsl #24         /* r1 = 5... */
1459         orr     r1, r1, r2, lsr #8      /* r1 = 5432 */
1460         strh    r3, [r0]
1461         str     r1, [r0, #0x02]
1462         RET
1463         LMEMCPY_6_PAD
1464
1465 /*
1466  * 1100: dst is 8-bit aligned, src is 32-bit aligned
1467  */
1468         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1469         ldrh    r1, [r1, #0x04]         /* BE:r1 = ..45  LE:r1 = ..54 */
1470         strb    r2, [r0]
1471         mov     r2, r2, lsr #8          /* r2 = .321 */
1472         orr     r2, r2, r1, lsl #24     /* r2 = 4321 */
1473         mov     r1, r1, lsr #8          /* r1 = ...5 */
1474         str     r2, [r0, #0x01]
1475         strb    r1, [r0, #0x05]
1476         RET
1477         LMEMCPY_6_PAD
1478
1479 /*
1480  * 1101: dst is 8-bit aligned, src is 8-bit aligned
1481  */
1482         ldrb    r2, [r1]
1483         ldrh    r3, [r1, #0x01]
1484         ldrh    ip, [r1, #0x03]
1485         ldrb    r1, [r1, #0x05]
1486         strb    r2, [r0]
1487         strh    r3, [r0, #0x01]
1488         strh    ip, [r0, #0x03]
1489         strb    r1, [r0, #0x05]
1490         RET
1491         LMEMCPY_6_PAD
1492
1493 /*
1494  * 1110: dst is 8-bit aligned, src is 16-bit aligned
1495  */
1496         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1497         ldr     r1, [r1, #0x02]         /* BE:r1 = 2345  LE:r1 = 5432 */
1498         strb    r2, [r0]
1499         mov     r2, r2, lsr #8          /* r2 = ...1 */
1500         orr     r2, r2, r1, lsl #8      /* r2 = 4321 */
1501         mov     r1, r1, lsr #24         /* r1 = ...5 */
1502         str     r2, [r0, #0x01]
1503         strb    r1, [r0, #0x05]
1504         RET
1505         LMEMCPY_6_PAD
1506
1507 /*
1508  * 1111: dst is 8-bit aligned, src is 8-bit aligned
1509  */
1510         ldrb    r2, [r1]
1511         ldr     r3, [r1, #0x01]
1512         ldrb    r1, [r1, #0x05]
1513         strb    r2, [r0]
1514         str     r3, [r0, #0x01]
1515         strb    r1, [r0, #0x05]
1516         RET
1517         LMEMCPY_6_PAD
1518
1519
1520 /******************************************************************************
1521  * Special case for 8 byte copies
1522  */
1523 #define LMEMCPY_8_LOG2  6       /* 64 bytes */
1524 #define LMEMCPY_8_PAD   .align LMEMCPY_8_LOG2
1525         LMEMCPY_8_PAD
1526 .Lmemcpy_8:
1527         and     r2, r1, #0x03
1528         orr     r2, r2, r0, lsl #2
1529         ands    r2, r2, #0x0f
1530         sub     r3, pc, #0x14
1531         addne   pc, r3, r2, lsl #LMEMCPY_8_LOG2
1532
1533 /*
1534  * 0000: dst is 32-bit aligned, src is 32-bit aligned
1535  */
1536         ldr     r2, [r1]
1537         ldr     r3, [r1, #0x04]
1538         str     r2, [r0]
1539         str     r3, [r0, #0x04]
1540         RET
1541         LMEMCPY_8_PAD
1542
1543 /*
1544  * 0001: dst is 32-bit aligned, src is 8-bit aligned
1545  */
1546         ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
1547         ldr     r2, [r1, #0x03]         /* BE:r2 = 3456  LE:r2 = 6543 */
1548         ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
1549         mov     r3, r3, lsr #8          /* r3 = .210 */
1550         orr     r3, r3, r2, lsl #24     /* r3 = 3210 */
1551         mov     r1, r1, lsl #24         /* r1 = 7... */
1552         orr     r2, r1, r2, lsr #8      /* r2 = 7654 */
1553         str     r3, [r0]
1554         str     r2, [r0, #0x04]
1555         RET
1556         LMEMCPY_8_PAD
1557
1558 /*
1559  * 0010: dst is 32-bit aligned, src is 16-bit aligned
1560  */
1561         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1562         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1563         ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
1564         orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
1565         mov     r3, r3, lsr #16         /* r3 = ..54 */
1566         orr     r3, r3, r1, lsl #16     /* r3 = 7654 */
1567         str     r2, [r0]
1568         str     r3, [r0, #0x04]
1569         RET
1570         LMEMCPY_8_PAD
1571
1572 /*
1573  * 0011: dst is 32-bit aligned, src is 8-bit aligned
1574  */
1575         ldrb    r3, [r1]                /* r3 = ...0 */
1576         ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
1577         ldr     r1, [r1, #0x05]         /* BE:r1 = 567x  LE:r1 = x765 */
1578         orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
1579         mov     r2, r2, lsr #24         /* r2 = ...4 */
1580         orr     r2, r2, r1, lsl #8      /* r2 = 7654 */
1581         str     r3, [r0]
1582         str     r2, [r0, #0x04]
1583         RET
1584         LMEMCPY_8_PAD
1585
1586 /*
1587  * 0100: dst is 8-bit aligned, src is 32-bit aligned
1588  */
1589         ldr     r3, [r1]                /* BE:r3 = 0123  LE:r3 = 3210 */
1590         ldr     r2, [r1, #0x04]         /* BE:r2 = 4567  LE:r2 = 7654 */
1591         strb    r3, [r0]
1592         mov     r1, r2, lsr #24         /* r1 = ...7 */
1593         strb    r1, [r0, #0x07]
1594         mov     r1, r3, lsr #8          /* r1 = .321 */
1595         mov     r3, r3, lsr #24         /* r3 = ...3 */
1596         orr     r3, r3, r2, lsl #8      /* r3 = 6543 */
1597         strh    r1, [r0, #0x01]
1598         str     r3, [r0, #0x03]
1599         RET
1600         LMEMCPY_8_PAD
1601
1602 /*
1603  * 0101: dst is 8-bit aligned, src is 8-bit aligned
1604  */
1605         ldrb    r2, [r1]
1606         ldrh    r3, [r1, #0x01]
1607         ldr     ip, [r1, #0x03]
1608         ldrb    r1, [r1, #0x07]
1609         strb    r2, [r0]
1610         strh    r3, [r0, #0x01]
1611         str     ip, [r0, #0x03]
1612         strb    r1, [r0, #0x07]
1613         RET
1614         LMEMCPY_8_PAD
1615
1616 /*
1617  * 0110: dst is 8-bit aligned, src is 16-bit aligned
1618  */
1619         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1620         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1621         ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
1622         strb    r2, [r0]                /* 0 */
1623         mov     ip, r1, lsr #8          /* ip = ...7 */
1624         strb    ip, [r0, #0x07]         /* 7 */
1625         mov     ip, r2, lsr #8          /* ip = ...1 */
1626         orr     ip, ip, r3, lsl #8      /* ip = 4321 */
1627         mov     r3, r3, lsr #8          /* r3 = .543 */
1628         orr     r3, r3, r1, lsl #24     /* r3 = 6543 */
1629         strh    ip, [r0, #0x01]
1630         str     r3, [r0, #0x03]
1631         RET
1632         LMEMCPY_8_PAD
1633
1634 /*
1635  * 0111: dst is 8-bit aligned, src is 8-bit aligned
1636  */
1637         ldrb    r3, [r1]                /* r3 = ...0 */
1638         ldr     ip, [r1, #0x01]         /* BE:ip = 1234  LE:ip = 4321 */
1639         ldrh    r2, [r1, #0x05]         /* BE:r2 = ..56  LE:r2 = ..65 */
1640         ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
1641         strb    r3, [r0]
1642         mov     r3, ip, lsr #16         /* BE:r3 = ..12  LE:r3 = ..43 */
1643         strh    ip, [r0, #0x01]
1644         orr     r2, r3, r2, lsl #16     /* r2 = 6543 */
1645         str     r2, [r0, #0x03]
1646         strb    r1, [r0, #0x07]
1647         RET
1648         LMEMCPY_8_PAD
1649
1650 /*
1651  * 1000: dst is 16-bit aligned, src is 32-bit aligned
1652  */
1653         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1654         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1655         mov     r1, r2, lsr #16         /* BE:r1 = ..01  LE:r1 = ..32 */
1656         strh    r2, [r0]
1657         orr     r2, r1, r3, lsl #16     /* r2 = 5432 */
1658         mov     r3, r3, lsr #16         /* r3 = ..76 */
1659         str     r2, [r0, #0x02]
1660         strh    r3, [r0, #0x06]
1661         RET
1662         LMEMCPY_8_PAD
1663
1664 /*
1665  * 1001: dst is 16-bit aligned, src is 8-bit aligned
1666  */
1667         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
1668         ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
1669         ldrb    ip, [r1, #0x07]         /* ip = ...7 */
1670         mov     r1, r2, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
1671         strh    r1, [r0]
1672         mov     r1, r2, lsr #24         /* r1 = ...2 */
1673         orr     r1, r1, r3, lsl #8      /* r1 = 5432 */
1674         mov     r3, r3, lsr #24         /* r3 = ...6 */
1675         orr     r3, r3, ip, lsl #8      /* r3 = ..76 */
1676         str     r1, [r0, #0x02]
1677         strh    r3, [r0, #0x06]
1678         RET
1679         LMEMCPY_8_PAD
1680
1681 /*
1682  * 1010: dst is 16-bit aligned, src is 16-bit aligned
1683  */
1684         ldrh    r2, [r1]
1685         ldr     ip, [r1, #0x02]
1686         ldrh    r3, [r1, #0x06]
1687         strh    r2, [r0]
1688         str     ip, [r0, #0x02]
1689         strh    r3, [r0, #0x06]
1690         RET
1691         LMEMCPY_8_PAD
1692
1693 /*
1694  * 1011: dst is 16-bit aligned, src is 8-bit aligned
1695  */
1696         ldr     r3, [r1, #0x05]         /* BE:r3 = 567x  LE:r3 = x765 */
1697         ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
1698         ldrb    ip, [r1]                /* ip = ...0 */
1699         mov     r1, r3, lsr #8          /* BE:r1 = .567  LE:r1 = .x76 */
1700         strh    r1, [r0, #0x06]
1701         mov     r3, r3, lsl #24         /* r3 = 5... */
1702         orr     r3, r3, r2, lsr #8      /* r3 = 5432 */
1703         orr     r2, ip, r2, lsl #8      /* r2 = 3210 */
1704         str     r3, [r0, #0x02]
1705         strh    r2, [r0]
1706         RET
1707         LMEMCPY_8_PAD
1708
1709 /*
1710  * 1100: dst is 8-bit aligned, src is 32-bit aligned
1711  */
1712         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1713         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1714         mov     r1, r3, lsr #8          /* BE:r1 = .456  LE:r1 = .765 */
1715         strh    r1, [r0, #0x05]
1716         strb    r2, [r0]
1717         mov     r1, r3, lsr #24         /* r1 = ...7 */
1718         strb    r1, [r0, #0x07]
1719         mov     r2, r2, lsr #8          /* r2 = .321 */
1720         orr     r2, r2, r3, lsl #24     /* r2 = 4321 */
1721         str     r2, [r0, #0x01]
1722         RET
1723         LMEMCPY_8_PAD
1724
1725 /*
1726  * 1101: dst is 8-bit aligned, src is 8-bit aligned
1727  */
1728         ldrb    r3, [r1]                /* r3 = ...0 */
1729         ldrh    r2, [r1, #0x01]         /* BE:r2 = ..12  LE:r2 = ..21 */
1730         ldr     ip, [r1, #0x03]         /* BE:ip = 3456  LE:ip = 6543 */
1731         ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
1732         strb    r3, [r0]
1733         mov     r3, ip, lsr #16         /* BE:r3 = ..34  LE:r3 = ..65 */
1734         strh    r3, [r0, #0x05]
1735         orr     r2, r2, ip, lsl #16     /* r2 = 4321 */
1736         str     r2, [r0, #0x01]
1737         strb    r1, [r0, #0x07]
1738         RET
1739         LMEMCPY_8_PAD
1740
1741 /*
1742  * 1110: dst is 8-bit aligned, src is 16-bit aligned
1743  */
1744         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1745         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1746         ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
1747         strb    r2, [r0]
1748         mov     ip, r2, lsr #8          /* ip = ...1 */
1749         orr     ip, ip, r3, lsl #8      /* ip = 4321 */
1750         mov     r2, r1, lsr #8          /* r2 = ...7 */
1751         strb    r2, [r0, #0x07]
1752         mov     r1, r1, lsl #8          /* r1 = .76. */
1753         orr     r1, r1, r3, lsr #24     /* r1 = .765 */
1754         str     ip, [r0, #0x01]
1755         strh    r1, [r0, #0x05]
1756         RET
1757         LMEMCPY_8_PAD
1758
1759 /*
1760  * 1111: dst is 8-bit aligned, src is 8-bit aligned
1761  */
1762         ldrb    r2, [r1]
1763         ldr     ip, [r1, #0x01]
1764         ldrh    r3, [r1, #0x05]
1765         ldrb    r1, [r1, #0x07]
1766         strb    r2, [r0]
1767         str     ip, [r0, #0x01]
1768         strh    r3, [r0, #0x05]
1769         strb    r1, [r0, #0x07]
1770         RET
1771         LMEMCPY_8_PAD
1772
1773 /******************************************************************************
1774  * Special case for 12 byte copies
1775  */
1776 #define LMEMCPY_C_LOG2  7       /* 128 bytes */
1777 #define LMEMCPY_C_PAD   .align LMEMCPY_C_LOG2
1778         LMEMCPY_C_PAD
1779 .Lmemcpy_c:
1780         and     r2, r1, #0x03
1781         orr     r2, r2, r0, lsl #2
1782         ands    r2, r2, #0x0f
1783         sub     r3, pc, #0x14
1784         addne   pc, r3, r2, lsl #LMEMCPY_C_LOG2
1785
1786 /*
1787  * 0000: dst is 32-bit aligned, src is 32-bit aligned
1788  */
1789         ldr     r2, [r1]
1790         ldr     r3, [r1, #0x04]
1791         ldr     r1, [r1, #0x08]
1792         str     r2, [r0]
1793         str     r3, [r0, #0x04]
1794         str     r1, [r0, #0x08]
1795         RET
1796         LMEMCPY_C_PAD
1797
1798 /*
1799  * 0001: dst is 32-bit aligned, src is 8-bit aligned
1800  */
1801         ldrb    r2, [r1, #0xb]          /* r2 = ...B */
1802         ldr     ip, [r1, #0x07]         /* BE:ip = 789A  LE:ip = A987 */
1803         ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
1804         ldr     r1, [r1, #-1]           /* BE:r1 = x012  LE:r1 = 210x */
1805         mov     r2, r2, lsl #24         /* r2 = B... */
1806         orr     r2, r2, ip, lsr #8      /* r2 = BA98 */
1807         str     r2, [r0, #0x08]
1808         mov     r2, ip, lsl #24         /* r2 = 7... */
1809         orr     r2, r2, r3, lsr #8      /* r2 = 7654 */
1810         mov     r1, r1, lsr #8          /* r1 = .210 */
1811         orr     r1, r1, r3, lsl #24     /* r1 = 3210 */
1812         str     r2, [r0, #0x04]
1813         str     r1, [r0]
1814         RET
1815         LMEMCPY_C_PAD
1816
1817 /*
1818  * 0010: dst is 32-bit aligned, src is 16-bit aligned
1819  */
1820         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1821         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1822         ldr     ip, [r1, #0x06]         /* BE:ip = 6789  LE:ip = 9876 */
1823         ldrh    r1, [r1, #0x0a]         /* BE:r1 = ..AB  LE:r1 = ..BA */
1824         orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
1825         str     r2, [r0]
1826         mov     r3, r3, lsr #16         /* r3 = ..54 */
1827         orr     r3, r3, ip, lsl #16     /* r3 = 7654 */
1828         mov     r1, r1, lsl #16         /* r1 = BA.. */
1829         orr     r1, r1, ip, lsr #16     /* r1 = BA98 */
1830         str     r3, [r0, #0x04]
1831         str     r1, [r0, #0x08]
1832         RET
1833         LMEMCPY_C_PAD
1834
1835 /*
1836  * 0011: dst is 32-bit aligned, src is 8-bit aligned
1837  */
1838         ldrb    r2, [r1]                /* r2 = ...0 */
1839         ldr     r3, [r1, #0x01]         /* BE:r3 = 1234  LE:r3 = 4321 */
1840         ldr     ip, [r1, #0x05]         /* BE:ip = 5678  LE:ip = 8765 */
1841         ldr     r1, [r1, #0x09]         /* BE:r1 = 9ABx  LE:r1 = xBA9 */
1842         orr     r2, r2, r3, lsl #8      /* r2 = 3210 */
1843         str     r2, [r0]
1844         mov     r3, r3, lsr #24         /* r3 = ...4 */
1845         orr     r3, r3, ip, lsl #8      /* r3 = 7654 */
1846         mov     r1, r1, lsl #8          /* r1 = BA9. */
1847         orr     r1, r1, ip, lsr #24     /* r1 = BA98 */
1848         str     r3, [r0, #0x04]
1849         str     r1, [r0, #0x08]
1850         RET
1851         LMEMCPY_C_PAD
1852
1853 /*
1854  * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
1855  */
1856         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1857         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1858         ldr     ip, [r1, #0x08]         /* BE:ip = 89AB  LE:ip = BA98 */
1859         mov     r1, r2, lsr #8          /* BE:r1 = .012  LE:r1 = .321 */
1860         strh    r1, [r0, #0x01]
1861         strb    r2, [r0]
1862         mov     r1, r2, lsr #24         /* r1 = ...3 */
1863         orr     r2, r1, r3, lsl #8      /* r1 = 6543 */
1864         mov     r1, r3, lsr #24         /* r1 = ...7 */
1865         orr     r1, r1, ip, lsl #8      /* r1 = A987 */
1866         mov     ip, ip, lsr #24         /* ip = ...B */
1867         str     r2, [r0, #0x03]
1868         str     r1, [r0, #0x07]
1869         strb    ip, [r0, #0x0b]
1870         RET
1871         LMEMCPY_C_PAD
1872
1873 /*
1874  * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
1875  */
1876         ldrb    r2, [r1]
1877         ldrh    r3, [r1, #0x01]
1878         ldr     ip, [r1, #0x03]
1879         strb    r2, [r0]
1880         ldr     r2, [r1, #0x07]
1881         ldrb    r1, [r1, #0x0b]
1882         strh    r3, [r0, #0x01]
1883         str     ip, [r0, #0x03]
1884         str     r2, [r0, #0x07]
1885         strb    r1, [r0, #0x0b]
1886         RET
1887         LMEMCPY_C_PAD
1888
1889 /*
1890  * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
1891  */
1892         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1893         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1894         ldr     ip, [r1, #0x06]         /* BE:ip = 6789  LE:ip = 9876 */
1895         ldrh    r1, [r1, #0x0a]         /* BE:r1 = ..AB  LE:r1 = ..BA */
1896         strb    r2, [r0]
1897         mov     r2, r2, lsr #8          /* r2 = ...1 */
1898         orr     r2, r2, r3, lsl #8      /* r2 = 4321 */
1899         strh    r2, [r0, #0x01]
1900         mov     r2, r3, lsr #8          /* r2 = .543 */
1901         orr     r3, r2, ip, lsl #24     /* r3 = 6543 */
1902         mov     r2, ip, lsr #8          /* r2 = .987 */
1903         orr     r2, r2, r1, lsl #24     /* r2 = A987 */
1904         mov     r1, r1, lsr #8          /* r1 = ...B */
1905         str     r3, [r0, #0x03]
1906         str     r2, [r0, #0x07]
1907         strb    r1, [r0, #0x0b]
1908         RET
1909         LMEMCPY_C_PAD
1910
1911 /*
1912  * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
1913  */
1914         ldrb    r2, [r1]
1915         ldr     r3, [r1, #0x01]         /* BE:r3 = 1234  LE:r3 = 4321 */
1916         ldr     ip, [r1, #0x05]         /* BE:ip = 5678  LE:ip = 8765 */
1917         ldr     r1, [r1, #0x09]         /* BE:r1 = 9ABx  LE:r1 = xBA9 */
1918         strb    r2, [r0]
1919         strh    r3, [r0, #0x01]
1920         mov     r3, r3, lsr #16         /* r3 = ..43 */
1921         orr     r3, r3, ip, lsl #16     /* r3 = 6543 */
1922         mov     ip, ip, lsr #16         /* ip = ..87 */
1923         orr     ip, ip, r1, lsl #16     /* ip = A987 */
1924         mov     r1, r1, lsr #16         /* r1 = ..xB */
1925         str     r3, [r0, #0x03]
1926         str     ip, [r0, #0x07]
1927         strb    r1, [r0, #0x0b]
1928         RET
1929         LMEMCPY_C_PAD
1930
1931 /*
1932  * 1000: dst is 16-bit aligned, src is 32-bit aligned
1933  */
1934         ldr     ip, [r1]                /* BE:ip = 0123  LE:ip = 3210 */
1935         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1936         ldr     r2, [r1, #0x08]         /* BE:r2 = 89AB  LE:r2 = BA98 */
1937         mov     r1, ip, lsr #16         /* BE:r1 = ..01  LE:r1 = ..32 */
1938         strh    ip, [r0]
1939         orr     r1, r1, r3, lsl #16     /* r1 = 5432 */
1940         mov     r3, r3, lsr #16         /* r3 = ..76 */
1941         orr     r3, r3, r2, lsl #16     /* r3 = 9876 */
1942         mov     r2, r2, lsr #16         /* r2 = ..BA */
1943         str     r1, [r0, #0x02]
1944         str     r3, [r0, #0x06]
1945         strh    r2, [r0, #0x0a]
1946         RET
1947         LMEMCPY_C_PAD
1948
1949 /*
1950  * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
1951  */
1952         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
1953         ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
1954         mov     ip, r2, lsr #8          /* BE:ip = .x01  LE:ip = .210 */
1955         strh    ip, [r0]
1956         ldr     ip, [r1, #0x07]         /* BE:ip = 789A  LE:ip = A987 */
1957         ldrb    r1, [r1, #0x0b]         /* r1 = ...B */
1958         mov     r2, r2, lsr #24         /* r2 = ...2 */
1959         orr     r2, r2, r3, lsl #8      /* r2 = 5432 */
1960         mov     r3, r3, lsr #24         /* r3 = ...6 */
1961         orr     r3, r3, ip, lsl #8      /* r3 = 9876 */
1962         mov     r1, r1, lsl #8          /* r1 = ..B. */
1963         orr     r1, r1, ip, lsr #24     /* r1 = ..BA */
1964         str     r2, [r0, #0x02]
1965         str     r3, [r0, #0x06]
1966         strh    r1, [r0, #0x0a]
1967         RET
1968         LMEMCPY_C_PAD
1969
1970 /*
1971  * 1010: dst is 16-bit aligned, src is 16-bit aligned
1972  */
1973         ldrh    r2, [r1]
1974         ldr     r3, [r1, #0x02]
1975         ldr     ip, [r1, #0x06]
1976         ldrh    r1, [r1, #0x0a]
1977         strh    r2, [r0]
1978         str     r3, [r0, #0x02]
1979         str     ip, [r0, #0x06]
1980         strh    r1, [r0, #0x0a]
1981         RET
1982         LMEMCPY_C_PAD
1983
1984 /*
1985  * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
1986  */
1987         ldr     r2, [r1, #0x09]         /* BE:r2 = 9ABx  LE:r2 = xBA9 */
1988         ldr     r3, [r1, #0x05]         /* BE:r3 = 5678  LE:r3 = 8765 */
1989         mov     ip, r2, lsr #8          /* BE:ip = .9AB  LE:ip = .xBA */
1990         strh    ip, [r0, #0x0a]
1991         ldr     ip, [r1, #0x01]         /* BE:ip = 1234  LE:ip = 4321 */
1992         ldrb    r1, [r1]                /* r1 = ...0 */
1993         mov     r2, r2, lsl #24         /* r2 = 9... */
1994         orr     r2, r2, r3, lsr #8      /* r2 = 9876 */
1995         mov     r3, r3, lsl #24         /* r3 = 5... */
1996         orr     r3, r3, ip, lsr #8      /* r3 = 5432 */
1997         orr     r1, r1, ip, lsl #8      /* r1 = 3210 */
1998         str     r2, [r0, #0x06]
1999         str     r3, [r0, #0x02]
2000         strh    r1, [r0]
2001         RET
2002         LMEMCPY_C_PAD
2003
2004 /*
2005  * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
2006  */
2007         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
2008         ldr     ip, [r1, #0x04]         /* BE:ip = 4567  LE:ip = 7654 */
2009         ldr     r1, [r1, #0x08]         /* BE:r1 = 89AB  LE:r1 = BA98 */
2010         strb    r2, [r0]
2011         mov     r3, r2, lsr #8          /* r3 = .321 */
2012         orr     r3, r3, ip, lsl #24     /* r3 = 4321 */
2013         str     r3, [r0, #0x01]
2014         mov     r3, ip, lsr #8          /* r3 = .765 */
2015         orr     r3, r3, r1, lsl #24     /* r3 = 8765 */
2016         str     r3, [r0, #0x05]
2017         mov     r1, r1, lsr #8          /* r1 = .BA9 */
2018         strh    r1, [r0, #0x09]
2019         mov     r1, r1, lsr #16         /* r1 = ...B */
2020         strb    r1, [r0, #0x0b]
2021         RET
2022         LMEMCPY_C_PAD
2023
2024 /*
2025  * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
2026  */
2027         ldrb    r2, [r1, #0x0b]         /* r2 = ...B */
2028         ldr     r3, [r1, #0x07]         /* BE:r3 = 789A  LE:r3 = A987 */
2029         ldr     ip, [r1, #0x03]         /* BE:ip = 3456  LE:ip = 6543 */
2030         ldr     r1, [r1, #-1]           /* BE:r1 = x012  LE:r1 = 210x */
2031         strb    r2, [r0, #0x0b]
2032         mov     r2, r3, lsr #16         /* r2 = ..A9 */
2033         strh    r2, [r0, #0x09]
2034         mov     r3, r3, lsl #16         /* r3 = 87.. */
2035         orr     r3, r3, ip, lsr #16     /* r3 = 8765 */
2036         mov     ip, ip, lsl #16         /* ip = 43.. */
2037         orr     ip, ip, r1, lsr #16     /* ip = 4321 */
2038         mov     r1, r1, lsr #8          /* r1 = .210 */
2039         str     r3, [r0, #0x05]
2040         str     ip, [r0, #0x01]
2041         strb    r1, [r0]
2042         RET
2043         LMEMCPY_C_PAD
2044
2045 /*
2046  * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
2047  */
2048         ldrh    r2, [r1]                /* r2 = ..10 */
2049         ldr     r3, [r1, #0x02]         /* r3 = 5432 */
2050         ldr     ip, [r1, #0x06]         /* ip = 9876 */
2051         ldrh    r1, [r1, #0x0a]         /* r1 = ..BA */
2052         strb    r2, [r0]
2053         mov     r2, r2, lsr #8          /* r2 = ...1 */
2054         orr     r2, r2, r3, lsl #8      /* r2 = 4321 */
2055         mov     r3, r3, lsr #24         /* r3 = ...5 */
2056         orr     r3, r3, ip, lsl #8      /* r3 = 8765 */
2057         mov     ip, ip, lsr #24         /* ip = ...9 */
2058         orr     ip, ip, r1, lsl #8      /* ip = .BA9 */
2059         mov     r1, r1, lsr #8          /* r1 = ...B */
2060         str     r2, [r0, #0x01]
2061         str     r3, [r0, #0x05]
2062         strh    ip, [r0, #0x09]
2063         strb    r1, [r0, #0x0b]
2064         RET
2065         LMEMCPY_C_PAD
2066
2067 /*
2068  * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
2069  */
2070         ldrb    r2, [r1]
2071         ldr     r3, [r1, #0x01]
2072         ldr     ip, [r1, #0x05]
2073         strb    r2, [r0]
2074         ldrh    r2, [r1, #0x09]
2075         ldrb    r1, [r1, #0x0b]
2076         str     r3, [r0, #0x01]
2077         str     ip, [r0, #0x05]
2078         strh    r2, [r0, #0x09]
2079         strb    r1, [r0, #0x0b]
2080         RET
2081 END(memcpy)