]> CyberLeo.Net >> Repos - FreeBSD/releng/10.0.git/blob - lib/libc/arm/string/memcpy_xscale.S
- Copy stable/10 (r259064) to releng/10.0 as part of the
[FreeBSD/releng/10.0.git] / lib / libc / arm / string / memcpy_xscale.S
1 /*      $NetBSD: memcpy_xscale.S,v 1.1 2003/10/14 07:51:45 scw Exp $    */
2
3 /*
4  * Copyright 2003 Wasabi Systems, Inc.
5  * All rights reserved.
6  *
7  * Written by Steve C. Woodford for Wasabi Systems, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed for the NetBSD Project by
20  *      Wasabi Systems, Inc.
21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22  *    or promote products derived from this software without specific prior
23  *    written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37
38 #include <machine/asm.h>
39 __FBSDID("$FreeBSD$");
40
41 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
42 ENTRY(memcpy)
43         pld     [r1]
44         cmp     r2, #0x0c
45         ble     .Lmemcpy_short          /* <= 12 bytes */
46         mov     r3, r0                  /* We must not clobber r0 */
47
48         /* Word-align the destination buffer */
49         ands    ip, r3, #0x03           /* Already word aligned? */
50         beq     .Lmemcpy_wordaligned    /* Yup */
51         cmp     ip, #0x02
52         ldrb    ip, [r1], #0x01
53         sub     r2, r2, #0x01
54         strb    ip, [r3], #0x01
55         ldrleb  ip, [r1], #0x01
56         suble   r2, r2, #0x01
57         strleb  ip, [r3], #0x01
58         ldrltb  ip, [r1], #0x01
59         sublt   r2, r2, #0x01
60         strltb  ip, [r3], #0x01
61
62         /* Destination buffer is now word aligned */
63 .Lmemcpy_wordaligned:
64         ands    ip, r1, #0x03           /* Is src also word-aligned? */
65         bne     .Lmemcpy_bad_align      /* Nope. Things just got bad */
66
67         /* Quad-align the destination buffer */
68         tst     r3, #0x07               /* Already quad aligned? */
69         ldrne   ip, [r1], #0x04
70         stmfd   sp!, {r4-r9}            /* Free up some registers */
71         subne   r2, r2, #0x04
72         strne   ip, [r3], #0x04
73
74         /* Destination buffer quad aligned, source is at least word aligned */
75         subs    r2, r2, #0x80
76         blt     .Lmemcpy_w_lessthan128
77
78         /* Copy 128 bytes at a time */
79 .Lmemcpy_w_loop128:
80         ldr     r4, [r1], #0x04         /* LD:00-03 */
81         ldr     r5, [r1], #0x04         /* LD:04-07 */
82         pld     [r1, #0x18]             /* Prefetch 0x20 */
83         ldr     r6, [r1], #0x04         /* LD:08-0b */
84         ldr     r7, [r1], #0x04         /* LD:0c-0f */
85         ldr     r8, [r1], #0x04         /* LD:10-13 */
86         ldr     r9, [r1], #0x04         /* LD:14-17 */
87         strd    r4, [r3], #0x08         /* ST:00-07 */
88         ldr     r4, [r1], #0x04         /* LD:18-1b */
89         ldr     r5, [r1], #0x04         /* LD:1c-1f */
90         strd    r6, [r3], #0x08         /* ST:08-0f */
91         ldr     r6, [r1], #0x04         /* LD:20-23 */
92         ldr     r7, [r1], #0x04         /* LD:24-27 */
93         pld     [r1, #0x18]             /* Prefetch 0x40 */
94         strd    r8, [r3], #0x08         /* ST:10-17 */
95         ldr     r8, [r1], #0x04         /* LD:28-2b */
96         ldr     r9, [r1], #0x04         /* LD:2c-2f */
97         strd    r4, [r3], #0x08         /* ST:18-1f */
98         ldr     r4, [r1], #0x04         /* LD:30-33 */
99         ldr     r5, [r1], #0x04         /* LD:34-37 */
100         strd    r6, [r3], #0x08         /* ST:20-27 */
101         ldr     r6, [r1], #0x04         /* LD:38-3b */
102         ldr     r7, [r1], #0x04         /* LD:3c-3f */
103         strd    r8, [r3], #0x08         /* ST:28-2f */
104         ldr     r8, [r1], #0x04         /* LD:40-43 */
105         ldr     r9, [r1], #0x04         /* LD:44-47 */
106         pld     [r1, #0x18]             /* Prefetch 0x60 */
107         strd    r4, [r3], #0x08         /* ST:30-37 */
108         ldr     r4, [r1], #0x04         /* LD:48-4b */
109         ldr     r5, [r1], #0x04         /* LD:4c-4f */
110         strd    r6, [r3], #0x08         /* ST:38-3f */
111         ldr     r6, [r1], #0x04         /* LD:50-53 */
112         ldr     r7, [r1], #0x04         /* LD:54-57 */
113         strd    r8, [r3], #0x08         /* ST:40-47 */
114         ldr     r8, [r1], #0x04         /* LD:58-5b */
115         ldr     r9, [r1], #0x04         /* LD:5c-5f */
116         strd    r4, [r3], #0x08         /* ST:48-4f */
117         ldr     r4, [r1], #0x04         /* LD:60-63 */
118         ldr     r5, [r1], #0x04         /* LD:64-67 */
119         pld     [r1, #0x18]             /* Prefetch 0x80 */
120         strd    r6, [r3], #0x08         /* ST:50-57 */
121         ldr     r6, [r1], #0x04         /* LD:68-6b */
122         ldr     r7, [r1], #0x04         /* LD:6c-6f */
123         strd    r8, [r3], #0x08         /* ST:58-5f */
124         ldr     r8, [r1], #0x04         /* LD:70-73 */
125         ldr     r9, [r1], #0x04         /* LD:74-77 */
126         strd    r4, [r3], #0x08         /* ST:60-67 */
127         ldr     r4, [r1], #0x04         /* LD:78-7b */
128         ldr     r5, [r1], #0x04         /* LD:7c-7f */
129         strd    r6, [r3], #0x08         /* ST:68-6f */
130         strd    r8, [r3], #0x08         /* ST:70-77 */
131         subs    r2, r2, #0x80
132         strd    r4, [r3], #0x08         /* ST:78-7f */
133         bge     .Lmemcpy_w_loop128
134
135 .Lmemcpy_w_lessthan128:
136         adds    r2, r2, #0x80           /* Adjust for extra sub */
137         ldmeqfd sp!, {r4-r9}
138         bxeq    lr                      /* Return now if done */
139         subs    r2, r2, #0x20
140         blt     .Lmemcpy_w_lessthan32
141
142         /* Copy 32 bytes at a time */
143 .Lmemcpy_w_loop32:
144         ldr     r4, [r1], #0x04
145         ldr     r5, [r1], #0x04
146         pld     [r1, #0x18]
147         ldr     r6, [r1], #0x04
148         ldr     r7, [r1], #0x04
149         ldr     r8, [r1], #0x04
150         ldr     r9, [r1], #0x04
151         strd    r4, [r3], #0x08
152         ldr     r4, [r1], #0x04
153         ldr     r5, [r1], #0x04
154         strd    r6, [r3], #0x08
155         strd    r8, [r3], #0x08
156         subs    r2, r2, #0x20
157         strd    r4, [r3], #0x08
158         bge     .Lmemcpy_w_loop32
159
160 .Lmemcpy_w_lessthan32:
161         adds    r2, r2, #0x20           /* Adjust for extra sub */
162         ldmeqfd sp!, {r4-r9}
163         bxeq    lr                      /* Return now if done */
164
165         and     r4, r2, #0x18
166         rsbs    r4, r4, #0x18
167         addne   pc, pc, r4, lsl #1
168         nop
169
170         /* At least 24 bytes remaining */
171         ldr     r4, [r1], #0x04
172         ldr     r5, [r1], #0x04
173         sub     r2, r2, #0x08
174         strd    r4, [r3], #0x08
175
176         /* At least 16 bytes remaining */
177         ldr     r4, [r1], #0x04
178         ldr     r5, [r1], #0x04
179         sub     r2, r2, #0x08
180         strd    r4, [r3], #0x08
181
182         /* At least 8 bytes remaining */
183         ldr     r4, [r1], #0x04
184         ldr     r5, [r1], #0x04
185         subs    r2, r2, #0x08
186         strd    r4, [r3], #0x08
187
188         /* Less than 8 bytes remaining */
189         ldmfd   sp!, {r4-r9}
190         bxeq    lr                      /* Return now if done */
191         subs    r2, r2, #0x04
192         ldrge   ip, [r1], #0x04
193         strge   ip, [r3], #0x04
194         bxeq    lr                      /* Return now if done */
195         addlt   r2, r2, #0x04
196         ldrb    ip, [r1], #0x01
197         cmp     r2, #0x02
198         ldrgeb  r2, [r1], #0x01
199         strb    ip, [r3], #0x01
200         ldrgtb  ip, [r1]
201         strgeb  r2, [r3], #0x01
202         strgtb  ip, [r3]
203         bx      lr
204
205
206 /*
207  * At this point, it has not been possible to word align both buffers.
208  * The destination buffer is word aligned, but the source buffer is not.
209  */
210 .Lmemcpy_bad_align:
211         stmfd   sp!, {r4-r7}
212         bic     r1, r1, #0x03
213         cmp     ip, #2
214         ldr     ip, [r1], #0x04
215         bgt     .Lmemcpy_bad3
216         beq     .Lmemcpy_bad2
217         b       .Lmemcpy_bad1
218
219 .Lmemcpy_bad1_loop16:
220 #ifdef __ARMEB__
221         mov     r4, ip, lsl #8
222 #else
223         mov     r4, ip, lsr #8
224 #endif
225         ldr     r5, [r1], #0x04
226         pld     [r1, #0x018]
227         ldr     r6, [r1], #0x04
228         ldr     r7, [r1], #0x04
229         ldr     ip, [r1], #0x04
230 #ifdef __ARMEB__
231         orr     r4, r4, r5, lsr #24
232         mov     r5, r5, lsl #8
233         orr     r5, r5, r6, lsr #24
234         mov     r6, r6, lsl #8
235         orr     r6, r6, r7, lsr #24
236         mov     r7, r7, lsl #8
237         orr     r7, r7, ip, lsr #24
238 #else
239         orr     r4, r4, r5, lsl #24
240         mov     r5, r5, lsr #8
241         orr     r5, r5, r6, lsl #24
242         mov     r6, r6, lsr #8
243         orr     r6, r6, r7, lsl #24
244         mov     r7, r7, lsr #8
245         orr     r7, r7, ip, lsl #24
246 #endif
247         str     r4, [r3], #0x04
248         str     r5, [r3], #0x04
249         str     r6, [r3], #0x04
250         str     r7, [r3], #0x04
251 .Lmemcpy_bad1:
252         subs    r2, r2, #0x10         
253         bge     .Lmemcpy_bad1_loop16
254
255         adds    r2, r2, #0x10         
256         ldmeqfd sp!, {r4-r7}
257         bxeq    lr                      /* Return now if done */
258         subs    r2, r2, #0x04
259         sublt   r1, r1, #0x03
260         blt     .Lmemcpy_bad_done
261
262 .Lmemcpy_bad1_loop4:
263 #ifdef __ARMEB__
264         mov     r4, ip, lsl #8
265 #else
266         mov     r4, ip, lsr #8
267 #endif
268         ldr     ip, [r1], #0x04
269         subs    r2, r2, #0x04
270 #ifdef __ARMEB__
271         orr     r4, r4, ip, lsr #24
272 #else
273         orr     r4, r4, ip, lsl #24
274 #endif
275         str     r4, [r3], #0x04
276         bge     .Lmemcpy_bad1_loop4
277         sub     r1, r1, #0x03
278         b       .Lmemcpy_bad_done
279
280 .Lmemcpy_bad2_loop16:
281 #ifdef __ARMEB__
282         mov     r4, ip, lsl #16
283 #else
284         mov     r4, ip, lsr #16
285 #endif
286         ldr     r5, [r1], #0x04
287         pld     [r1, #0x018]
288         ldr     r6, [r1], #0x04
289         ldr     r7, [r1], #0x04
290         ldr     ip, [r1], #0x04
291 #ifdef __ARMEB__
292         orr     r4, r4, r5, lsr #16
293         mov     r5, r5, lsl #16
294         orr     r5, r5, r6, lsr #16
295         mov     r6, r6, lsl #16
296         orr     r6, r6, r7, lsr #16
297         mov     r7, r7, lsl #16
298         orr     r7, r7, ip, lsr #16
299 #else
300         orr     r4, r4, r5, lsl #16
301         mov     r5, r5, lsr #16
302         orr     r5, r5, r6, lsl #16
303         mov     r6, r6, lsr #16
304         orr     r6, r6, r7, lsl #16
305         mov     r7, r7, lsr #16
306         orr     r7, r7, ip, lsl #16
307 #endif
308         str     r4, [r3], #0x04
309         str     r5, [r3], #0x04
310         str     r6, [r3], #0x04
311         str     r7, [r3], #0x04
312 .Lmemcpy_bad2:
313         subs    r2, r2, #0x10         
314         bge     .Lmemcpy_bad2_loop16
315
316         adds    r2, r2, #0x10         
317         ldmeqfd sp!, {r4-r7}
318         bxeq    lr                      /* Return now if done */
319         subs    r2, r2, #0x04
320         sublt   r1, r1, #0x02
321         blt     .Lmemcpy_bad_done
322
323 .Lmemcpy_bad2_loop4:
324 #ifdef __ARMEB__
325         mov     r4, ip, lsl #16
326 #else
327         mov     r4, ip, lsr #16
328 #endif
329         ldr     ip, [r1], #0x04
330         subs    r2, r2, #0x04
331 #ifdef __ARMEB__
332         orr     r4, r4, ip, lsr #16
333 #else
334         orr     r4, r4, ip, lsl #16
335 #endif
336         str     r4, [r3], #0x04
337         bge     .Lmemcpy_bad2_loop4
338         sub     r1, r1, #0x02
339         b       .Lmemcpy_bad_done
340
341 .Lmemcpy_bad3_loop16:
342 #ifdef __ARMEB__
343         mov     r4, ip, lsl #24
344 #else
345         mov     r4, ip, lsr #24
346 #endif
347         ldr     r5, [r1], #0x04
348         pld     [r1, #0x018]
349         ldr     r6, [r1], #0x04
350         ldr     r7, [r1], #0x04
351         ldr     ip, [r1], #0x04
352 #ifdef __ARMEB__
353         orr     r4, r4, r5, lsr #8
354         mov     r5, r5, lsl #24
355         orr     r5, r5, r6, lsr #8
356         mov     r6, r6, lsl #24
357         orr     r6, r6, r7, lsr #8
358         mov     r7, r7, lsl #24
359         orr     r7, r7, ip, lsr #8
360 #else
361         orr     r4, r4, r5, lsl #8
362         mov     r5, r5, lsr #24
363         orr     r5, r5, r6, lsl #8
364         mov     r6, r6, lsr #24
365         orr     r6, r6, r7, lsl #8
366         mov     r7, r7, lsr #24
367         orr     r7, r7, ip, lsl #8
368 #endif
369         str     r4, [r3], #0x04
370         str     r5, [r3], #0x04
371         str     r6, [r3], #0x04
372         str     r7, [r3], #0x04
373 .Lmemcpy_bad3:
374         subs    r2, r2, #0x10         
375         bge     .Lmemcpy_bad3_loop16
376
377         adds    r2, r2, #0x10         
378         ldmeqfd sp!, {r4-r7}
379         bxeq    lr                      /* Return now if done */
380         subs    r2, r2, #0x04
381         sublt   r1, r1, #0x01
382         blt     .Lmemcpy_bad_done
383
384 .Lmemcpy_bad3_loop4:
385 #ifdef __ARMEB__
386         mov     r4, ip, lsl #24
387 #else
388         mov     r4, ip, lsr #24
389 #endif
390         ldr     ip, [r1], #0x04
391         subs    r2, r2, #0x04
392 #ifdef __ARMEB__
393         orr     r4, r4, ip, lsr #8
394 #else
395         orr     r4, r4, ip, lsl #8
396 #endif
397         str     r4, [r3], #0x04
398         bge     .Lmemcpy_bad3_loop4
399         sub     r1, r1, #0x01
400
401 .Lmemcpy_bad_done:
402         ldmfd   sp!, {r4-r7}
403         adds    r2, r2, #0x04
404         bxeq    lr
405         ldrb    ip, [r1], #0x01
406         cmp     r2, #0x02
407         ldrgeb  r2, [r1], #0x01
408         strb    ip, [r3], #0x01
409         ldrgtb  ip, [r1]
410         strgeb  r2, [r3], #0x01
411         strgtb  ip, [r3]
412         bx      lr
413
414
415 /*
416  * Handle short copies (12 bytes or fewer), possibly misaligned.
417  * Some of these are *very* common, thanks to the network stack,
418  * and so are handled specially.
419  */
420 .Lmemcpy_short:
421 #ifndef _STANDALONE
422         add     pc, pc, r2, lsl #2      /* pc reads as the 0x00 slot; index by len */
423         nop                             /* Filler so the table starts at . + 8 */
424         bx      lr                      /* 0x00 */
425         b       .Lmemcpy_bytewise       /* 0x01 */
426         b       .Lmemcpy_bytewise       /* 0x02 */
427         b       .Lmemcpy_bytewise       /* 0x03 */
428         b       .Lmemcpy_4              /* 0x04 */
429         b       .Lmemcpy_bytewise       /* 0x05 */
430         b       .Lmemcpy_6              /* 0x06 */
431         b       .Lmemcpy_bytewise       /* 0x07 */
432         b       .Lmemcpy_8              /* 0x08 */
433         b       .Lmemcpy_bytewise       /* 0x09 */
434         b       .Lmemcpy_bytewise       /* 0x0a */
435         b       .Lmemcpy_bytewise       /* 0x0b */
436         b       .Lmemcpy_c              /* 0x0c */
    #else
            /*
             * No jump table here to catch len == 0, and the bytewise loop
             * below moves a byte before it tests the count (which would
             * then wrap past zero), so a zero-length copy must return now.
             */
            cmp     r2, #0x00
            bxeq    lr                      /* Nothing to copy; r0 is still dst */
437 #endif
438 .Lmemcpy_bytewise:
439         mov     r3, r0                  /* We must not clobber r0 */
440         ldrb    ip, [r1], #0x01         /* Preload first byte (len != 0 here) */
441 1:      subs    r2, r2, #0x01           /* Z set once the last byte is in ip */
442         strb    ip, [r3], #0x01
443         ldrneb  ip, [r1], #0x01         /* Fetch the next byte only if one remains */
444         bne     1b
445         bx      lr
446
447 #ifndef _STANDALONE
448 /******************************************************************************
449  * Special case for 4 byte copies
450  */
451 #define LMEMCPY_4_LOG2  6       /* 64 bytes */
452 #define LMEMCPY_4_PAD   .align LMEMCPY_4_LOG2
453         LMEMCPY_4_PAD
454 .Lmemcpy_4:
455         and     r2, r1, #0x03
456         orr     r2, r2, r0, lsl #2
457         ands    r2, r2, #0x0f
458         sub     r3, pc, #0x14
459         addne   pc, r3, r2, lsl #LMEMCPY_4_LOG2
460
461 /*
462  * 0000: dst is 32-bit aligned, src is 32-bit aligned
463  */
464         ldr     r2, [r1]
465         str     r2, [r0]
466         bx      lr
467         LMEMCPY_4_PAD
468
469 /*
470  * 0001: dst is 32-bit aligned, src is 8-bit aligned
471  */
472         ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
473         ldr     r2, [r1, #3]            /* BE:r2 = 3xxx  LE:r2 = xxx3 */
474 #ifdef __ARMEB__
475         mov     r3, r3, lsl #8          /* r3 = 012. */
476         orr     r3, r3, r2, lsr #24     /* r3 = 0123 */
477 #else
478         mov     r3, r3, lsr #8          /* r3 = .210 */
479         orr     r3, r3, r2, lsl #24     /* r3 = 3210 */
480 #endif
481         str     r3, [r0]
482         bx      lr
483         LMEMCPY_4_PAD
484
485 /*
486  * 0010: dst is 32-bit aligned, src is 16-bit aligned
487  */
488 #ifdef __ARMEB__
489         ldrh    r3, [r1]
490         ldrh    r2, [r1, #0x02]
491 #else
492         ldrh    r3, [r1, #0x02]
493         ldrh    r2, [r1]
494 #endif
495         orr     r3, r2, r3, lsl #16
496         str     r3, [r0]
497         bx      lr
498         LMEMCPY_4_PAD
499
500 /*
501  * 0011: dst is 32-bit aligned, src is 8-bit aligned
502  */
503         ldr     r3, [r1, #-3]           /* BE:r3 = xxx0  LE:r3 = 0xxx */
504         ldr     r2, [r1, #1]            /* BE:r2 = 123x  LE:r2 = x321 */
505 #ifdef __ARMEB__
506         mov     r3, r3, lsl #24         /* r3 = 0... */
507         orr     r3, r3, r2, lsr #8      /* r3 = 0123 */
508 #else
509         mov     r3, r3, lsr #24         /* r3 = ...0 */
510         orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
511 #endif
512         str     r3, [r0]
513         bx      lr
514         LMEMCPY_4_PAD
515
516 /*
517  * 0100: dst is 8-bit aligned, src is 32-bit aligned
518  */
519         ldr     r2, [r1]
520 #ifdef __ARMEB__
521         strb    r2, [r0, #0x03]
522         mov     r3, r2, lsr #8
523         mov     r1, r2, lsr #24
524         strb    r1, [r0]
525 #else
526         strb    r2, [r0]
527         mov     r3, r2, lsr #8
528         mov     r1, r2, lsr #24
529         strb    r1, [r0, #0x03]
530 #endif
531         strh    r3, [r0, #0x01]
532         bx      lr
533         LMEMCPY_4_PAD
534
535 /*
536  * 0101: dst is 8-bit aligned, src is 8-bit aligned
537  */
538         ldrb    r2, [r1]
539         ldrh    r3, [r1, #0x01]
540         ldrb    r1, [r1, #0x03]
541         strb    r2, [r0]
542         strh    r3, [r0, #0x01]
543         strb    r1, [r0, #0x03]
544         bx      lr
545         LMEMCPY_4_PAD
546
547 /*
548  * 0110: dst is 8-bit aligned, src is 16-bit aligned
549  */
550         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
551         ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
552 #ifdef __ARMEB__
553         mov     r1, r2, lsr #8          /* r1 = ...0 */
554         strb    r1, [r0]                /* dst[0] = byte 0 */
555         mov     r2, r2, lsl #8          /* r2 = .01. */
556         orr     r2, r2, r3, lsr #8      /* r2 = .012 */
557 #else
558         strb    r2, [r0]                /* dst[0] = byte 0 (low byte of r2) */
559         mov     r2, r2, lsr #8          /* r2 = ...1 */
560         orr     r2, r2, r3, lsl #8      /* r2 = .321 */
561         mov     r3, r3, lsr #8          /* r3 = ...3 */
562 #endif
563         strh    r2, [r0, #0x01]         /* dst[1..2] = bytes 1,2 (low halfword of r2) */
564         strb    r3, [r0, #0x03]         /* dst[3] = byte 3 (low byte of r3) */
565         bx      lr
566         LMEMCPY_4_PAD
567
568 /*
569  * 0111: dst is 8-bit aligned, src is 8-bit aligned
570  */
571         ldrb    r2, [r1]
572         ldrh    r3, [r1, #0x01]
573         ldrb    r1, [r1, #0x03]
574         strb    r2, [r0]
575         strh    r3, [r0, #0x01]
576         strb    r1, [r0, #0x03]
577         bx      lr
578         LMEMCPY_4_PAD
579
580 /*
581  * 1000: dst is 16-bit aligned, src is 32-bit aligned
582  */
583         ldr     r2, [r1]
584 #ifdef __ARMEB__
585         strh    r2, [r0, #0x02]
586         mov     r3, r2, lsr #16
587         strh    r3, [r0]
588 #else
589         strh    r2, [r0]
590         mov     r3, r2, lsr #16
591         strh    r3, [r0, #0x02]
592 #endif
593         bx       lr
594         LMEMCPY_4_PAD
595
596 /*
597  * 1001: dst is 16-bit aligned, src is 8-bit aligned
598  */
599         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
600         ldr     r3, [r1, #3]            /* BE:r3 = 3xxx  LE:r3 = xxx3 */
601         mov     r1, r2, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
602         strh    r1, [r0]
603 #ifdef __ARMEB__
604         mov     r2, r2, lsl #8          /* r2 = 012. */
605         orr     r2, r2, r3, lsr #24     /* r2 = 0123 */
606 #else
607         mov     r2, r2, lsr #24         /* r2 = ...2 */
608         orr     r2, r2, r3, lsl #8      /* r2 = xx32 */
609 #endif
610         strh    r2, [r0, #0x02]
611         bx      lr
612         LMEMCPY_4_PAD
613
614 /*
615  * 1010: dst is 16-bit aligned, src is 16-bit aligned
616  */
617         ldrh    r2, [r1]
618         ldrh    r3, [r1, #0x02]
619         strh    r2, [r0]
620         strh    r3, [r0, #0x02]
621         bx      lr
622         LMEMCPY_4_PAD
623
624 /*
625  * 1011: dst is 16-bit aligned, src is 8-bit aligned
626  */
627         ldr     r3, [r1, #1]            /* BE:r3 = 123x  LE:r3 = x321 */
628         ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
629         mov     r1, r3, lsr #8          /* BE:r1 = .123  LE:r1 = .x32 */
630         strh    r1, [r0, #0x02]
631 #ifdef __ARMEB__
632         mov     r3, r3, lsr #24         /* r3 = ...1 */
633         orr     r3, r3, r2, lsl #8      /* r3 = xx01 */
634 #else
635         mov     r3, r3, lsl #8          /* r3 = 321. */
636         orr     r3, r3, r2, lsr #24     /* r3 = 3210 */
637 #endif
638         strh    r3, [r0]
639         bx      lr
640         LMEMCPY_4_PAD
641
642 /*
643  * 1100: dst is 8-bit aligned, src is 32-bit aligned
644  */
645         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
646 #ifdef __ARMEB__
647         strb    r2, [r0, #0x03]
648         mov     r3, r2, lsr #8
649         mov     r1, r2, lsr #24
650         strh    r3, [r0, #0x01]
651         strb    r1, [r0]
652 #else
653         strb    r2, [r0]
654         mov     r3, r2, lsr #8
655         mov     r1, r2, lsr #24
656         strh    r3, [r0, #0x01]
657         strb    r1, [r0, #0x03]
658 #endif
659         bx      lr
660         LMEMCPY_4_PAD
661
662 /*
663  * 1101: dst is 8-bit aligned, src is 8-bit aligned
664  */
665         ldrb    r2, [r1]
666         ldrh    r3, [r1, #0x01]
667         ldrb    r1, [r1, #0x03]
668         strb    r2, [r0]
669         strh    r3, [r0, #0x01]
670         strb    r1, [r0, #0x03]
671         bx      lr
672         LMEMCPY_4_PAD
673
674 /*
675  * 1110: dst is 8-bit aligned, src is 16-bit aligned
676  */
677 #ifdef __ARMEB__
678         ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
679         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
680         strb    r3, [r0, #0x03]
681         mov     r3, r3, lsr #8          /* r3 = ...2 */
682         orr     r3, r3, r2, lsl #8      /* r3 = ..12 */
683         strh    r3, [r0, #0x01]
684         mov     r2, r2, lsr #8          /* r2 = ...0 */
685         strb    r2, [r0]
686 #else
687         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
688         ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
689         strb    r2, [r0]
690         mov     r2, r2, lsr #8          /* r2 = ...1 */
691         orr     r2, r2, r3, lsl #8      /* r2 = .321 */
692         strh    r2, [r0, #0x01]
693         mov     r3, r3, lsr #8          /* r3 = ...3 */
694         strb    r3, [r0, #0x03]
695 #endif
696         bx      lr
697         LMEMCPY_4_PAD
698
699 /*
700  * 1111: dst is 8-bit aligned, src is 8-bit aligned
701  */
702         ldrb    r2, [r1]
703         ldrh    r3, [r1, #0x01]
704         ldrb    r1, [r1, #0x03]
705         strb    r2, [r0]
706         strh    r3, [r0, #0x01]
707         strb    r1, [r0, #0x03]
708         bx      lr
709         LMEMCPY_4_PAD
710
711
712 /******************************************************************************
713  * Special case for 6 byte copies
714  */
715 #define LMEMCPY_6_LOG2  6       /* 64 bytes */
716 #define LMEMCPY_6_PAD   .align LMEMCPY_6_LOG2
717         LMEMCPY_6_PAD
718 .Lmemcpy_6:
719         and     r2, r1, #0x03
720         orr     r2, r2, r0, lsl #2
721         ands    r2, r2, #0x0f
722         sub     r3, pc, #0x14
723         addne   pc, r3, r2, lsl #LMEMCPY_6_LOG2
724
725 /*
726  * 0000: dst is 32-bit aligned, src is 32-bit aligned
727  */
728         ldr     r2, [r1]
729         ldrh    r3, [r1, #0x04]
730         str     r2, [r0]
731         strh    r3, [r0, #0x04]
732         bx      lr
733         LMEMCPY_6_PAD
734
735 /*
736  * 0001: dst is 32-bit aligned, src is 8-bit aligned
737  */
738         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
739         ldr     r3, [r1, #0x03]         /* BE:r3 = 345x  LE:r3 = x543 */
740 #ifdef __ARMEB__
741         mov     r2, r2, lsl #8          /* r2 = 012. */
742         orr     r2, r2, r3, lsr #24     /* r2 = 0123 */
743 #else
744         mov     r2, r2, lsr #8          /* r2 = .210 */
745         orr     r2, r2, r3, lsl #24     /* r2 = 3210 */
746 #endif
747         mov     r3, r3, lsr #8          /* BE:r3 = .345  LE:r3 = .x54 */
748         str     r2, [r0]
749         strh    r3, [r0, #0x04]
750         bx      lr
751         LMEMCPY_6_PAD
752
753 /*
754  * 0010: dst is 32-bit aligned, src is 16-bit aligned
755  */
756         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
757         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
758 #ifdef __ARMEB__
759         mov     r1, r3, lsr #16         /* r1 = ..23 */
760         orr     r1, r1, r2, lsl #16     /* r1 = 0123 */
761         str     r1, [r0]
762         strh    r3, [r0, #0x04]
763 #else
764         mov     r1, r3, lsr #16         /* r1 = ..54 */
765         orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
766         str     r2, [r0]
767         strh    r1, [r0, #0x04]
768 #endif
769         bx      lr
770         LMEMCPY_6_PAD
771
772 /*
773  * 0011: dst is 32-bit aligned, src is 8-bit aligned
774  */
775         ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
776         ldr     r3, [r1, #1]            /* BE:r3 = 1234  LE:r3 = 4321 */
777         ldr     r1, [r1, #5]            /* BE:r1 = 5xxx  LE:r1 = xxx5 */
778 #ifdef __ARMEB__
779         mov     r2, r2, lsl #24         /* r2 = 0... */
780         orr     r2, r2, r3, lsr #8      /* r2 = 0123 */
781         mov     r3, r3, lsl #8          /* r3 = 234. */
782         orr     r1, r3, r1, lsr #24     /* r1 = 2345 */
783 #else
784         mov     r2, r2, lsr #24         /* r2 = ...0 */
785         orr     r2, r2, r3, lsl #8      /* r2 = 3210 */
786         mov     r1, r1, lsl #8          /* r1 = xx5. */
787         orr     r1, r1, r3, lsr #24     /* r1 = xx54 */
788 #endif
789         str     r2, [r0]                /* dst[0..3] = bytes 0-3 */
790         strh    r1, [r0, #0x04]         /* dst[4..5] = bytes 4,5 (low halfword of r1) */
791         bx      lr
792         LMEMCPY_6_PAD
793
794 /*
795  * 0100: dst is 8-bit aligned, src is 32-bit aligned
796  */
797         ldr     r3, [r1]                /* BE:r3 = 0123  LE:r3 = 3210 */
798         ldrh    r2, [r1, #0x04]         /* BE:r2 = ..45  LE:r2 = ..54 */
799         mov     r1, r3, lsr #8          /* BE:r1 = .012  LE:r1 = .321 */
800         strh    r1, [r0, #0x01]
801 #ifdef __ARMEB__
802         mov     r1, r3, lsr #24         /* r1 = ...0 */
803         strb    r1, [r0]
804         mov     r3, r3, lsl #8          /* r3 = 123. */
805         orr     r3, r3, r2, lsr #8      /* r3 = 1234 */
806 #else
807         strb    r3, [r0]
808         mov     r3, r3, lsr #24         /* r3 = ...3 */
809         orr     r3, r3, r2, lsl #8      /* r3 = .543 */
810         mov     r2, r2, lsr #8          /* r2 = ...5 */
811 #endif
812         strh    r3, [r0, #0x03]
813         strb    r2, [r0, #0x05]
814         bx      lr
815         LMEMCPY_6_PAD
816
817 /*
818  * 0101: dst is 8-bit aligned, src is 8-bit aligned
819  */
820         ldrb    r2, [r1]
821         ldrh    r3, [r1, #0x01]
822         ldrh    ip, [r1, #0x03]
823         ldrb    r1, [r1, #0x05]
824         strb    r2, [r0]
825         strh    r3, [r0, #0x01]
826         strh    ip, [r0, #0x03]
827         strb    r1, [r0, #0x05]
828         bx      lr
829         LMEMCPY_6_PAD
830
831 /*
832  * 0110: dst is 8-bit aligned, src is 16-bit aligned
833  */
834         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
835         ldr     r1, [r1, #0x02]         /* BE:r1 = 2345  LE:r1 = 5432 */
836 #ifdef __ARMEB__
837         mov     r3, r2, lsr #8          /* r3 = ...0 */
838         strb    r3, [r0]
839         strb    r1, [r0, #0x05]
840         mov     r3, r1, lsr #8          /* r3 = .234 */
841         strh    r3, [r0, #0x03]
842         mov     r3, r2, lsl #8          /* r3 = .01. */
843         orr     r3, r3, r1, lsr #24     /* r3 = .012 */
844         strh    r3, [r0, #0x01]
845 #else
846         strb    r2, [r0]
847         mov     r3, r1, lsr #24
848         strb    r3, [r0, #0x05]
849         mov     r3, r1, lsr #8          /* r3 = .543 */
850         strh    r3, [r0, #0x03]
851         mov     r3, r2, lsr #8          /* r3 = ...1 */
852         orr     r3, r3, r1, lsl #8      /* r3 = 4321 */
853         strh    r3, [r0, #0x01]
854 #endif
855         bx      lr
856         LMEMCPY_6_PAD
857
858 /*
859  * 0111: dst is 8-bit aligned, src is 8-bit aligned
860  */
861         ldrb    r2, [r1]
862         ldrh    r3, [r1, #0x01]
863         ldrh    ip, [r1, #0x03]
864         ldrb    r1, [r1, #0x05]
865         strb    r2, [r0]
866         strh    r3, [r0, #0x01]
867         strh    ip, [r0, #0x03]
868         strb    r1, [r0, #0x05]
869         bx      lr
870         LMEMCPY_6_PAD
871
872 /*
873  * 1000: dst is 16-bit aligned, src is 32-bit aligned
874  */
875 #ifdef __ARMEB__
876         ldr     r2, [r1]                /* r2 = 0123 */
877         ldrh    r3, [r1, #0x04]         /* r3 = ..45 */
878         mov     r1, r2, lsr #16         /* r1 = ..01 */
879         orr     r3, r3, r2, lsl#16      /* r3 = 2345 */
880         strh    r1, [r0]                /* dst[0..1] */
881         str     r3, [r0, #0x02]         /* dst[2..5] */
882 #else
883         ldrh    r2, [r1, #0x04]         /* r2 = ..54 */
884         ldr     r3, [r1]                /* r3 = 3210 */
885         mov     r2, r2, lsl #16         /* r2 = 54.. */
886         orr     r2, r2, r3, lsr #16     /* r2 = 5432 */
887         strh    r3, [r0]                /* dst[0..1] (low halfword of r3) */
888         str     r2, [r0, #0x02]         /* dst[2..5] */
889 #endif
890         bx      lr
891         LMEMCPY_6_PAD
892
893 /*
894  * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
895  */
896         ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
897         ldr     r2, [r1, #3]            /* BE:r2 = 345x  LE:r2 = x543 */
898         mov     r1, r3, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
899 #ifdef __ARMEB__
900         mov     r2, r2, lsr #8          /* r2 = .345 */
901         orr     r2, r2, r3, lsl #24     /* r2 = 2345 */
902 #else
903         mov     r2, r2, lsl #8          /* r2 = 543. */
904         orr     r2, r2, r3, lsr #24     /* r2 = 5432 */
905 #endif
906         strh    r1, [r0]                /* dst[0..1] (low halfword of r1) */
907         str     r2, [r0, #0x02]         /* dst[2..5] */
908         bx      lr
909         LMEMCPY_6_PAD
910
911 /*
912  * 1010: dst is 16-bit aligned, src is 16-bit aligned
913  */
914         ldrh    r2, [r1]                /* src[0..1] */
915         ldr     r3, [r1, #0x02]         /* src[2..5] */
916         strh    r2, [r0]                /* dst[0..1] */
917         str     r3, [r0, #0x02]         /* dst[2..5] */
918         bx      lr
919         LMEMCPY_6_PAD
920
921 /*
922  * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
923  */
924         ldrb    r3, [r1]                /* r3 = ...0 */
925         ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
926         ldrb    r1, [r1, #0x05]         /* r1 = ...5 */
927 #ifdef __ARMEB__
928         mov     r3, r3, lsl #8          /* r3 = ..0. */
929         orr     r3, r3, r2, lsr #24     /* r3 = ..01 */
930         orr     r1, r1, r2, lsl #8      /* r1 = 2345 */
931 #else
932         orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
933         mov     r1, r1, lsl #24         /* r1 = 5... */
934         orr     r1, r1, r2, lsr #8      /* r1 = 5432 */
935 #endif
936         strh    r3, [r0]                /* dst[0..1] (low halfword of r3) */
937         str     r1, [r0, #0x02]         /* dst[2..5] */
938         bx      lr
939         LMEMCPY_6_PAD
940
941 /*
942  * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
943  */
944         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
945         ldrh    r1, [r1, #0x04]         /* BE:r1 = ..45  LE:r1 = ..54 */
946 #ifdef __ARMEB__
947         mov     r3, r2, lsr #24         /* r3 = ...0 */
948         strb    r3, [r0]                /* dst[0] */
949         mov     r2, r2, lsl #8          /* r2 = 123. */
950         orr     r2, r2, r1, lsr #8      /* r2 = 1234 */
951 #else
952         strb    r2, [r0]                /* dst[0] (low byte of r2) */
953         mov     r2, r2, lsr #8          /* r2 = .321 */
954         orr     r2, r2, r1, lsl #24     /* r2 = 4321 */
955         mov     r1, r1, lsr #8          /* r1 = ...5 */
956 #endif
957         str     r2, [r0, #0x01]         /* dst[1..4] */
958         strb    r1, [r0, #0x05]         /* dst[5] (low byte of r1) */
959         bx      lr
960         LMEMCPY_6_PAD
961
962 /*
963  * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
964  */
965         ldrb    r2, [r1]                /* r2 = ...0 */
966         ldrh    r3, [r1, #0x01]         /* BE:r3 = ..12  LE:r3 = ..21 */
967         ldrh    ip, [r1, #0x03]         /* BE:ip = ..34  LE:ip = ..43 */
968         ldrb    r1, [r1, #0x05]         /* r1 = ...5 */
969         strb    r2, [r0]                /* dst[0] */
970         strh    r3, [r0, #0x01]         /* dst[1..2] */
971         strh    ip, [r0, #0x03]         /* dst[3..4] */
972         strb    r1, [r0, #0x05]         /* dst[5] */
973         bx      lr
974         LMEMCPY_6_PAD
975
976 /*
977  * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
978  */
979         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
980         ldr     r1, [r1, #0x02]         /* BE:r1 = 2345  LE:r1 = 5432 */
981 #ifdef __ARMEB__
982         mov     r3, r2, lsr #8          /* r3 = ...0 */
983         strb    r3, [r0]                /* dst[0] */
984         mov     r2, r2, lsl #24         /* r2 = 1... */
985         orr     r2, r2, r1, lsr #8      /* r2 = 1234 */
986 #else
987         strb    r2, [r0]                /* dst[0] (low byte of r2) */
988         mov     r2, r2, lsr #8          /* r2 = ...1 */
989         orr     r2, r2, r1, lsl #8      /* r2 = 4321 */
990         mov     r1, r1, lsr #24         /* r1 = ...5 */
991 #endif
992         str     r2, [r0, #0x01]         /* dst[1..4] */
993         strb    r1, [r0, #0x05]         /* dst[5] (low byte of r1) */
994         bx      lr
995         LMEMCPY_6_PAD
996
997 /*
998  * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
999  */
1000         ldrb    r2, [r1]                /* src[0] */
1001         ldr     r3, [r1, #0x01]         /* src[1..4] */
1002         ldrb    r1, [r1, #0x05]         /* src[5] */
1003         strb    r2, [r0]                /* dst[0] */
1004         str     r3, [r0, #0x01]         /* dst[1..4] */
1005         strb    r1, [r0, #0x05]         /* dst[5] */
1006         bx      lr
1007         LMEMCPY_6_PAD
1008
1009
1010 /******************************************************************************
1011  * Special case for 8 byte copies: computed jump into one fixed-size slot per dst/src alignment pair
1012  */
1013 #define LMEMCPY_8_LOG2  6       /* 64 bytes per alignment-case slot */
1014 #define LMEMCPY_8_PAD   .align LMEMCPY_8_LOG2
1015         LMEMCPY_8_PAD
1016 .Lmemcpy_8:
1017         and     r2, r1, #0x03           /* r2 = src & 3 */
1018         orr     r2, r2, r0, lsl #2      /* r2 |= dst << 2 */
1019         ands    r2, r2, #0x0f           /* r2 = ((dst & 3) << 2) | (src & 3); Z set when both are 0 */
1020         sub     r3, pc, #0x14           /* r3 = .Lmemcpy_8 (pc reads as this insn + 8 = label + 0x14) */
1021         addne   pc, r3, r2, lsl #LMEMCPY_8_LOG2 /* r2 != 0: jump to slot r2; else fall into 0000 */
1022
1023 /*
1024  * 0000: dst is 32-bit aligned, src is 32-bit aligned
1025  */
1026         ldr     r2, [r1]                /* src[0..3] */
1027         ldr     r3, [r1, #0x04]         /* src[4..7] */
1028         str     r2, [r0]                /* dst[0..3] */
1029         str     r3, [r0, #0x04]         /* dst[4..7] */
1030         bx      lr
1031         LMEMCPY_8_PAD
1032
1033 /*
1034  * 0001: dst is 32-bit aligned, src is 8-bit aligned (byte 1)
1035  */
1036         ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
1037         ldr     r2, [r1, #0x03]         /* BE:r2 = 3456  LE:r2 = 6543 */
1038         ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
1039 #ifdef __ARMEB__
1040         mov     r3, r3, lsl #8          /* r3 = 012. */
1041         orr     r3, r3, r2, lsr #24     /* r3 = 0123 */
1042         orr     r2, r1, r2, lsl #8      /* r2 = 4567 */
1043 #else
1044         mov     r3, r3, lsr #8          /* r3 = .210 */
1045         orr     r3, r3, r2, lsl #24     /* r3 = 3210 */
1046         mov     r1, r1, lsl #24         /* r1 = 7... */
1047         orr     r2, r1, r2, lsr #8      /* r2 = 7654 */
1048 #endif
1049         str     r3, [r0]                /* dst[0..3] */
1050         str     r2, [r0, #0x04]         /* dst[4..7] */
1051         bx      lr
1052         LMEMCPY_8_PAD
1053
1054 /*
1055  * 0010: dst is 32-bit aligned, src is 16-bit aligned
1056  */
1057         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1058         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1059         ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
1060 #ifdef __ARMEB__
1061         mov     r2, r2, lsl #16         /* r2 = 01.. */
1062         orr     r2, r2, r3, lsr #16     /* r2 = 0123 */
1063         orr     r3, r1, r3, lsl #16     /* r3 = 4567 */
1064 #else
1065         orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
1066         mov     r3, r3, lsr #16         /* r3 = ..54 */
1067         orr     r3, r3, r1, lsl #16     /* r3 = 7654 */
1068 #endif
1069         str     r2, [r0]                /* dst[0..3] */
1070         str     r3, [r0, #0x04]         /* dst[4..7] */
1071         bx      lr
1072         LMEMCPY_8_PAD
1073
1074 /*
1075  * 0011: dst is 32-bit aligned, src is 8-bit aligned (byte 3)
1076  */
1077         ldrb    r3, [r1]                /* r3 = ...0 */
1078         ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
1079         ldr     r1, [r1, #0x05]         /* BE:r1 = 567x  LE:r1 = x765 */
1080 #ifdef __ARMEB__
1081         mov     r3, r3, lsl #24         /* r3 = 0... */
1082         orr     r3, r3, r2, lsr #8      /* r3 = 0123 */
1083         mov     r2, r2, lsl #24         /* r2 = 4... */
1084         orr     r2, r2, r1, lsr #8      /* r2 = 4567 */
1085 #else
1086         orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
1087         mov     r2, r2, lsr #24         /* r2 = ...4 */
1088         orr     r2, r2, r1, lsl #8      /* r2 = 7654 */
1089 #endif
1090         str     r3, [r0]                /* dst[0..3] */
1091         str     r2, [r0, #0x04]         /* dst[4..7] */
1092         bx      lr
1093         LMEMCPY_8_PAD
1094
1095 /*
1096  * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
1097  */
1098         ldr     r3, [r1]                /* BE:r3 = 0123  LE:r3 = 3210 */
1099         ldr     r2, [r1, #0x04]         /* BE:r2 = 4567  LE:r2 = 7654 */
1100 #ifdef __ARMEB__
1101         mov     r1, r3, lsr #24         /* r1 = ...0 */
1102         strb    r1, [r0]                /* dst[0] */
1103         mov     r1, r3, lsr #8          /* r1 = .012 */
1104         strb    r2, [r0, #0x07]         /* dst[7] (low byte of r2) */
1105         mov     r3, r3, lsl #24         /* r3 = 3... */
1106         orr     r3, r3, r2, lsr #8      /* r3 = 3456 */
1107 #else
1108         strb    r3, [r0]                /* dst[0] (low byte of r3) */
1109         mov     r1, r2, lsr #24         /* r1 = ...7 */
1110         strb    r1, [r0, #0x07]         /* dst[7] */
1111         mov     r1, r3, lsr #8          /* r1 = .321 */
1112         mov     r3, r3, lsr #24         /* r3 = ...3 */
1113         orr     r3, r3, r2, lsl #8      /* r3 = 6543 */
1114 #endif
1115         strh    r1, [r0, #0x01]         /* dst[1..2] (low halfword of r1) */
1116         str     r3, [r0, #0x03]         /* dst[3..6] */
1117         bx      lr
1118         LMEMCPY_8_PAD
1119
1120 /*
1121  * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
1122  */
1123         ldrb    r2, [r1]                /* src[0] */
1124         ldrh    r3, [r1, #0x01]         /* src[1..2] */
1125         ldr     ip, [r1, #0x03]         /* src[3..6] */
1126         ldrb    r1, [r1, #0x07]         /* src[7] */
1127         strb    r2, [r0]                /* dst[0] */
1128         strh    r3, [r0, #0x01]         /* dst[1..2] */
1129         str     ip, [r0, #0x03]         /* dst[3..6] */
1130         strb    r1, [r0, #0x07]         /* dst[7] */
1131         bx      lr
1132         LMEMCPY_8_PAD
1133
1134 /*
1135  * 0110: dst is 8-bit aligned, src is 16-bit aligned
1136  */
1137         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1138         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1139         ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
1140 #ifdef __ARMEB__
1141         mov     ip, r2, lsr #8          /* ip = ...0 */
1142         strb    ip, [r0]
1143         mov     ip, r2, lsl #8          /* ip = .01. */
1144         orr     ip, ip, r3, lsr #24     /* ip = .012 */
1145         strb    r1, [r0, #0x07]
1146         mov     r3, r3, lsl #8          /* r3 = 345. */
1147         orr     r3, r3, r1, lsr #8      /* r3 = 3456 */
1148 #else
1149         strb    r2, [r0]                /* 0 */
1150         mov     ip, r1, lsr #8          /* ip = ...7 */
1151         strb    ip, [r0, #0x07]         /* 7 */
1152         mov     ip, r2, lsr #8          /* ip = ...1 */
1153         orr     ip, ip, r3, lsl #8      /* ip = 4321 */
1154         mov     r3, r3, lsr #8          /* r3 = .543 */
1155         orr     r3, r3, r1, lsl #24     /* r3 = 6543 */
1156 #endif
1157         strh    ip, [r0, #0x01]
1158         str     r3, [r0, #0x03]
1159         bx      lr
1160         LMEMCPY_8_PAD
1161
1162 /*
1163  * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
1164  */
1165         ldrb    r3, [r1]                /* r3 = ...0 */
1166         ldr     ip, [r1, #0x01]         /* BE:ip = 1234  LE:ip = 4321 */
1167         ldrh    r2, [r1, #0x05]         /* BE:r2 = ..56  LE:r2 = ..65 */
1168         ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
1169         strb    r3, [r0]                /* dst[0] */
1170         mov     r3, ip, lsr #16         /* BE:r3 = ..12  LE:r3 = ..43 */
1171 #ifdef __ARMEB__
1172         strh    r3, [r0, #0x01]         /* dst[1..2] */
1173         orr     r2, r2, ip, lsl #16     /* r2 = 3456 */
1174 #else
1175         strh    ip, [r0, #0x01]         /* dst[1..2] (low halfword of ip) */
1176         orr     r2, r3, r2, lsl #16     /* r2 = 6543 */
1177 #endif
1178         str     r2, [r0, #0x03]         /* dst[3..6] */
1179         strb    r1, [r0, #0x07]         /* dst[7] */
1180         bx      lr
1181         LMEMCPY_8_PAD
1182
1183 /*
1184  * 1000: dst is 16-bit aligned, src is 32-bit aligned
1185  */
1186         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1187         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1188         mov     r1, r2, lsr #16         /* BE:r1 = ..01  LE:r1 = ..32 */
1189 #ifdef __ARMEB__
1190         strh    r1, [r0]                /* dst[0..1] */
1191         mov     r1, r3, lsr #16         /* r1 = ..45 */
1192         orr     r2, r1 ,r2, lsl #16     /* r2 = 2345 */
1193 #else
1194         strh    r2, [r0]                /* dst[0..1] (low halfword of r2) */
1195         orr     r2, r1, r3, lsl #16     /* r2 = 5432 */
1196         mov     r3, r3, lsr #16         /* r3 = ..76 */
1197 #endif
1198         str     r2, [r0, #0x02]         /* dst[2..5] */
1199         strh    r3, [r0, #0x06]         /* dst[6..7] (low halfword of r3) */
1200         bx      lr
1201         LMEMCPY_8_PAD
1202
1203 /*
1204  * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
1205  */
1206         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
1207         ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
1208         ldrb    ip, [r1, #0x07]         /* ip = ...7 */
1209         mov     r1, r2, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
1210         strh    r1, [r0]                /* dst[0..1] (low halfword of r1) */
1211 #ifdef __ARMEB__
1212         mov     r1, r2, lsl #24         /* r1 = 2... */
1213         orr     r1, r1, r3, lsr #8      /* r1 = 2345 */
1214         orr     r3, ip, r3, lsl #8      /* r3 = 4567 */
1215 #else
1216         mov     r1, r2, lsr #24         /* r1 = ...2 */
1217         orr     r1, r1, r3, lsl #8      /* r1 = 5432 */
1218         mov     r3, r3, lsr #24         /* r3 = ...6 */
1219         orr     r3, r3, ip, lsl #8      /* r3 = ..76 */
1220 #endif
1221         str     r1, [r0, #0x02]         /* dst[2..5] */
1222         strh    r3, [r0, #0x06]         /* dst[6..7] (low halfword of r3) */
1223         bx      lr
1224         LMEMCPY_8_PAD
1225
1226 /*
1227  * 1010: dst is 16-bit aligned, src is 16-bit aligned
1228  */
1229         ldrh    r2, [r1]                /* src[0..1] */
1230         ldr     ip, [r1, #0x02]         /* src[2..5] */
1231         ldrh    r3, [r1, #0x06]         /* src[6..7] */
1232         strh    r2, [r0]                /* dst[0..1] */
1233         str     ip, [r0, #0x02]         /* dst[2..5] */
1234         strh    r3, [r0, #0x06]         /* dst[6..7] */
1235         bx      lr
1236         LMEMCPY_8_PAD
1237
1238 /*
1239  * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
1240  */
1241         ldr     r3, [r1, #0x05]         /* BE:r3 = 567x  LE:r3 = x765 */
1242         ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
1243         ldrb    ip, [r1]                /* ip = ...0 */
1244         mov     r1, r3, lsr #8          /* BE:r1 = .567  LE:r1 = .x76 */
1245         strh    r1, [r0, #0x06]         /* dst[6..7] (low halfword of r1) */
1246 #ifdef __ARMEB__
1247         mov     r3, r3, lsr #24         /* r3 = ...5 */
1248         orr     r3, r3, r2, lsl #8      /* r3 = 2345 */
1249         mov     r2, r2, lsr #24         /* r2 = ...1 */
1250         orr     r2, r2, ip, lsl #8      /* r2 = ..01 */
1251 #else
1252         mov     r3, r3, lsl #24         /* r3 = 5... */
1253         orr     r3, r3, r2, lsr #8      /* r3 = 5432 */
1254         orr     r2, ip, r2, lsl #8      /* r2 = 3210 */
1255 #endif
1256         str     r3, [r0, #0x02]         /* dst[2..5] */
1257         strh    r2, [r0]                /* dst[0..1] (low halfword of r2) */
1258         bx      lr
1259         LMEMCPY_8_PAD
1260
1261 /*
1262  * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
1263  */
1264         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1265         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1266         mov     r1, r3, lsr #8          /* BE:r1 = .456  LE:r1 = .765 */
1267         strh    r1, [r0, #0x05]         /* dst[5..6] (low halfword of r1) */
1268 #ifdef __ARMEB__
1269         strb    r3, [r0, #0x07]         /* dst[7] (low byte of r3) */
1270         mov     r1, r2, lsr #24         /* r1 = ...0 */
1271         strb    r1, [r0]                /* dst[0] */
1272         mov     r2, r2, lsl #8          /* r2 = 123. */
1273         orr     r2, r2, r3, lsr #24     /* r2 = 1234 */
1274         str     r2, [r0, #0x01]         /* dst[1..4] */
1275 #else
1276         strb    r2, [r0]                /* dst[0] (low byte of r2) */
1277         mov     r1, r3, lsr #24         /* r1 = ...7 */
1278         strb    r1, [r0, #0x07]         /* dst[7] */
1279         mov     r2, r2, lsr #8          /* r2 = .321 */
1280         orr     r2, r2, r3, lsl #24     /* r2 = 4321 */
1281         str     r2, [r0, #0x01]         /* dst[1..4] */
1282 #endif
1283         bx       lr
1284         LMEMCPY_8_PAD
1285
1286 /*
1287  * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
1288  */
1289         ldrb    r3, [r1]                /* r3 = ...0 */
1290         ldrh    r2, [r1, #0x01]         /* BE:r2 = ..12  LE:r2 = ..21 */
1291         ldr     ip, [r1, #0x03]         /* BE:ip = 3456  LE:ip = 6543 */
1292         ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
1293         strb    r3, [r0]                /* dst[0] */
1294         mov     r3, ip, lsr #16         /* BE:r3 = ..34  LE:r3 = ..65 */
1295 #ifdef __ARMEB__
1296         strh    ip, [r0, #0x05]         /* dst[5..6] (low halfword of ip) */
1297         orr     r2, r3, r2, lsl #16     /* r2 = 1234 */
1298 #else
1299         strh    r3, [r0, #0x05]         /* dst[5..6] */
1300         orr     r2, r2, ip, lsl #16     /* r2 = 4321 */
1301 #endif
1302         str     r2, [r0, #0x01]         /* dst[1..4] */
1303         strb    r1, [r0, #0x07]         /* dst[7] */
1304         bx      lr
1305         LMEMCPY_8_PAD
1306
1307 /*
1308  * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
1309  */
1310         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1311         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1312         ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
1313 #ifdef __ARMEB__
1314         mov     ip, r2, lsr #8          /* ip = ...0 */
1315         strb    ip, [r0]                /* dst[0] */
1316         mov     ip, r2, lsl #24         /* ip = 1... */
1317         orr     ip, ip, r3, lsr #8      /* ip = 1234 */
1318         strb    r1, [r0, #0x07]         /* dst[7] (low byte of r1) */
1319         mov     r1, r1, lsr #8          /* r1 = ...6 */
1320         orr     r1, r1, r3, lsl #8      /* r1 = 3456 */
1321 #else
1322         strb    r2, [r0]                /* dst[0] (low byte of r2) */
1323         mov     ip, r2, lsr #8          /* ip = ...1 */
1324         orr     ip, ip, r3, lsl #8      /* ip = 4321 */
1325         mov     r2, r1, lsr #8          /* r2 = ...7 */
1326         strb    r2, [r0, #0x07]         /* dst[7] */
1327         mov     r1, r1, lsl #8          /* r1 = .76. */
1328         orr     r1, r1, r3, lsr #24     /* r1 = .765 */
1329 #endif
1330         str     ip, [r0, #0x01]         /* dst[1..4] */
1331         strh    r1, [r0, #0x05]         /* dst[5..6] (low halfword of r1) */
1332         bx      lr
1333         LMEMCPY_8_PAD
1334
1335 /*
1336  * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
1337  */
1338         ldrb    r2, [r1]                /* src[0] */
1339         ldr     ip, [r1, #0x01]         /* src[1..4] */
1340         ldrh    r3, [r1, #0x05]         /* src[5..6] */
1341         ldrb    r1, [r1, #0x07]         /* src[7] */
1342         strb    r2, [r0]                /* dst[0] */
1343         str     ip, [r0, #0x01]         /* dst[1..4] */
1344         strh    r3, [r0, #0x05]         /* dst[5..6] */
1345         strb    r1, [r0, #0x07]         /* dst[7] */
1346         bx      lr
1347         LMEMCPY_8_PAD
1348
1349 /******************************************************************************
1350  * Special case for 12 byte copies: computed jump into one fixed-size slot per dst/src alignment pair
1351  */
1352 #define LMEMCPY_C_LOG2  7       /* 128 bytes per alignment-case slot */
1353 #define LMEMCPY_C_PAD   .align LMEMCPY_C_LOG2
1354         LMEMCPY_C_PAD
1355 .Lmemcpy_c:
1356         and     r2, r1, #0x03           /* r2 = src & 3 */
1357         orr     r2, r2, r0, lsl #2      /* r2 |= dst << 2 */
1358         ands    r2, r2, #0x0f           /* r2 = ((dst & 3) << 2) | (src & 3); Z set when both are 0 */
1359         sub     r3, pc, #0x14           /* r3 = .Lmemcpy_c (pc reads as this insn + 8 = label + 0x14) */
1360         addne   pc, r3, r2, lsl #LMEMCPY_C_LOG2 /* r2 != 0: jump to slot r2; else fall into 0000 */
1361
1362 /*
1363  * 0000: dst is 32-bit aligned, src is 32-bit aligned
1364  */
1365         ldr     r2, [r1]                /* src[0..3] */
1366         ldr     r3, [r1, #0x04]         /* src[4..7] */
1367         ldr     r1, [r1, #0x08]         /* src[8..11] */
1368         str     r2, [r0]                /* dst[0..3] */
1369         str     r3, [r0, #0x04]         /* dst[4..7] */
1370         str     r1, [r0, #0x08]         /* dst[8..11] */
1371         bx      lr
1372         LMEMCPY_C_PAD
1373
1374 /*
1375  * 0001: dst is 32-bit aligned, src is 8-bit aligned (byte 1)
1376  */
1377         ldrb    r2, [r1, #0xb]          /* r2 = ...B */
1378         ldr     ip, [r1, #0x07]         /* BE:ip = 789A  LE:ip = A987 */
1379         ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
1380         ldr     r1, [r1, #-1]           /* BE:r1 = x012  LE:r1 = 210x */
1381 #ifdef __ARMEB__
1382         orr     r2, r2, ip, lsl #8      /* r2 = 89AB */
1383         str     r2, [r0, #0x08]         /* dst[8..11] */
1384         mov     r2, ip, lsr #24         /* r2 = ...7 */
1385         orr     r2, r2, r3, lsl #8      /* r2 = 4567 */
1386         mov     r1, r1, lsl #8          /* r1 = 012. */
1387         orr     r1, r1, r3, lsr #24     /* r1 = 0123 */
1388 #else
1389         mov     r2, r2, lsl #24         /* r2 = B... */
1390         orr     r2, r2, ip, lsr #8      /* r2 = BA98 */
1391         str     r2, [r0, #0x08]         /* dst[8..11] */
1392         mov     r2, ip, lsl #24         /* r2 = 7... */
1393         orr     r2, r2, r3, lsr #8      /* r2 = 7654 */
1394         mov     r1, r1, lsr #8          /* r1 = .210 */
1395         orr     r1, r1, r3, lsl #24     /* r1 = 3210 */
1396 #endif
1397         str     r2, [r0, #0x04]         /* dst[4..7] */
1398         str     r1, [r0]                /* dst[0..3] */
1399         bx      lr
1400         LMEMCPY_C_PAD
1401
1402 /*
1403  * 0010: dst is 32-bit aligned, src is 16-bit aligned
1404  */
1405         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1406         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1407         ldr     ip, [r1, #0x06]         /* BE:ip = 6789  LE:ip = 9876 */
1408         ldrh    r1, [r1, #0x0a]         /* BE:r1 = ..AB  LE:r1 = ..BA */
1409 #ifdef __ARMEB__
1410         mov     r2, r2, lsl #16         /* r2 = 01.. */
1411         orr     r2, r2, r3, lsr #16     /* r2 = 0123 */
1412         str     r2, [r0]                /* dst[0..3] */
1413         mov     r3, r3, lsl #16         /* r3 = 45.. */
1414         orr     r3, r3, ip, lsr #16     /* r3 = 4567 */
1415         orr     r1, r1, ip, lsl #16     /* r1 = 89AB */
1416 #else
1417         orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
1418         str     r2, [r0]                /* dst[0..3] */
1419         mov     r3, r3, lsr #16         /* r3 = ..54 */
1420         orr     r3, r3, ip, lsl #16     /* r3 = 7654 */
1421         mov     r1, r1, lsl #16         /* r1 = BA.. */
1422         orr     r1, r1, ip, lsr #16     /* r1 = BA98 */
1423 #endif
1424         str     r3, [r0, #0x04]         /* dst[4..7] */
1425         str     r1, [r0, #0x08]         /* dst[8..11] */
1426         bx      lr
1427         LMEMCPY_C_PAD
1428
1429 /*
1430  * 0011: dst is 32-bit aligned, src is 8-bit aligned (byte 3)
1431  */
1432         ldrb    r2, [r1]                /* r2 = ...0 */
1433         ldr     r3, [r1, #0x01]         /* BE:r3 = 1234  LE:r3 = 4321 */
1434         ldr     ip, [r1, #0x05]         /* BE:ip = 5678  LE:ip = 8765 */
1435         ldr     r1, [r1, #0x09]         /* BE:r1 = 9ABx  LE:r1 = xBA9 */
1436 #ifdef __ARMEB__
1437         mov     r2, r2, lsl #24         /* r2 = 0... */
1438         orr     r2, r2, r3, lsr #8      /* r2 = 0123 */
1439         str     r2, [r0]                /* dst[0..3] */
1440         mov     r3, r3, lsl #24         /* r3 = 4... */
1441         orr     r3, r3, ip, lsr #8      /* r3 = 4567 */
1442         mov     r1, r1, lsr #8          /* r1 = .9AB */
1443         orr     r1, r1, ip, lsl #24     /* r1 = 89AB */
1444 #else
1445         orr     r2, r2, r3, lsl #8      /* r2 = 3210 */
1446         str     r2, [r0]                /* dst[0..3] */
1447         mov     r3, r3, lsr #24         /* r3 = ...4 */
1448         orr     r3, r3, ip, lsl #8      /* r3 = 7654 */
1449         mov     r1, r1, lsl #8          /* r1 = BA9. */
1450         orr     r1, r1, ip, lsr #24     /* r1 = BA98 */
1451 #endif
1452         str     r3, [r0, #0x04]         /* dst[4..7] */
1453         str     r1, [r0, #0x08]         /* dst[8..11] */
1454         bx      lr
1455         LMEMCPY_C_PAD
1456
1457 /*
1458  * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
1459  */
1460         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1461         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1462         ldr     ip, [r1, #0x08]         /* BE:ip = 89AB  LE:ip = BA98 */
1463         mov     r1, r2, lsr #8          /* BE:r1 = .012  LE:r1 = .321 */
1464         strh    r1, [r0, #0x01]         /* dst[1..2] (low halfword of r1) */
1465 #ifdef __ARMEB__
1466         mov     r1, r2, lsr #24         /* r1 = ...0 */
1467         strb    r1, [r0]                /* dst[0] */
1468         mov     r1, r2, lsl #24         /* r1 = 3... */
1469         orr     r2, r1, r3, lsr #8      /* r2 = 3456 */
1470         mov     r1, r3, lsl #24         /* r1 = 7... */
1471         orr     r1, r1, ip, lsr #8      /* r1 = 789A */
1472 #else
1473         strb    r2, [r0]                /* dst[0] (low byte of r2) */
1474         mov     r1, r2, lsr #24         /* r1 = ...3 */
1475         orr     r2, r1, r3, lsl #8      /* r2 = 6543 */
1476         mov     r1, r3, lsr #24         /* r1 = ...7 */
1477         orr     r1, r1, ip, lsl #8      /* r1 = A987 */
1478         mov     ip, ip, lsr #24         /* ip = ...B */
1479 #endif
1480         str     r2, [r0, #0x03]         /* dst[3..6] */
1481         str     r1, [r0, #0x07]         /* dst[7..10] */
1482         strb    ip, [r0, #0x0b]         /* dst[11] (low byte of ip) */
1483         bx      lr
1484         LMEMCPY_C_PAD
1485
1486 /*
1487  * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1)
1488  */
1489         ldrb    r2, [r1]                /* src[0] */
1490         ldrh    r3, [r1, #0x01]         /* src[1..2] */
1491         ldr     ip, [r1, #0x03]         /* src[3..6] */
1492         strb    r2, [r0]                /* dst[0]; frees r2 for the next load */
1493         ldr     r2, [r1, #0x07]         /* src[7..10] */
1494         ldrb    r1, [r1, #0x0b]         /* src[11] */
1495         strh    r3, [r0, #0x01]         /* dst[1..2] */
1496         str     ip, [r0, #0x03]         /* dst[3..6] */
1497         str     r2, [r0, #0x07]         /* dst[7..10] */
1498         strb    r1, [r0, #0x0b]         /* dst[11] */
1499         bx      lr
1500         LMEMCPY_C_PAD
1501
1502 /*
1503  * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned
1504  */
1505         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1506         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1507         ldr     ip, [r1, #0x06]         /* BE:ip = 6789  LE:ip = 9876 */
1508         ldrh    r1, [r1, #0x0a]         /* BE:r1 = ..AB  LE:r1 = ..BA */
1509 #ifdef __ARMEB__
1510         mov     r2, r2, ror #8          /* r2 = 1..0 */
1511         strb    r2, [r0]                /* dst[0] (low byte of r2) */
1512         mov     r2, r2, lsr #16         /* r2 = ..1. */
1513         orr     r2, r2, r3, lsr #24     /* r2 = ..12 */
1514         strh    r2, [r0, #0x01]         /* dst[1..2] */
1515         mov     r2, r3, lsl #8          /* r2 = 345. */
1516         orr     r3, r2, ip, lsr #24     /* r3 = 3456 */
1517         mov     r2, ip, lsl #8          /* r2 = 789. */
1518         orr     r2, r2, r1, lsr #8      /* r2 = 789A */
1519 #else
1520         strb    r2, [r0]                /* dst[0] (low byte of r2) */
1521         mov     r2, r2, lsr #8          /* r2 = ...1 */
1522         orr     r2, r2, r3, lsl #8      /* r2 = 4321 */
1523         strh    r2, [r0, #0x01]         /* dst[1..2] (low halfword of r2) */
1524         mov     r2, r3, lsr #8          /* r2 = .543 */
1525         orr     r3, r2, ip, lsl #24     /* r3 = 6543 */
1526         mov     r2, ip, lsr #8          /* r2 = .987 */
1527         orr     r2, r2, r1, lsl #24     /* r2 = A987 */
1528         mov     r1, r1, lsr #8          /* r1 = ...B */
1529 #endif
1530         str     r3, [r0, #0x03]         /* dst[3..6] */
1531         str     r2, [r0, #0x07]         /* dst[7..10] */
1532         strb    r1, [r0, #0x0b]         /* dst[11] (low byte of r1) */
1533         bx      lr
1534         LMEMCPY_C_PAD
1535
1536 /*
1537  * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
1538  */
1539         ldrb    r2, [r1]                /* r2 = ...0 */
1540         ldr     r3, [r1, #0x01]         /* BE:r3 = 1234  LE:r3 = 4321 */
1541         ldr     ip, [r1, #0x05]         /* BE:ip = 5678  LE:ip = 8765 */
1542         ldr     r1, [r1, #0x09]         /* BE:r1 = 9ABx  LE:r1 = xBA9 */
1543         strb    r2, [r0]                /* dst[0] */
1544 #ifdef __ARMEB__
1545         mov     r2, r3, lsr #16         /* r2 = ..12 */
1546         strh    r2, [r0, #0x01]         /* dst[1..2] */
1547         mov     r3, r3, lsl #16         /* r3 = 34.. */
1548         orr     r3, r3, ip, lsr #16     /* r3 = 3456 */
1549         mov     ip, ip, lsl #16         /* ip = 78.. */
1550         orr     ip, ip, r1, lsr #16     /* ip = 789A */
1551         mov     r1, r1, lsr #8          /* r1 = .9AB */
1552 #else
1553         strh    r3, [r0, #0x01]         /* dst[1..2] (low halfword of r3) */
1554         mov     r3, r3, lsr #16         /* r3 = ..43 */
1555         orr     r3, r3, ip, lsl #16     /* r3 = 6543 */
1556         mov     ip, ip, lsr #16         /* ip = ..87 */
1557         orr     ip, ip, r1, lsl #16     /* ip = A987 */
1558         mov     r1, r1, lsr #16         /* r1 = ..xB */
1559 #endif
1560         str     r3, [r0, #0x03]         /* dst[3..6] */
1561         str     ip, [r0, #0x07]         /* dst[7..10] */
1562         strb    r1, [r0, #0x0b]         /* dst[11] (low byte of r1) */
1563         bx      lr
1564         LMEMCPY_C_PAD
1565
1566 /*
1567  * 1000: dst is 16-bit aligned, src is 32-bit aligned
1568  */
1569         ldr     ip, [r1]                /* BE:ip = 0123  LE:ip = 3210 */
1570         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1571         ldr     r2, [r1, #0x08]         /* BE:r2 = 89AB  LE:r2 = BA98 */
1572         mov     r1, ip, lsr #16         /* BE:r1 = ..01  LE:r1 = ..32 */
1573 #ifdef __ARMEB__
1574         strh    r1, [r0]                /* dst[0..1] */
1575         mov     r1, ip, lsl #16         /* r1 = 23.. */
1576         orr     r1, r1, r3, lsr #16     /* r1 = 2345 */
1577         mov     r3, r3, lsl #16         /* r3 = 67.. */
1578         orr     r3, r3, r2, lsr #16     /* r3 = 6789 */
1579 #else
1580         strh    ip, [r0]                /* dst[0..1] (low halfword of ip) */
1581         orr     r1, r1, r3, lsl #16     /* r1 = 5432 */
1582         mov     r3, r3, lsr #16         /* r3 = ..76 */
1583         orr     r3, r3, r2, lsl #16     /* r3 = 9876 */
1584         mov     r2, r2, lsr #16         /* r2 = ..BA */
1585 #endif
1586         str     r1, [r0, #0x02]         /* dst[2..5] */
1587         str     r3, [r0, #0x06]         /* dst[6..9] */
1588         strh    r2, [r0, #0x0a]         /* dst[10..11] (low halfword of r2) */
1589         bx      lr
1590         LMEMCPY_C_PAD
1591
1592 /*
1593  * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1)
1594  */
1595         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
1596         ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
1597         mov     ip, r2, lsr #8          /* BE:ip = .x01  LE:ip = .210 */
1598         strh    ip, [r0]                /* dst[0..1] (low halfword of ip) */
1599         ldr     ip, [r1, #0x07]         /* BE:ip = 789A  LE:ip = A987 */
1600         ldrb    r1, [r1, #0x0b]         /* r1 = ...B */
1601 #ifdef __ARMEB__
1602         mov     r2, r2, lsl #24         /* r2 = 2... */
1603         orr     r2, r2, r3, lsr #8      /* r2 = 2345 */
1604         mov     r3, r3, lsl #24         /* r3 = 6... */
1605         orr     r3, r3, ip, lsr #8      /* r3 = 6789 */
1606         orr     r1, r1, ip, lsl #8      /* r1 = 89AB */
1607 #else
1608         mov     r2, r2, lsr #24         /* r2 = ...2 */
1609         orr     r2, r2, r3, lsl #8      /* r2 = 5432 */
1610         mov     r3, r3, lsr #24         /* r3 = ...6 */
1611         orr     r3, r3, ip, lsl #8      /* r3 = 9876 */
1612         mov     r1, r1, lsl #8          /* r1 = ..B. */
1613         orr     r1, r1, ip, lsr #24     /* r1 = ..BA */
1614 #endif
1615         str     r2, [r0, #0x02]         /* dst[2..5] */
1616         str     r3, [r0, #0x06]         /* dst[6..9] */
1617         strh    r1, [r0, #0x0a]         /* dst[10..11] (low halfword of r1) */
1618         bx      lr
1619         LMEMCPY_C_PAD
1620
1621 /*
1622  * 1010: dst is 16-bit aligned, src is 16-bit aligned
1623  */
1624         ldrh    r2, [r1]                /* src[0..1] */
1625         ldr     r3, [r1, #0x02]         /* src[2..5] */
1626         ldr     ip, [r1, #0x06]         /* src[6..9] */
1627         ldrh    r1, [r1, #0x0a]         /* src[10..11] */
1628         strh    r2, [r0]                /* dst[0..1] */
1629         str     r3, [r0, #0x02]         /* dst[2..5] */
1630         str     ip, [r0, #0x06]         /* dst[6..9] */
1631         strh    r1, [r0, #0x0a]         /* dst[10..11] */
1632         bx      lr
1633         LMEMCPY_C_PAD
1634
1635 /*
1636  * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
1637  */
1638         ldr     r2, [r1, #0x09]         /* BE:r2 = 9ABx  LE:r2 = xBA9 */
1639         ldr     r3, [r1, #0x05]         /* BE:r3 = 5678  LE:r3 = 8765 */
1640         mov     ip, r2, lsr #8          /* BE:ip = .9AB  LE:ip = .xBA */
1641         strh    ip, [r0, #0x0a]         /* dst[10..11] (low halfword of ip) */
1642         ldr     ip, [r1, #0x01]         /* BE:ip = 1234  LE:ip = 4321 */
1643         ldrb    r1, [r1]                /* r1 = ...0 */
1644 #ifdef __ARMEB__
1645         mov     r2, r2, lsr #24         /* r2 = ...9 */
1646         orr     r2, r2, r3, lsl #8      /* r2 = 6789 */
1647         mov     r3, r3, lsr #24         /* r3 = ...5 */
1648         orr     r3, r3, ip, lsl #8      /* r3 = 2345 */
1649         mov     r1, r1, lsl #8          /* r1 = ..0. */
1650         orr     r1, r1, ip, lsr #24     /* r1 = ..01 */
1651 #else
1652         mov     r2, r2, lsl #24         /* r2 = 9... */
1653         orr     r2, r2, r3, lsr #8      /* r2 = 9876 */
1654         mov     r3, r3, lsl #24         /* r3 = 5... */
1655         orr     r3, r3, ip, lsr #8      /* r3 = 5432 */
1656         orr     r1, r1, ip, lsl #8      /* r1 = 3210 */
1657 #endif
1658         str     r2, [r0, #0x06]         /* dst[6..9] */
1659         str     r3, [r0, #0x02]         /* dst[2..5] */
1660         strh    r1, [r0]                /* dst[0..1] (low halfword of r1) */
1661         bx      lr
1662         LMEMCPY_C_PAD
1663
1664 /*
1665  * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
1666  */
1667         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1668         ldr     ip, [r1, #0x04]         /* BE:ip = 4567  LE:ip = 7654 */
1669         ldr     r1, [r1, #0x08]         /* BE:r1 = 89AB  LE:r1 = BA98 */
1670 #ifdef __ARMEB__
1671         mov     r3, r2, lsr #24         /* r3 = ...0 */
1672         strb    r3, [r0]                /* dst[0] */
1673         mov     r2, r2, lsl #8          /* r2 = 123. */
1674         orr     r2, r2, ip, lsr #24     /* r2 = 1234 */
1675         str     r2, [r0, #0x01]         /* dst[1..4] */
1676         mov     r2, ip, lsl #8          /* r2 = 567. */
1677         orr     r2, r2, r1, lsr #24     /* r2 = 5678 */
1678         str     r2, [r0, #0x05]         /* dst[5..8] */
1679         mov     r2, r1, lsr #8          /* r2 = .89A */
1680         strh    r2, [r0, #0x09]         /* dst[9..10] (low halfword of r2) */
1681         strb    r1, [r0, #0x0b]         /* dst[11] (low byte of r1) */
1682 #else
1683         strb    r2, [r0]                /* dst[0] (low byte of r2) */
1684         mov     r3, r2, lsr #8          /* r3 = .321 */
1685         orr     r3, r3, ip, lsl #24     /* r3 = 4321 */
1686         str     r3, [r0, #0x01]         /* dst[1..4] */
1687         mov     r3, ip, lsr #8          /* r3 = .765 */
1688         orr     r3, r3, r1, lsl #24     /* r3 = 8765 */
1689         str     r3, [r0, #0x05]         /* dst[5..8] */
1690         mov     r1, r1, lsr #8          /* r1 = .BA9 */
1691         strh    r1, [r0, #0x09]         /* dst[9..10] (low halfword of r1) */
1692         mov     r1, r1, lsr #16         /* r1 = ...B */
1693         strb    r1, [r0, #0x0b]         /* dst[11] */
1694 #endif
1695         bx      lr
1696         LMEMCPY_C_PAD
1697
1698 /*
1699  * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
      *
      * Copies the 12 source bytes 0..B from r1 to r0.  In the register maps
      * below each digit is one source byte, "." is a zero byte and "x" is a
      * byte outside the 12 that is loaded but never stored.
      *
      * Assuming the case number encodes (dst & 3, src & 3) - to be confirmed
      * against the dispatch code outside this view - r1-1, r1+3 and r1+7 are
      * word addresses, so bytes 0..A come from three word loads (the first
      * one also picks up the byte just before src) and byte B from a byte
      * load.  src and dst differ by two bytes within a word, hence the
      * 16-bit shifts.  Stores are byte, word, word, halfword, byte.
1700  */
1701         ldrb    r2, [r1, #0x0b]         /* r2 = ...B */
1702         ldr     r3, [r1, #0x07]         /* BE:r3 = 789A  LE:r3 = A987 */
1703         ldr     ip, [r1, #0x03]         /* BE:ip = 3456  LE:ip = 6543 */
1704         ldr     r1, [r1, #-1]           /* BE:r1 = x012  LE:r1 = 210x */
1705         strb    r2, [r0, #0x0b]         /* dst byte B */
1706 #ifdef __ARMEB__
1707         strh    r3, [r0, #0x09]         /* low half of r3: dst bytes 9,A */
1708         mov     r3, r3, lsr #16         /* r3 = ..78 */
1709         orr     r3, r3, ip, lsl #16     /* r3 = 5678 */
1710         mov     ip, ip, lsr #16         /* ip = ..34 */
1711         orr     ip, ip, r1, lsl #16     /* ip = 1234 */
1712         mov     r1, r1, lsr #16         /* r1 = ..x0 */
1713 #else
1714         mov     r2, r3, lsr #16         /* r2 = ..A9 */
1715         strh    r2, [r0, #0x09]         /* dst bytes 9,A */
1716         mov     r3, r3, lsl #16         /* r3 = 87.. */
1717         orr     r3, r3, ip, lsr #16     /* r3 = 8765 */
1718         mov     ip, ip, lsl #16         /* ip = 43.. */
1719         orr     ip, ip, r1, lsr #16     /* ip = 4321 */
1720         mov     r1, r1, lsr #8          /* r1 = .210 */
1721 #endif
1722         str     r3, [r0, #0x05]         /* dst bytes 5..8 */
1723         str     ip, [r0, #0x01]         /* dst bytes 1..4 */
1724         strb    r1, [r0]                /* low byte of r1: dst byte 0 */
1725         bx      lr                      /* return; r0 was not modified here */
1726         LMEMCPY_C_PAD                   /* defined outside this view; presumably pads to the next case slot - confirm */
1727
1728 /*
1729  * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
      *
      * Copies the 12 source bytes 0..B from r1 to r0.  In the register maps
      * below each digit is one source byte and "." is a zero byte.
      *
      * Assuming the case number encodes (dst & 3, src & 3) - to be confirmed
      * against the dispatch code outside this view - r1+2 and r1+6 are word
      * addresses, so the source is read as halfword, word, word, halfword.
      * The two endian variants load the same four pieces in opposite order
      * and use different registers for them.  The data is then shifted by
      * one byte and written as byte, word, word, halfword, byte.
1730  */
1731 #ifdef __ARMEB__
1732         ldrh    r2, [r1, #0x0a]         /* r2 = ..AB */
1733         ldr     ip, [r1, #0x06]         /* ip = 6789 */
1734         ldr     r3, [r1, #0x02]         /* r3 = 2345 */
1735         ldrh    r1, [r1]                /* r1 = ..01 */
1736         strb    r2, [r0, #0x0b]         /* low byte of r2: dst byte B */
1737         mov     r2, r2, lsr #8          /* r2 = ...A */
1738         orr     r2, r2, ip, lsl #8      /* r2 = 789A */
1739         mov     ip, ip, lsr #8          /* ip = .678 */
1740         orr     ip, ip, r3, lsl #24     /* ip = 5678 */
1741         mov     r3, r3, lsr #8          /* r3 = .234 */
1742         orr     r3, r3, r1, lsl #24     /* r3 = 1234 */
1743         mov     r1, r1, lsr #8          /* r1 = ...0 */
1744         strb    r1, [r0]                /* dst byte 0 */
1745         str     r3, [r0, #0x01]         /* dst bytes 1..4 */
1746         str     ip, [r0, #0x05]         /* dst bytes 5..8 */
1747         strh    r2, [r0, #0x09]         /* low half of r2: dst bytes 9,A */
1748 #else
1749         ldrh    r2, [r1]                /* r2 = ..10 */
1750         ldr     r3, [r1, #0x02]         /* r3 = 5432 */
1751         ldr     ip, [r1, #0x06]         /* ip = 9876 */
1752         ldrh    r1, [r1, #0x0a]         /* r1 = ..BA */
1753         strb    r2, [r0]                /* low byte of r2: dst byte 0 */
1754         mov     r2, r2, lsr #8          /* r2 = ...1 */
1755         orr     r2, r2, r3, lsl #8      /* r2 = 4321 */
1756         mov     r3, r3, lsr #24         /* r3 = ...5 */
1757         orr     r3, r3, ip, lsl #8      /* r3 = 8765 */
1758         mov     ip, ip, lsr #24         /* ip = ...9 */
1759         orr     ip, ip, r1, lsl #8      /* ip = .BA9 */
1760         mov     r1, r1, lsr #8          /* r1 = ...B */
1761         str     r2, [r0, #0x01]         /* dst bytes 1..4 */
1762         str     r3, [r0, #0x05]         /* dst bytes 5..8 */
1763         strh    ip, [r0, #0x09]         /* low half of ip: dst bytes 9,A */
1764         strb    r1, [r0, #0x0b]         /* dst byte B */
1765 #endif
1766         bx      lr                      /* return; r0 was not modified here */
1767         LMEMCPY_C_PAD                   /* defined outside this view; presumably pads to the next case slot - confirm */
1768
1769 /*
1770  * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
      *
      * Copies the 12 source bytes 0..B from r1 to r0.  src and dst sit at
      * the same byte within a word, so no shifting or endian-specific code
      * is needed: the data is moved as byte, word, word, halfword, byte
      * using matching offsets on both sides.  Loads and stores are
      * interleaved, and r2 is reused once its first value has been stored.
1771  */
1772         ldrb    r2, [r1]                /* r2 = source byte 0 */
1773         ldr     r3, [r1, #0x01]         /* r3 = source bytes 1..4 */
1774         ldr     ip, [r1, #0x05]         /* ip = source bytes 5..8 */
1775         strb    r2, [r0]                /* dst byte 0; r2 is free again */
1776         ldrh    r2, [r1, #0x09]         /* r2 = source bytes 9,A */
1777         ldrb    r1, [r1, #0x0b]         /* r1 = source byte B (src pointer is overwritten) */
1778         str     r3, [r0, #0x01]         /* dst bytes 1..4 */
1779         str     ip, [r0, #0x05]         /* dst bytes 5..8 */
1780         strh    r2, [r0, #0x09]         /* dst bytes 9,A */
1781         strb    r1, [r0, #0x0b]         /* dst byte B */
1782         bx      lr                      /* return; r0 was not modified here */
1783 #endif  /* !_STANDALONE */