]> CyberLeo.Net >> Repos - FreeBSD/releng/10.2.git/blob - lib/libc/arm/string/memcpy_xscale.S
- Copy stable/10@285827 to releng/10.2 in preparation for 10.2-RC1
[FreeBSD/releng/10.2.git] / lib / libc / arm / string / memcpy_xscale.S
1 /*      $NetBSD: memcpy_xscale.S,v 1.1 2003/10/14 07:51:45 scw Exp $    */
2
3 /*
4  * Copyright 2003 Wasabi Systems, Inc.
5  * All rights reserved.
6  *
7  * Written by Steve C. Woodford for Wasabi Systems, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed for the NetBSD Project by
20  *      Wasabi Systems, Inc.
21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22  *    or promote products derived from this software without specific prior
23  *    written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37
38 #include <machine/asm.h>
39 __FBSDID("$FreeBSD$");
40
41 .syntax unified
42
43 /* LINTSTUB: Func: void *memcpy(void *dst, const void *src, size_t len) */
44 ENTRY(memcpy)
45         pld     [r1]                    /* Prefetch the start of the source */
46         cmp     r2, #0x0c
47         ble     .Lmemcpy_short          /* <= 12 bytes */
48         mov     r3, r0                  /* We must not clobber r0 */
49
50         /* Word-align the destination buffer */
51         ands    ip, r3, #0x03           /* Already word aligned? */
52         beq     .Lmemcpy_wordaligned    /* Yup */
53         cmp     ip, #0x02               /* dst & 3: 1 -> 3 bytes, 2 -> 2, 3 -> 1 */
54         ldrb    ip, [r1], #0x01         /* Always copy one byte; flags survive */
55         sub     r2, r2, #0x01
56         strb    ip, [r3], #0x01
57         ldrble  ip, [r1], #0x01         /* Second byte when dst & 3 was 1 or 2 */
58         suble   r2, r2, #0x01
59         strble  ip, [r3], #0x01
60         ldrblt  ip, [r1], #0x01         /* Third byte when dst & 3 was 1 */
61         sublt   r2, r2, #0x01
62         strblt  ip, [r3], #0x01
63
64         /* Destination buffer is now word aligned */
65 .Lmemcpy_wordaligned:
66         ands    ip, r1, #0x03           /* Is src also word-aligned? */
67         bne     .Lmemcpy_bad_align      /* Nope. Things just got bad */
68
69         /* Quad-align the destination buffer */
70         tst     r3, #0x07               /* Already quad aligned? */
71         ldrne   ip, [r1], #0x04
72         stmfd   sp!, {r4-r9}            /* Free up some registers */
73         subne   r2, r2, #0x04
74         strne   ip, [r3], #0x04
75
76         /* Destination buffer quad aligned, source is at least word aligned */
77         subs    r2, r2, #0x80
78         blt     .Lmemcpy_w_lessthan128
79
80         /* Copy 128 bytes at a time */
81 .Lmemcpy_w_loop128:
82         ldr     r4, [r1], #0x04         /* LD:00-03 */
83         ldr     r5, [r1], #0x04         /* LD:04-07 */
84         pld     [r1, #0x18]             /* Prefetch 0x20 */
85         ldr     r6, [r1], #0x04         /* LD:08-0b */
86         ldr     r7, [r1], #0x04         /* LD:0c-0f */
87         ldr     r8, [r1], #0x04         /* LD:10-13 */
88         ldr     r9, [r1], #0x04         /* LD:14-17 */
89         strd    r4, [r3], #0x08         /* ST:00-07 */
90         ldr     r4, [r1], #0x04         /* LD:18-1b */
91         ldr     r5, [r1], #0x04         /* LD:1c-1f */
92         strd    r6, [r3], #0x08         /* ST:08-0f */
93         ldr     r6, [r1], #0x04         /* LD:20-23 */
94         ldr     r7, [r1], #0x04         /* LD:24-27 */
95         pld     [r1, #0x18]             /* Prefetch 0x40 */
96         strd    r8, [r3], #0x08         /* ST:10-17 */
97         ldr     r8, [r1], #0x04         /* LD:28-2b */
98         ldr     r9, [r1], #0x04         /* LD:2c-2f */
99         strd    r4, [r3], #0x08         /* ST:18-1f */
100         ldr     r4, [r1], #0x04         /* LD:30-33 */
101         ldr     r5, [r1], #0x04         /* LD:34-37 */
102         strd    r6, [r3], #0x08         /* ST:20-27 */
103         ldr     r6, [r1], #0x04         /* LD:38-3b */
104         ldr     r7, [r1], #0x04         /* LD:3c-3f */
105         strd    r8, [r3], #0x08         /* ST:28-2f */
106         ldr     r8, [r1], #0x04         /* LD:40-43 */
107         ldr     r9, [r1], #0x04         /* LD:44-47 */
108         pld     [r1, #0x18]             /* Prefetch 0x60 */
109         strd    r4, [r3], #0x08         /* ST:30-37 */
110         ldr     r4, [r1], #0x04         /* LD:48-4b */
111         ldr     r5, [r1], #0x04         /* LD:4c-4f */
112         strd    r6, [r3], #0x08         /* ST:38-3f */
113         ldr     r6, [r1], #0x04         /* LD:50-53 */
114         ldr     r7, [r1], #0x04         /* LD:54-57 */
115         strd    r8, [r3], #0x08         /* ST:40-47 */
116         ldr     r8, [r1], #0x04         /* LD:58-5b */
117         ldr     r9, [r1], #0x04         /* LD:5c-5f */
118         strd    r4, [r3], #0x08         /* ST:48-4f */
119         ldr     r4, [r1], #0x04         /* LD:60-63 */
120         ldr     r5, [r1], #0x04         /* LD:64-67 */
121         pld     [r1, #0x18]             /* Prefetch 0x80 */
122         strd    r6, [r3], #0x08         /* ST:50-57 */
123         ldr     r6, [r1], #0x04         /* LD:68-6b */
124         ldr     r7, [r1], #0x04         /* LD:6c-6f */
125         strd    r8, [r3], #0x08         /* ST:58-5f */
126         ldr     r8, [r1], #0x04         /* LD:70-73 */
127         ldr     r9, [r1], #0x04         /* LD:74-77 */
128         strd    r4, [r3], #0x08         /* ST:60-67 */
129         ldr     r4, [r1], #0x04         /* LD:78-7b */
130         ldr     r5, [r1], #0x04         /* LD:7c-7f */
131         strd    r6, [r3], #0x08         /* ST:68-6f */
132         strd    r8, [r3], #0x08         /* ST:70-77 */
133         subs    r2, r2, #0x80
134         strd    r4, [r3], #0x08         /* ST:78-7f */
135         bge     .Lmemcpy_w_loop128
136
137 .Lmemcpy_w_lessthan128:
138         adds    r2, r2, #0x80           /* Adjust for extra sub */
139         ldmfdeq sp!, {r4-r9}
140         bxeq    lr                      /* Return now if done */
141         subs    r2, r2, #0x20
142         blt     .Lmemcpy_w_lessthan32
143
144         /* Copy 32 bytes at a time */
145 .Lmemcpy_w_loop32:
146         ldr     r4, [r1], #0x04
147         ldr     r5, [r1], #0x04
148         pld     [r1, #0x18]
149         ldr     r6, [r1], #0x04
150         ldr     r7, [r1], #0x04
151         ldr     r8, [r1], #0x04
152         ldr     r9, [r1], #0x04
153         strd    r4, [r3], #0x08
154         ldr     r4, [r1], #0x04
155         ldr     r5, [r1], #0x04
156         strd    r6, [r3], #0x08
157         strd    r8, [r3], #0x08
158         subs    r2, r2, #0x20
159         strd    r4, [r3], #0x08
160         bge     .Lmemcpy_w_loop32
161
162 .Lmemcpy_w_lessthan32:
163         adds    r2, r2, #0x20           /* Adjust for extra sub */
164         ldmfdeq sp!, {r4-r9}
165         bxeq    lr                      /* Return now if done */
166
167         and     r4, r2, #0x18           /* r4 = bytes left in whole 8-byte chunks */
168         rsbs    r4, r4, #0x18           /* r4 = 24 - r4; Z set if 24+ bytes left */
169         addne   pc, pc, r4, lsl #1      /* Skip r4 / 8 of the 16-byte blocks below */
170         nop                             /* Filler: pc reads as the addne + 8 */
171
172         /* At least 24 bytes remaining */
173         ldr     r4, [r1], #0x04
174         ldr     r5, [r1], #0x04
175         sub     r2, r2, #0x08
176         strd    r4, [r3], #0x08
177
178         /* At least 16 bytes remaining */
179         ldr     r4, [r1], #0x04
180         ldr     r5, [r1], #0x04
181         sub     r2, r2, #0x08
182         strd    r4, [r3], #0x08
183
184         /* At least 8 bytes remaining */
185         ldr     r4, [r1], #0x04
186         ldr     r5, [r1], #0x04
187         subs    r2, r2, #0x08           /* Only this one sets flags, for the bxeq below */
188         strd    r4, [r3], #0x08
189
190         /* Less than 8 bytes remaining */
191         ldmfd   sp!, {r4-r9}            /* Restore; flags are left alone */
192         bxeq    lr                      /* Return now if done */
193         subs    r2, r2, #0x04
194         ldrge   ip, [r1], #0x04         /* Copy one more word if 4+ bytes left */
195         strge   ip, [r3], #0x04
196         bxeq    lr                      /* Return now if done */
197         addlt   r2, r2, #0x04           /* No word copied: undo the sub */
198         ldrb    ip, [r1], #0x01         /* 1 to 3 bytes left; load the first */
199         cmp     r2, #0x02
200         ldrbge  r2, [r1], #0x01         /* Second byte; r2 now holds data, not a count */
201         strb    ip, [r3], #0x01
202         ldrbgt  ip, [r1]                /* Third byte */
203         strbge  r2, [r3], #0x01
204         strbgt  ip, [r3]
205         bx      lr
206
207
208 /*
209  * At this point, it has not been possible to word align both buffers.
210  * The destination buffer is word aligned, but the source buffer is not.
211  */
212 .Lmemcpy_bad_align:
213         stmfd   sp!, {r4-r7}            /* Free up r4-r7 for the merge loops */
214         bic     r1, r1, #0x03           /* Round src down to a word boundary */
215         cmp     ip, #2                  /* ip = src & 3 here (1, 2 or 3) */
216         ldr     ip, [r1], #0x04         /* Preload first source word; flags kept */
217         bgt     .Lmemcpy_bad3           /* src & 3 == 3 */
218         beq     .Lmemcpy_bad2           /* src & 3 == 2 */
219         b       .Lmemcpy_bad1           /* src & 3 == 1 */
220
221 .Lmemcpy_bad1_loop16:
222 #ifdef __ARMEB__
223         mov     r4, ip, lsl #8
224 #else
225         mov     r4, ip, lsr #8
226 #endif
227         ldr     r5, [r1], #0x04
228         pld     [r1, #0x018]
229         ldr     r6, [r1], #0x04
230         ldr     r7, [r1], #0x04
231         ldr     ip, [r1], #0x04
232 #ifdef __ARMEB__
233         orr     r4, r4, r5, lsr #24
234         mov     r5, r5, lsl #8
235         orr     r5, r5, r6, lsr #24
236         mov     r6, r6, lsl #8
237         orr     r6, r6, r7, lsr #24
238         mov     r7, r7, lsl #8
239         orr     r7, r7, ip, lsr #24
240 #else
241         orr     r4, r4, r5, lsl #24
242         mov     r5, r5, lsr #8
243         orr     r5, r5, r6, lsl #24
244         mov     r6, r6, lsr #8
245         orr     r6, r6, r7, lsl #24
246         mov     r7, r7, lsr #8
247         orr     r7, r7, ip, lsl #24
248 #endif
249         str     r4, [r3], #0x04
250         str     r5, [r3], #0x04
251         str     r6, [r3], #0x04
252         str     r7, [r3], #0x04
253 .Lmemcpy_bad1:
254         subs    r2, r2, #0x10         
255         bge     .Lmemcpy_bad1_loop16
256
257         adds    r2, r2, #0x10         
258         ldmfdeq sp!, {r4-r7}
259         bxeq    lr                      /* Return now if done */
260         subs    r2, r2, #0x04
261         sublt   r1, r1, #0x03
262         blt     .Lmemcpy_bad_done
263
264 .Lmemcpy_bad1_loop4:
265 #ifdef __ARMEB__
266         mov     r4, ip, lsl #8
267 #else
268         mov     r4, ip, lsr #8
269 #endif
270         ldr     ip, [r1], #0x04
271         subs    r2, r2, #0x04
272 #ifdef __ARMEB__
273         orr     r4, r4, ip, lsr #24
274 #else
275         orr     r4, r4, ip, lsl #24
276 #endif
277         str     r4, [r3], #0x04
278         bge     .Lmemcpy_bad1_loop4
279         sub     r1, r1, #0x03
280         b       .Lmemcpy_bad_done
281
282 .Lmemcpy_bad2_loop16:
283 #ifdef __ARMEB__
284         mov     r4, ip, lsl #16
285 #else
286         mov     r4, ip, lsr #16
287 #endif
288         ldr     r5, [r1], #0x04
289         pld     [r1, #0x018]
290         ldr     r6, [r1], #0x04
291         ldr     r7, [r1], #0x04
292         ldr     ip, [r1], #0x04
293 #ifdef __ARMEB__
294         orr     r4, r4, r5, lsr #16
295         mov     r5, r5, lsl #16
296         orr     r5, r5, r6, lsr #16
297         mov     r6, r6, lsl #16
298         orr     r6, r6, r7, lsr #16
299         mov     r7, r7, lsl #16
300         orr     r7, r7, ip, lsr #16
301 #else
302         orr     r4, r4, r5, lsl #16
303         mov     r5, r5, lsr #16
304         orr     r5, r5, r6, lsl #16
305         mov     r6, r6, lsr #16
306         orr     r6, r6, r7, lsl #16
307         mov     r7, r7, lsr #16
308         orr     r7, r7, ip, lsl #16
309 #endif
310         str     r4, [r3], #0x04
311         str     r5, [r3], #0x04
312         str     r6, [r3], #0x04
313         str     r7, [r3], #0x04
314 .Lmemcpy_bad2:
315         subs    r2, r2, #0x10         
316         bge     .Lmemcpy_bad2_loop16
317
318         adds    r2, r2, #0x10         
319         ldmfdeq sp!, {r4-r7}
320         bxeq    lr                      /* Return now if done */
321         subs    r2, r2, #0x04
322         sublt   r1, r1, #0x02
323         blt     .Lmemcpy_bad_done
324
325 .Lmemcpy_bad2_loop4:
326 #ifdef __ARMEB__
327         mov     r4, ip, lsl #16
328 #else
329         mov     r4, ip, lsr #16
330 #endif
331         ldr     ip, [r1], #0x04
332         subs    r2, r2, #0x04
333 #ifdef __ARMEB__
334         orr     r4, r4, ip, lsr #16
335 #else
336         orr     r4, r4, ip, lsl #16
337 #endif
338         str     r4, [r3], #0x04
339         bge     .Lmemcpy_bad2_loop4
340         sub     r1, r1, #0x02
341         b       .Lmemcpy_bad_done
342
343 .Lmemcpy_bad3_loop16:
344 #ifdef __ARMEB__
345         mov     r4, ip, lsl #24
346 #else
347         mov     r4, ip, lsr #24
348 #endif
349         ldr     r5, [r1], #0x04
350         pld     [r1, #0x018]
351         ldr     r6, [r1], #0x04
352         ldr     r7, [r1], #0x04
353         ldr     ip, [r1], #0x04
354 #ifdef __ARMEB__
355         orr     r4, r4, r5, lsr #8
356         mov     r5, r5, lsl #24
357         orr     r5, r5, r6, lsr #8
358         mov     r6, r6, lsl #24
359         orr     r6, r6, r7, lsr #8
360         mov     r7, r7, lsl #24
361         orr     r7, r7, ip, lsr #8
362 #else
363         orr     r4, r4, r5, lsl #8
364         mov     r5, r5, lsr #24
365         orr     r5, r5, r6, lsl #8
366         mov     r6, r6, lsr #24
367         orr     r6, r6, r7, lsl #8
368         mov     r7, r7, lsr #24
369         orr     r7, r7, ip, lsl #8
370 #endif
371         str     r4, [r3], #0x04
372         str     r5, [r3], #0x04
373         str     r6, [r3], #0x04
374         str     r7, [r3], #0x04
375 .Lmemcpy_bad3:
376         subs    r2, r2, #0x10         
377         bge     .Lmemcpy_bad3_loop16
378
379         adds    r2, r2, #0x10         
380         ldmfdeq sp!, {r4-r7}
381         bxeq    lr                      /* Return now if done */
382         subs    r2, r2, #0x04
383         sublt   r1, r1, #0x01
384         blt     .Lmemcpy_bad_done
385
386 .Lmemcpy_bad3_loop4:
387 #ifdef __ARMEB__
388         mov     r4, ip, lsl #24
389 #else
390         mov     r4, ip, lsr #24
391 #endif
392         ldr     ip, [r1], #0x04
393         subs    r2, r2, #0x04
394 #ifdef __ARMEB__
395         orr     r4, r4, ip, lsr #8
396 #else
397         orr     r4, r4, ip, lsl #8
398 #endif
399         str     r4, [r3], #0x04
400         bge     .Lmemcpy_bad3_loop4
401         sub     r1, r1, #0x01
402
403 .Lmemcpy_bad_done:
404         ldmfd   sp!, {r4-r7}
405         adds    r2, r2, #0x04
406         bxeq    lr
407         ldrb    ip, [r1], #0x01
408         cmp     r2, #0x02
409         ldrbge  r2, [r1], #0x01
410         strb    ip, [r3], #0x01
411         ldrbgt  ip, [r1]
412         strbge  r2, [r3], #0x01
413         strbgt  ip, [r3]
414         bx      lr
415
416
417 /*
418  * Handle short copies (12 bytes or fewer), possibly misaligned.
419  * Some of these are *very* common, thanks to the network stack,
420  * and so are handled specially.
421  */
422 .Lmemcpy_short:
423 #ifndef _STANDALONE
424         add     pc, pc, r2, lsl #2      /* Index the branch table below by len */
425         nop                             /* Filler: pc reads as the add + 8 */
426         bx      lr                      /* 0x00 */
427         b       .Lmemcpy_bytewise       /* 0x01 */
428         b       .Lmemcpy_bytewise       /* 0x02 */
429         b       .Lmemcpy_bytewise       /* 0x03 */
430         b       .Lmemcpy_4              /* 0x04 */
431         b       .Lmemcpy_bytewise       /* 0x05 */
432         b       .Lmemcpy_6              /* 0x06 */
433         b       .Lmemcpy_bytewise       /* 0x07 */
434         b       .Lmemcpy_8              /* 0x08 */
435         b       .Lmemcpy_bytewise       /* 0x09 */
436         b       .Lmemcpy_bytewise       /* 0x0a */
437         b       .Lmemcpy_bytewise       /* 0x0b */
438         b       .Lmemcpy_c              /* 0x0c */
439 #endif
440 .Lmemcpy_bytewise:                      /* NOTE(review): if _STANDALONE drops the table, len 0 lands here and the count wraps - confirm */
441         mov     r3, r0                  /* We must not clobber r0 */
442         ldrb    ip, [r1], #0x01         /* Load the first byte ahead of the loop */
443 1:      subs    r2, r2, #0x01           /* Count down; Z marks the last byte */
444         strb    ip, [r3], #0x01
445         ldrbne  ip, [r1], #0x01         /* Fetch the next byte only if one remains */
446         bne     1b
447         bx      lr
448
449 #ifndef _STANDALONE
450 /******************************************************************************
451  * Special case for 4 byte copies
452  */
453 #define LMEMCPY_4_LOG2  6       /* 64 bytes */
454 #define LMEMCPY_4_PAD   .align LMEMCPY_4_LOG2
455         LMEMCPY_4_PAD
456 .Lmemcpy_4:
457         and     r2, r1, #0x03           /* r2 = src & 3 */
458         orr     r2, r2, r0, lsl #2      /* Merge in dst << 2 */
459         ands    r2, r2, #0x0f           /* Case = (dst & 3) << 2 | (src & 3) */
460         sub     r3, pc, #0x14           /* r3 = .Lmemcpy_4 (pc reads as this insn + 8) */
461         addne   pc, r3, r2, lsl #LMEMCPY_4_LOG2 /* Jump to the 64-byte slot for this case */
462
463 /*
464  * 0000: dst is 32-bit aligned, src is 32-bit aligned
465  */
466         ldr     r2, [r1]
467         str     r2, [r0]
468         bx      lr
469         LMEMCPY_4_PAD
470
471 /*
472  * 0001: dst is 32-bit aligned, src is 8-bit aligned
473  */
474         ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
475         ldr     r2, [r1, #3]            /* BE:r2 = 3xxx  LE:r2 = xxx3 */
476 #ifdef __ARMEB__
477         mov     r3, r3, lsl #8          /* r3 = 012. */
478         orr     r3, r3, r2, lsr #24     /* r3 = 0123 */
479 #else
480         mov     r3, r3, lsr #8          /* r3 = .210 */
481         orr     r3, r3, r2, lsl #24     /* r3 = 3210 */
482 #endif
483         str     r3, [r0]
484         bx      lr
485         LMEMCPY_4_PAD
486
487 /*
488  * 0010: dst is 32-bit aligned, src is 16-bit aligned
489  */
490 #ifdef __ARMEB__
491         ldrh    r3, [r1]
492         ldrh    r2, [r1, #0x02]
493 #else
494         ldrh    r3, [r1, #0x02]
495         ldrh    r2, [r1]
496 #endif
497         orr     r3, r2, r3, lsl #16
498         str     r3, [r0]
499         bx      lr
500         LMEMCPY_4_PAD
501
502 /*
503  * 0011: dst is 32-bit aligned, src is 8-bit aligned
504  */
505         ldr     r3, [r1, #-3]           /* BE:r3 = xxx0  LE:r3 = 0xxx */
506         ldr     r2, [r1, #1]            /* BE:r2 = 123x  LE:r2 = x321 */
507 #ifdef __ARMEB__
508         mov     r3, r3, lsl #24         /* r3 = 0... */
509         orr     r3, r3, r2, lsr #8      /* r3 = 0123 */
510 #else
511         mov     r3, r3, lsr #24         /* r3 = ...0 */
512         orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
513 #endif
514         str     r3, [r0]
515         bx      lr
516         LMEMCPY_4_PAD
517
518 /*
519  * 0100: dst is 8-bit aligned, src is 32-bit aligned
520  */
521         ldr     r2, [r1]
522 #ifdef __ARMEB__
523         strb    r2, [r0, #0x03]
524         mov     r3, r2, lsr #8
525         mov     r1, r2, lsr #24
526         strb    r1, [r0]
527 #else
528         strb    r2, [r0]
529         mov     r3, r2, lsr #8
530         mov     r1, r2, lsr #24
531         strb    r1, [r0, #0x03]
532 #endif
533         strh    r3, [r0, #0x01]
534         bx      lr
535         LMEMCPY_4_PAD
536
537 /*
538  * 0101: dst is 8-bit aligned, src is 8-bit aligned
539  */
540         ldrb    r2, [r1]
541         ldrh    r3, [r1, #0x01]
542         ldrb    r1, [r1, #0x03]
543         strb    r2, [r0]
544         strh    r3, [r0, #0x01]
545         strb    r1, [r0, #0x03]
546         bx      lr
547         LMEMCPY_4_PAD
548
549 /*
550  * 0110: dst is 8-bit aligned, src is 16-bit aligned
551  */
552         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
553         ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
554 #ifdef __ARMEB__
555         mov     r1, r2, lsr #8          /* r1 = ...0 */
556         strb    r1, [r0]                /* Store byte 0 */
557         mov     r2, r2, lsl #8          /* r2 = .01. */
558         orr     r2, r2, r3, lsr #8      /* r2 = .012 */
559 #else
560         strb    r2, [r0]                /* Store byte 0 */
561         mov     r2, r2, lsr #8          /* r2 = ...1 */
562         orr     r2, r2, r3, lsl #8      /* r2 = .321 */
563         mov     r3, r3, lsr #8          /* r3 = ...3 */
564 #endif
565         strh    r2, [r0, #0x01]         /* Store bytes 1-2 (low halfword of r2) */
566         strb    r3, [r0, #0x03]         /* Store byte 3 (low byte of r3) */
567         bx      lr
568         LMEMCPY_4_PAD
569
570 /*
571  * 0111: dst is 8-bit aligned, src is 8-bit aligned
572  */
573         ldrb    r2, [r1]
574         ldrh    r3, [r1, #0x01]
575         ldrb    r1, [r1, #0x03]
576         strb    r2, [r0]
577         strh    r3, [r0, #0x01]
578         strb    r1, [r0, #0x03]
579         bx      lr
580         LMEMCPY_4_PAD
581
582 /*
583  * 1000: dst is 16-bit aligned, src is 32-bit aligned
584  */
585         ldr     r2, [r1]
586 #ifdef __ARMEB__
587         strh    r2, [r0, #0x02]
588         mov     r3, r2, lsr #16
589         strh    r3, [r0]
590 #else
591         strh    r2, [r0]
592         mov     r3, r2, lsr #16
593         strh    r3, [r0, #0x02]
594 #endif
595         bx       lr
596         LMEMCPY_4_PAD
597
598 /*
599  * 1001: dst is 16-bit aligned, src is 8-bit aligned
600  */
601         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
602         ldr     r3, [r1, #3]            /* BE:r3 = 3xxx  LE:r3 = xxx3 */
603         mov     r1, r2, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
604         strh    r1, [r0]
605 #ifdef __ARMEB__
606         mov     r2, r2, lsl #8          /* r2 = 012. */
607         orr     r2, r2, r3, lsr #24     /* r2 = 0123 */
608 #else
609         mov     r2, r2, lsr #24         /* r2 = ...2 */
610         orr     r2, r2, r3, lsl #8      /* r2 = xx32 */
611 #endif
612         strh    r2, [r0, #0x02]
613         bx      lr
614         LMEMCPY_4_PAD
615
616 /*
617  * 1010: dst is 16-bit aligned, src is 16-bit aligned
618  */
619         ldrh    r2, [r1]
620         ldrh    r3, [r1, #0x02]
621         strh    r2, [r0]
622         strh    r3, [r0, #0x02]
623         bx      lr
624         LMEMCPY_4_PAD
625
626 /*
627  * 1011: dst is 16-bit aligned, src is 8-bit aligned
628  */
629         ldr     r3, [r1, #1]            /* BE:r3 = 123x  LE:r3 = x321 */
630         ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
631         mov     r1, r3, lsr #8          /* BE:r1 = .123  LE:r1 = .x32 */
632         strh    r1, [r0, #0x02]
633 #ifdef __ARMEB__
634         mov     r3, r3, lsr #24         /* r3 = ...1 */
635         orr     r3, r3, r2, lsl #8      /* r3 = xx01 */
636 #else
637         mov     r3, r3, lsl #8          /* r3 = 321. */
638         orr     r3, r3, r2, lsr #24     /* r3 = 3210 */
639 #endif
640         strh    r3, [r0]
641         bx      lr
642         LMEMCPY_4_PAD
643
644 /*
645  * 1100: dst is 8-bit aligned, src is 32-bit aligned
646  */
647         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
648 #ifdef __ARMEB__
649         strb    r2, [r0, #0x03]
650         mov     r3, r2, lsr #8
651         mov     r1, r2, lsr #24
652         strh    r3, [r0, #0x01]
653         strb    r1, [r0]
654 #else
655         strb    r2, [r0]
656         mov     r3, r2, lsr #8
657         mov     r1, r2, lsr #24
658         strh    r3, [r0, #0x01]
659         strb    r1, [r0, #0x03]
660 #endif
661         bx      lr
662         LMEMCPY_4_PAD
663
664 /*
665  * 1101: dst is 8-bit aligned, src is 8-bit aligned
666  */
667         ldrb    r2, [r1]
668         ldrh    r3, [r1, #0x01]
669         ldrb    r1, [r1, #0x03]
670         strb    r2, [r0]
671         strh    r3, [r0, #0x01]
672         strb    r1, [r0, #0x03]
673         bx      lr
674         LMEMCPY_4_PAD
675
676 /*
677  * 1110: dst is 8-bit aligned, src is 16-bit aligned
678  */
679 #ifdef __ARMEB__
680         ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
681         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
682         strb    r3, [r0, #0x03]
683         mov     r3, r3, lsr #8          /* r3 = ...2 */
684         orr     r3, r3, r2, lsl #8      /* r3 = ..12 */
685         strh    r3, [r0, #0x01]
686         mov     r2, r2, lsr #8          /* r2 = ...0 */
687         strb    r2, [r0]
688 #else
689         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
690         ldrh    r3, [r1, #0x02]         /* BE:r3 = ..23  LE:r3 = ..32 */
691         strb    r2, [r0]
692         mov     r2, r2, lsr #8          /* r2 = ...1 */
693         orr     r2, r2, r3, lsl #8      /* r2 = .321 */
694         strh    r2, [r0, #0x01]
695         mov     r3, r3, lsr #8          /* r3 = ...3 */
696         strb    r3, [r0, #0x03]
697 #endif
698         bx      lr
699         LMEMCPY_4_PAD
700
701 /*
702  * 1111: dst is 8-bit aligned, src is 8-bit aligned
703  */
704         ldrb    r2, [r1]
705         ldrh    r3, [r1, #0x01]
706         ldrb    r1, [r1, #0x03]
707         strb    r2, [r0]
708         strh    r3, [r0, #0x01]
709         strb    r1, [r0, #0x03]
710         bx      lr
711         LMEMCPY_4_PAD
712
713
714 /******************************************************************************
715  * Special case for 6 byte copies
716  */
717 #define LMEMCPY_6_LOG2  6       /* 64 bytes */
718 #define LMEMCPY_6_PAD   .align LMEMCPY_6_LOG2
719         LMEMCPY_6_PAD
720 .Lmemcpy_6:
721         and     r2, r1, #0x03           /* r2 = src & 3 */
722         orr     r2, r2, r0, lsl #2      /* Merge in dst << 2 */
723         ands    r2, r2, #0x0f           /* Case = (dst & 3) << 2 | (src & 3) */
724         sub     r3, pc, #0x14           /* r3 = .Lmemcpy_6 (pc reads as this insn + 8) */
725         addne   pc, r3, r2, lsl #LMEMCPY_6_LOG2 /* Jump to the 64-byte slot for this case */
726
727 /*
728  * 0000: dst is 32-bit aligned, src is 32-bit aligned
729  */
730         ldr     r2, [r1]
731         ldrh    r3, [r1, #0x04]
732         str     r2, [r0]
733         strh    r3, [r0, #0x04]
734         bx      lr
735         LMEMCPY_6_PAD
736
737 /*
738  * 0001: dst is 32-bit aligned, src is 8-bit aligned
739  */
740         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
741         ldr     r3, [r1, #0x03]         /* BE:r3 = 345x  LE:r3 = x543 */
742 #ifdef __ARMEB__
743         mov     r2, r2, lsl #8          /* r2 = 012. */
744         orr     r2, r2, r3, lsr #24     /* r2 = 0123 */
745 #else
746         mov     r2, r2, lsr #8          /* r2 = .210 */
747         orr     r2, r2, r3, lsl #24     /* r2 = 3210 */
748 #endif
749         mov     r3, r3, lsr #8          /* BE:r3 = .345  LE:r3 = .x54 */
750         str     r2, [r0]
751         strh    r3, [r0, #0x04]
752         bx      lr
753         LMEMCPY_6_PAD
754
755 /*
756  * 0010: dst is 32-bit aligned, src is 16-bit aligned
757  */
758         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
759         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
760 #ifdef __ARMEB__
761         mov     r1, r3, lsr #16         /* r1 = ..23 */
762         orr     r1, r1, r2, lsl #16     /* r1 = 0123 */
763         str     r1, [r0]
764         strh    r3, [r0, #0x04]
765 #else
766         mov     r1, r3, lsr #16         /* r1 = ..54 */
767         orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
768         str     r2, [r0]
769         strh    r1, [r0, #0x04]
770 #endif
771         bx      lr
772         LMEMCPY_6_PAD
773
774 /*
775  * 0011: dst is 32-bit aligned, src is 8-bit aligned
776  */
777         ldr     r2, [r1, #-3]           /* BE:r2 = xxx0  LE:r2 = 0xxx */
778         ldr     r3, [r1, #1]            /* BE:r3 = 1234  LE:r3 = 4321 */
779         ldr     r1, [r1, #5]            /* BE:r1 = 5xxx  LE:r1 = xxx5 */
780 #ifdef __ARMEB__
781         mov     r2, r2, lsl #24         /* r2 = 0... */
782         orr     r2, r2, r3, lsr #8      /* r2 = 0123 */
783         mov     r3, r3, lsl #8          /* r3 = 234. */
784         orr     r1, r3, r1, lsr #24     /* r1 = 2345 */
785 #else
786         mov     r2, r2, lsr #24         /* r2 = ...0 */
787         orr     r2, r2, r3, lsl #8      /* r2 = 3210 */
788         mov     r1, r1, lsl #8          /* r1 = xx5. */
789         orr     r1, r1, r3, lsr #24     /* r1 = xx54 */
790 #endif
791         str     r2, [r0]                /* Store bytes 0-3 */
792         strh    r1, [r0, #0x04]         /* Store bytes 4-5 (low halfword of r1) */
793         bx      lr
794         LMEMCPY_6_PAD
795
796 /*
797  * 0100: dst is 8-bit aligned, src is 32-bit aligned
798  */
799         ldr     r3, [r1]                /* BE:r3 = 0123  LE:r3 = 3210 */
800         ldrh    r2, [r1, #0x04]         /* BE:r2 = ..45  LE:r2 = ..54 */
801         mov     r1, r3, lsr #8          /* BE:r1 = .012  LE:r1 = .321 */
802         strh    r1, [r0, #0x01]
803 #ifdef __ARMEB__
804         mov     r1, r3, lsr #24         /* r1 = ...0 */
805         strb    r1, [r0]
806         mov     r3, r3, lsl #8          /* r3 = 123. */
807         orr     r3, r3, r2, lsr #8      /* r3 = 1234 */
808 #else
809         strb    r3, [r0]
810         mov     r3, r3, lsr #24         /* r3 = ...3 */
811         orr     r3, r3, r2, lsl #8      /* r3 = .543 */
812         mov     r2, r2, lsr #8          /* r2 = ...5 */
813 #endif
814         strh    r3, [r0, #0x03]
815         strb    r2, [r0, #0x05]
816         bx      lr
817         LMEMCPY_6_PAD
818
819 /*
820  * 0101: dst is 8-bit aligned, src is 8-bit aligned
821  */
822         ldrb    r2, [r1]
823         ldrh    r3, [r1, #0x01]
824         ldrh    ip, [r1, #0x03]
825         ldrb    r1, [r1, #0x05]
826         strb    r2, [r0]
827         strh    r3, [r0, #0x01]
828         strh    ip, [r0, #0x03]
829         strb    r1, [r0, #0x05]
830         bx      lr
831         LMEMCPY_6_PAD
832
833 /*
834  * 0110: dst is 8-bit aligned, src is 16-bit aligned
835  */
836         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
837         ldr     r1, [r1, #0x02]         /* BE:r1 = 2345  LE:r1 = 5432 */
838 #ifdef __ARMEB__
839         mov     r3, r2, lsr #8          /* r3 = ...0 */
840         strb    r3, [r0]
841         strb    r1, [r0, #0x05]
842         mov     r3, r1, lsr #8          /* r3 = .234 */
843         strh    r3, [r0, #0x03]
844         mov     r3, r2, lsl #8          /* r3 = .01. */
845         orr     r3, r3, r1, lsr #24     /* r3 = .012 */
846         strh    r3, [r0, #0x01]
847 #else
848         strb    r2, [r0]
849         mov     r3, r1, lsr #24
850         strb    r3, [r0, #0x05]
851         mov     r3, r1, lsr #8          /* r3 = .543 */
852         strh    r3, [r0, #0x03]
853         mov     r3, r2, lsr #8          /* r3 = ...1 */
854         orr     r3, r3, r1, lsl #8      /* r3 = 4321 */
855         strh    r3, [r0, #0x01]
856 #endif
857         bx      lr
858         LMEMCPY_6_PAD
859
860 /*
861  * 0111: dst is 8-bit aligned, src is 8-bit aligned
862  */
863         ldrb    r2, [r1]
864         ldrh    r3, [r1, #0x01]
865         ldrh    ip, [r1, #0x03]
866         ldrb    r1, [r1, #0x05]
867         strb    r2, [r0]
868         strh    r3, [r0, #0x01]
869         strh    ip, [r0, #0x03]
870         strb    r1, [r0, #0x05]
871         bx      lr
872         LMEMCPY_6_PAD
873
874 /*
875  * 1000: dst is 16-bit aligned, src is 32-bit aligned (6 byte copy)
876  */
877 #ifdef __ARMEB__
878         ldr     r2, [r1]                /* r2 = 0123 */
879         ldrh    r3, [r1, #0x04]         /* r3 = ..45 */
880         mov     r1, r2, lsr #16         /* r1 = ..01 */
881         orr     r3, r3, r2, lsl#16      /* r3 = 2345 */
882         strh    r1, [r0]                /* dst bytes 0-1 */
883         str     r3, [r0, #0x02]         /* dst bytes 2-5 */
884 #else
885         ldrh    r2, [r1, #0x04]         /* r2 = ..54 */
886         ldr     r3, [r1]                /* r3 = 3210 */
887         mov     r2, r2, lsl #16         /* r2 = 54.. */
888         orr     r2, r2, r3, lsr #16     /* r2 = 5432 */
889         strh    r3, [r0]                /* dst bytes 0-1 (low half of r3) */
890         str     r2, [r0, #0x02]         /* dst bytes 2-5 */
891 #endif
892         bx      lr
893         LMEMCPY_6_PAD
894
895 /*
896  * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1); x = loaded byte outside the 6 copied
897  */
898         ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
899         ldr     r2, [r1, #3]            /* BE:r2 = 345x  LE:r2 = x543 */
900         mov     r1, r3, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
901 #ifdef __ARMEB__
902         mov     r2, r2, lsr #8          /* r2 = .345 */
903         orr     r2, r2, r3, lsl #24     /* r2 = 2345 */
904 #else
905         mov     r2, r2, lsl #8          /* r2 = 543. */
906         orr     r2, r2, r3, lsr #24     /* r2 = 5432 */
907 #endif
908         strh    r1, [r0]                /* dst bytes 0-1 (low half of r1) */
909         str     r2, [r0, #0x02]         /* dst bytes 2-5 */
910         bx      lr
911         LMEMCPY_6_PAD
912
913 /*
914  * 1010: dst is 16-bit aligned, src is 16-bit aligned (same offset, no shifting needed)
915  */
916         ldrh    r2, [r1]                /* src bytes 0-1 */
917         ldr     r3, [r1, #0x02]         /* src bytes 2-5 */
918         strh    r2, [r0]                /* dst bytes 0-1 */
919         str     r3, [r0, #0x02]         /* dst bytes 2-5 */
920         bx      lr
921         LMEMCPY_6_PAD
922
923 /*
924  * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3)
925  */
926         ldrb    r3, [r1]                /* r3 = ...0 */
927         ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
928         ldrb    r1, [r1, #0x05]         /* r1 = ...5 */
929 #ifdef __ARMEB__
930         mov     r3, r3, lsl #8          /* r3 = ..0. */
931         orr     r3, r3, r2, lsr #24     /* r3 = ..01 */
932         orr     r1, r1, r2, lsl #8      /* r1 = 2345 */
933 #else
934         orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
935         mov     r1, r1, lsl #24         /* r1 = 5... */
936         orr     r1, r1, r2, lsr #8      /* r1 = 5432 */
937 #endif
938         strh    r3, [r0]                /* dst bytes 0-1 (low half of r3) */
939         str     r1, [r0, #0x02]         /* dst bytes 2-5 */
940         bx      lr
941         LMEMCPY_6_PAD
942
943 /*
944  * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
945  */
946         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
947         ldrh    r1, [r1, #0x04]         /* BE:r1 = ..45  LE:r1 = ..54 */
948 #ifdef __ARMEB__
949         mov     r3, r2, lsr #24         /* r3 = ...0 */
950         strb    r3, [r0]                /* dst byte 0 */
951         mov     r2, r2, lsl #8          /* r2 = 123. */
952         orr     r2, r2, r1, lsr #8      /* r2 = 1234 */
953 #else
954         strb    r2, [r0]                /* dst byte 0 (low byte of r2) */
955         mov     r2, r2, lsr #8          /* r2 = .321 */
956         orr     r2, r2, r1, lsl #24     /* r2 = 4321 */
957         mov     r1, r1, lsr #8          /* r1 = ...5 */
958 #endif
959         str     r2, [r0, #0x01]         /* dst bytes 1-4 */
960         strb    r1, [r0, #0x05]         /* dst byte 5 (low byte of r1) */
961         bx      lr
962         LMEMCPY_6_PAD
963
964 /*
965  * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
966  */
967         ldrb    r2, [r1]                /* r2 = ...0 */
968         ldrh    r3, [r1, #0x01]         /* BE:r3 = ..12  LE:r3 = ..21 */
969         ldrh    ip, [r1, #0x03]         /* BE:ip = ..34  LE:ip = ..43 */
970         ldrb    r1, [r1, #0x05]         /* r1 = ...5 */
971         strb    r2, [r0]                /* dst byte 0 */
972         strh    r3, [r0, #0x01]         /* dst bytes 1-2 */
973         strh    ip, [r0, #0x03]         /* dst bytes 3-4 */
974         strb    r1, [r0, #0x05]         /* dst byte 5 */
975         bx      lr
976         LMEMCPY_6_PAD
977
978 /*
979  * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
980  */
981         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
982         ldr     r1, [r1, #0x02]         /* BE:r1 = 2345  LE:r1 = 5432 */
983 #ifdef __ARMEB__
984         mov     r3, r2, lsr #8          /* r3 = ...0 */
985         strb    r3, [r0]                /* dst byte 0 */
986         mov     r2, r2, lsl #24         /* r2 = 1... */
987         orr     r2, r2, r1, lsr #8      /* r2 = 1234 */
988 #else
989         strb    r2, [r0]                /* dst byte 0 (low byte of r2) */
990         mov     r2, r2, lsr #8          /* r2 = ...1 */
991         orr     r2, r2, r1, lsl #8      /* r2 = 4321 */
992         mov     r1, r1, lsr #24         /* r1 = ...5 */
993 #endif
994         str     r2, [r0, #0x01]         /* dst bytes 1-4 */
995         strb    r1, [r0, #0x05]         /* dst byte 5 (low byte of r1) */
996         bx      lr
997         LMEMCPY_6_PAD
998
999 /*
1000  * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3)
1001  */
1002         ldrb    r2, [r1]                /* src byte 0 */
1003         ldr     r3, [r1, #0x01]         /* src bytes 1-4 */
1004         ldrb    r1, [r1, #0x05]         /* src byte 5 */
1005         strb    r2, [r0]                /* dst byte 0 */
1006         str     r3, [r0, #0x01]         /* dst bytes 1-4 */
1007         strb    r1, [r0, #0x05]         /* dst byte 5 */
1008         bx      lr
1009         LMEMCPY_6_PAD
1010
1011
1012 /******************************************************************************
1013  * Special case for 8 byte copies: r0 = dst, r1 = src; jumps to one of 16 fixed-size slots chosen by the low 2 bits of each
1014  */
1015 #define LMEMCPY_8_LOG2  6       /* log2 of the per-case slot size: 64 bytes */
1016 #define LMEMCPY_8_PAD   .align LMEMCPY_8_LOG2
1017         LMEMCPY_8_PAD
1018 .Lmemcpy_8:
1019         and     r2, r1, #0x03           /* r2 = src & 3 */
1020         orr     r2, r2, r0, lsl #2      /* r2 |= dst << 2 */
1021         ands    r2, r2, #0x0f           /* r2 = ((dst & 3) << 2) | (src & 3); Z set for case 0000 */
1022         sub     r3, pc, #0x14           /* r3 = .Lmemcpy_8 (pc reads as this insn + 8, insn is at +0x0c) */
1023         addne   pc, r3, r2, lsl #LMEMCPY_8_LOG2 /* case != 0000: jump to slot r3 + (r2 << 6); else fall through */
1024
1025 /*
1026  * 0000: dst is 32-bit aligned, src is 32-bit aligned (shares the first slot with the dispatch code above)
1027  */
1028         ldr     r2, [r1]                /* src bytes 0-3 */
1029         ldr     r3, [r1, #0x04]         /* src bytes 4-7 */
1030         str     r2, [r0]                /* dst bytes 0-3 */
1031         str     r3, [r0, #0x04]         /* dst bytes 4-7 */
1032         bx      lr
1033         LMEMCPY_8_PAD
1034
1035 /*
1036  * 0001: dst is 32-bit aligned, src is 8-bit aligned (byte 1); x = loaded byte outside the 8 copied
1037  */
1038         ldr     r3, [r1, #-1]           /* BE:r3 = x012  LE:r3 = 210x */
1039         ldr     r2, [r1, #0x03]         /* BE:r2 = 3456  LE:r2 = 6543 */
1040         ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
1041 #ifdef __ARMEB__
1042         mov     r3, r3, lsl #8          /* r3 = 012. */
1043         orr     r3, r3, r2, lsr #24     /* r3 = 0123 */
1044         orr     r2, r1, r2, lsl #8      /* r2 = 4567 */
1045 #else
1046         mov     r3, r3, lsr #8          /* r3 = .210 */
1047         orr     r3, r3, r2, lsl #24     /* r3 = 3210 */
1048         mov     r1, r1, lsl #24         /* r1 = 7... */
1049         orr     r2, r1, r2, lsr #8      /* r2 = 7654 */
1050 #endif
1051         str     r3, [r0]                /* dst bytes 0-3 */
1052         str     r2, [r0, #0x04]         /* dst bytes 4-7 */
1053         bx      lr
1054         LMEMCPY_8_PAD
1055
1056 /*
1057  * 0010: dst is 32-bit aligned, src is 16-bit aligned (8 byte copy)
1058  */
1059         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1060         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1061         ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
1062 #ifdef __ARMEB__
1063         mov     r2, r2, lsl #16         /* r2 = 01.. */
1064         orr     r2, r2, r3, lsr #16     /* r2 = 0123 */
1065         orr     r3, r1, r3, lsl #16     /* r3 = 4567 */
1066 #else
1067         orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
1068         mov     r3, r3, lsr #16         /* r3 = ..54 */
1069         orr     r3, r3, r1, lsl #16     /* r3 = 7654 */
1070 #endif
1071         str     r2, [r0]                /* dst bytes 0-3 */
1072         str     r3, [r0, #0x04]         /* dst bytes 4-7 */
1073         bx      lr
1074         LMEMCPY_8_PAD
1075
1076 /*
1077  * 0011: dst is 32-bit aligned, src is 8-bit aligned (byte 3); x = loaded byte outside the 8 copied
1078  */
1079         ldrb    r3, [r1]                /* r3 = ...0 */
1080         ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
1081         ldr     r1, [r1, #0x05]         /* BE:r1 = 567x  LE:r1 = x765 */
1082 #ifdef __ARMEB__
1083         mov     r3, r3, lsl #24         /* r3 = 0... */
1084         orr     r3, r3, r2, lsr #8      /* r3 = 0123 */
1085         mov     r2, r2, lsl #24         /* r2 = 4... */
1086         orr     r2, r2, r1, lsr #8      /* r2 = 4567 */
1087 #else
1088         orr     r3, r3, r2, lsl #8      /* r3 = 3210 */
1089         mov     r2, r2, lsr #24         /* r2 = ...4 */
1090         orr     r2, r2, r1, lsl #8      /* r2 = 7654 */
1091 #endif
1092         str     r3, [r0]                /* dst bytes 0-3 */
1093         str     r2, [r0, #0x04]         /* dst bytes 4-7 */
1094         bx      lr
1095         LMEMCPY_8_PAD
1096
1097 /*
1098  * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
1099  */
1100         ldr     r3, [r1]                /* BE:r3 = 0123  LE:r3 = 3210 */
1101         ldr     r2, [r1, #0x04]         /* BE:r2 = 4567  LE:r2 = 7654 */
1102 #ifdef __ARMEB__
1103         mov     r1, r3, lsr #24         /* r1 = ...0 */
1104         strb    r1, [r0]                /* dst byte 0 */
1105         mov     r1, r3, lsr #8          /* r1 = .012 */
1106         strb    r2, [r0, #0x07]         /* dst byte 7 (low byte of r2) */
1107         mov     r3, r3, lsl #24         /* r3 = 3... */
1108         orr     r3, r3, r2, lsr #8      /* r3 = 3456 */
1109 #else
1110         strb    r3, [r0]                /* dst byte 0 (low byte of r3) */
1111         mov     r1, r2, lsr #24         /* r1 = ...7 */
1112         strb    r1, [r0, #0x07]         /* dst byte 7 */
1113         mov     r1, r3, lsr #8          /* r1 = .321 */
1114         mov     r3, r3, lsr #24         /* r3 = ...3 */
1115         orr     r3, r3, r2, lsl #8      /* r3 = 6543 */
1116 #endif
1117         strh    r1, [r0, #0x01]         /* dst bytes 1-2 (low half of r1) */
1118         str     r3, [r0, #0x03]         /* dst bytes 3-6 */
1119         bx      lr
1120         LMEMCPY_8_PAD
1121
1122 /*
1123  * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1); same offset, no shifting needed
1124  */
1125         ldrb    r2, [r1]                /* src byte 0 */
1126         ldrh    r3, [r1, #0x01]         /* src bytes 1-2 */
1127         ldr     ip, [r1, #0x03]         /* src bytes 3-6 */
1128         ldrb    r1, [r1, #0x07]         /* src byte 7 */
1129         strb    r2, [r0]                /* dst byte 0 */
1130         strh    r3, [r0, #0x01]         /* dst bytes 1-2 */
1131         str     ip, [r0, #0x03]         /* dst bytes 3-6 */
1132         strb    r1, [r0, #0x07]         /* dst byte 7 */
1133         bx      lr
1134         LMEMCPY_8_PAD
1135
1136 /*
1137  * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned (8 byte copy)
1138  */
1139         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1140         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1141         ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
1142 #ifdef __ARMEB__
1143         mov     ip, r2, lsr #8          /* ip = ...0 */
1144         strb    ip, [r0]                /* dst byte 0 */
1145         mov     ip, r2, lsl #8          /* ip = .01. */
1146         orr     ip, ip, r3, lsr #24     /* ip = .012 */
1147         strb    r1, [r0, #0x07]         /* dst byte 7 (low byte of r1) */
1148         mov     r3, r3, lsl #8          /* r3 = 345. */
1149         orr     r3, r3, r1, lsr #8      /* r3 = 3456 */
1150 #else
1151         strb    r2, [r0]                /* dst byte 0 (low byte of r2) */
1152         mov     ip, r1, lsr #8          /* ip = ...7 */
1153         strb    ip, [r0, #0x07]         /* dst byte 7 */
1154         mov     ip, r2, lsr #8          /* ip = ...1 */
1155         orr     ip, ip, r3, lsl #8      /* ip = 4321 */
1156         mov     r3, r3, lsr #8          /* r3 = .543 */
1157         orr     r3, r3, r1, lsl #24     /* r3 = 6543 */
1158 #endif
1159         strh    ip, [r0, #0x01]         /* dst bytes 1-2 (low half of ip) */
1160         str     r3, [r0, #0x03]         /* dst bytes 3-6 */
1161         bx      lr
1162         LMEMCPY_8_PAD
1163
1164 /*
1165  * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3)
1166  */
1167         ldrb    r3, [r1]                /* r3 = ...0 */
1168         ldr     ip, [r1, #0x01]         /* BE:ip = 1234  LE:ip = 4321 */
1169         ldrh    r2, [r1, #0x05]         /* BE:r2 = ..56  LE:r2 = ..65 */
1170         ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
1171         strb    r3, [r0]                /* dst byte 0 */
1172         mov     r3, ip, lsr #16         /* BE:r3 = ..12  LE:r3 = ..43 */
1173 #ifdef __ARMEB__
1174         strh    r3, [r0, #0x01]         /* dst bytes 1-2 */
1175         orr     r2, r2, ip, lsl #16     /* r2 = 3456 */
1176 #else
1177         strh    ip, [r0, #0x01]         /* dst bytes 1-2 (low half of ip) */
1178         orr     r2, r3, r2, lsl #16     /* r2 = 6543 */
1179 #endif
1180         str     r2, [r0, #0x03]         /* dst bytes 3-6 */
1181         strb    r1, [r0, #0x07]         /* dst byte 7 */
1182         bx      lr
1183         LMEMCPY_8_PAD
1184
1185 /*
1186  * 1000: dst is 16-bit aligned, src is 32-bit aligned (8 byte copy)
1187  */
1188         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1189         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1190         mov     r1, r2, lsr #16         /* BE:r1 = ..01  LE:r1 = ..32 */
1191 #ifdef __ARMEB__
1192         strh    r1, [r0]                /* dst bytes 0-1 */
1193         mov     r1, r3, lsr #16         /* r1 = ..45 */
1194         orr     r2, r1 ,r2, lsl #16     /* r2 = 2345 */
1195 #else
1196         strh    r2, [r0]                /* dst bytes 0-1 (low half of r2) */
1197         orr     r2, r1, r3, lsl #16     /* r2 = 5432 */
1198         mov     r3, r3, lsr #16         /* r3 = ..76 */
1199 #endif
1200         str     r2, [r0, #0x02]         /* dst bytes 2-5 */
1201         strh    r3, [r0, #0x06]         /* dst bytes 6-7 (low half of r3) */
1202         bx      lr
1203         LMEMCPY_8_PAD
1204
1205 /*
1206  * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1); x = loaded byte outside the 8 copied
1207  */
1208         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
1209         ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
1210         ldrb    ip, [r1, #0x07]         /* ip = ...7 */
1211         mov     r1, r2, lsr #8          /* BE:r1 = .x01  LE:r1 = .210 */
1212         strh    r1, [r0]                /* dst bytes 0-1 (low half of r1) */
1213 #ifdef __ARMEB__
1214         mov     r1, r2, lsl #24         /* r1 = 2... */
1215         orr     r1, r1, r3, lsr #8      /* r1 = 2345 */
1216         orr     r3, ip, r3, lsl #8      /* r3 = 4567 */
1217 #else
1218         mov     r1, r2, lsr #24         /* r1 = ...2 */
1219         orr     r1, r1, r3, lsl #8      /* r1 = 5432 */
1220         mov     r3, r3, lsr #24         /* r3 = ...6 */
1221         orr     r3, r3, ip, lsl #8      /* r3 = ..76 */
1222 #endif
1223         str     r1, [r0, #0x02]         /* dst bytes 2-5 */
1224         strh    r3, [r0, #0x06]         /* dst bytes 6-7 (low half of r3) */
1225         bx      lr
1226         LMEMCPY_8_PAD
1227
1228 /*
1229  * 1010: dst is 16-bit aligned, src is 16-bit aligned (same offset, no shifting needed)
1230  */
1231         ldrh    r2, [r1]                /* src bytes 0-1 */
1232         ldr     ip, [r1, #0x02]         /* src bytes 2-5 */
1233         ldrh    r3, [r1, #0x06]         /* src bytes 6-7 */
1234         strh    r2, [r0]                /* dst bytes 0-1 */
1235         str     ip, [r0, #0x02]         /* dst bytes 2-5 */
1236         strh    r3, [r0, #0x06]         /* dst bytes 6-7 */
1237         bx      lr
1238         LMEMCPY_8_PAD
1239
1240 /*
1241  * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3); x = loaded byte outside the 8 copied
1242  */
1243         ldr     r3, [r1, #0x05]         /* BE:r3 = 567x  LE:r3 = x765 */
1244         ldr     r2, [r1, #0x01]         /* BE:r2 = 1234  LE:r2 = 4321 */
1245         ldrb    ip, [r1]                /* ip = ...0 */
1246         mov     r1, r3, lsr #8          /* BE:r1 = .567  LE:r1 = .x76 */
1247         strh    r1, [r0, #0x06]         /* dst bytes 6-7 (low half of r1) */
1248 #ifdef __ARMEB__
1249         mov     r3, r3, lsr #24         /* r3 = ...5 */
1250         orr     r3, r3, r2, lsl #8      /* r3 = 2345 */
1251         mov     r2, r2, lsr #24         /* r2 = ...1 */
1252         orr     r2, r2, ip, lsl #8      /* r2 = ..01 */
1253 #else
1254         mov     r3, r3, lsl #24         /* r3 = 5... */
1255         orr     r3, r3, r2, lsr #8      /* r3 = 5432 */
1256         orr     r2, ip, r2, lsl #8      /* r2 = 3210 */
1257 #endif
1258         str     r3, [r0, #0x02]         /* dst bytes 2-5 */
1259         strh    r2, [r0]                /* dst bytes 0-1 (low half of r2) */
1260         bx      lr
1261         LMEMCPY_8_PAD
1262
1263 /*
1264  * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
1265  */
1266         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1267         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1268         mov     r1, r3, lsr #8          /* BE:r1 = .456  LE:r1 = .765 */
1269         strh    r1, [r0, #0x05]         /* dst bytes 5-6 (low half of r1) */
1270 #ifdef __ARMEB__
1271         strb    r3, [r0, #0x07]         /* dst byte 7 (low byte of r3) */
1272         mov     r1, r2, lsr #24         /* r1 = ...0 */
1273         strb    r1, [r0]                /* dst byte 0 */
1274         mov     r2, r2, lsl #8          /* r2 = 123. */
1275         orr     r2, r2, r3, lsr #24     /* r2 = 1234 */
1276         str     r2, [r0, #0x01]         /* dst bytes 1-4 */
1277 #else
1278         strb    r2, [r0]                /* dst byte 0 (low byte of r2) */
1279         mov     r1, r3, lsr #24         /* r1 = ...7 */
1280         strb    r1, [r0, #0x07]         /* dst byte 7 */
1281         mov     r2, r2, lsr #8          /* r2 = .321 */
1282         orr     r2, r2, r3, lsl #24     /* r2 = 4321 */
1283         str     r2, [r0, #0x01]         /* dst bytes 1-4 */
1284 #endif
1285         bx       lr
1286         LMEMCPY_8_PAD
1287
1288 /*
1289  * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1)
1290  */
1291         ldrb    r3, [r1]                /* r3 = ...0 */
1292         ldrh    r2, [r1, #0x01]         /* BE:r2 = ..12  LE:r2 = ..21 */
1293         ldr     ip, [r1, #0x03]         /* BE:ip = 3456  LE:ip = 6543 */
1294         ldrb    r1, [r1, #0x07]         /* r1 = ...7 */
1295         strb    r3, [r0]                /* dst byte 0 */
1296         mov     r3, ip, lsr #16         /* BE:r3 = ..34  LE:r3 = ..65 */
1297 #ifdef __ARMEB__
1298         strh    ip, [r0, #0x05]         /* dst bytes 5-6 (low half of ip) */
1299         orr     r2, r3, r2, lsl #16     /* r2 = 1234 */
1300 #else
1301         strh    r3, [r0, #0x05]         /* dst bytes 5-6 */
1302         orr     r2, r2, ip, lsl #16     /* r2 = 4321 */
1303 #endif
1304         str     r2, [r0, #0x01]         /* dst bytes 1-4 */
1305         strb    r1, [r0, #0x07]         /* dst byte 7 */
1306         bx      lr
1307         LMEMCPY_8_PAD
1308
1309 /*
1310  * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned
1311  */
1312         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1313         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1314         ldrh    r1, [r1, #0x06]         /* BE:r1 = ..67  LE:r1 = ..76 */
1315 #ifdef __ARMEB__
1316         mov     ip, r2, lsr #8          /* ip = ...0 */
1317         strb    ip, [r0]                /* dst byte 0 */
1318         mov     ip, r2, lsl #24         /* ip = 1... */
1319         orr     ip, ip, r3, lsr #8      /* ip = 1234 */
1320         strb    r1, [r0, #0x07]         /* dst byte 7 (low byte of r1) */
1321         mov     r1, r1, lsr #8          /* r1 = ...6 */
1322         orr     r1, r1, r3, lsl #8      /* r1 = 3456 */
1323 #else
1324         strb    r2, [r0]                /* dst byte 0 (low byte of r2) */
1325         mov     ip, r2, lsr #8          /* ip = ...1 */
1326         orr     ip, ip, r3, lsl #8      /* ip = 4321 */
1327         mov     r2, r1, lsr #8          /* r2 = ...7 */
1328         strb    r2, [r0, #0x07]         /* dst byte 7 */
1329         mov     r1, r1, lsl #8          /* r1 = .76. */
1330         orr     r1, r1, r3, lsr #24     /* r1 = .765 */
1331 #endif
1332         str     ip, [r0, #0x01]         /* dst bytes 1-4 */
1333         strh    r1, [r0, #0x05]         /* dst bytes 5-6 (low half of r1) */
1334         bx      lr
1335         LMEMCPY_8_PAD
1336
1337 /*
1338  * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3); same offset, no shifting needed
1339  */
1340         ldrb    r2, [r1]                /* src byte 0 */
1341         ldr     ip, [r1, #0x01]         /* src bytes 1-4 */
1342         ldrh    r3, [r1, #0x05]         /* src bytes 5-6 */
1343         ldrb    r1, [r1, #0x07]         /* src byte 7 */
1344         strb    r2, [r0]                /* dst byte 0 */
1345         str     ip, [r0, #0x01]         /* dst bytes 1-4 */
1346         strh    r3, [r0, #0x05]         /* dst bytes 5-6 */
1347         strb    r1, [r0, #0x07]         /* dst byte 7 */
1348         bx      lr
1349         LMEMCPY_8_PAD
1350
1351 /******************************************************************************
1352  * Special case for 12 byte copies: r0 = dst, r1 = src; jumps to one of 16 fixed-size slots chosen by the low 2 bits of each
1353  */
1354 #define LMEMCPY_C_LOG2  7       /* log2 of the per-case slot size: 128 bytes */
1355 #define LMEMCPY_C_PAD   .align LMEMCPY_C_LOG2
1356         LMEMCPY_C_PAD
1357 .Lmemcpy_c:
1358         and     r2, r1, #0x03           /* r2 = src & 3 */
1359         orr     r2, r2, r0, lsl #2      /* r2 |= dst << 2 */
1360         ands    r2, r2, #0x0f           /* r2 = ((dst & 3) << 2) | (src & 3); Z set for case 0000 */
1361         sub     r3, pc, #0x14           /* r3 = .Lmemcpy_c (pc reads as this insn + 8, insn is at +0x0c) */
1362         addne   pc, r3, r2, lsl #LMEMCPY_C_LOG2 /* case != 0000: jump to slot r3 + (r2 << 7); else fall through */
1363
1364 /*
1365  * 0000: dst is 32-bit aligned, src is 32-bit aligned (shares the first slot with the dispatch code above)
1366  */
1367         ldr     r2, [r1]                /* src bytes 0-3 */
1368         ldr     r3, [r1, #0x04]         /* src bytes 4-7 */
1369         ldr     r1, [r1, #0x08]         /* src bytes 8-B */
1370         str     r2, [r0]                /* dst bytes 0-3 */
1371         str     r3, [r0, #0x04]         /* dst bytes 4-7 */
1372         str     r1, [r0, #0x08]         /* dst bytes 8-B */
1373         bx      lr
1374         LMEMCPY_C_PAD
1375
1376 /*
1377  * 0001: dst is 32-bit aligned, src is 8-bit aligned (byte 1); x = loaded byte outside the 12 copied
1378  */
1379         ldrb    r2, [r1, #0xb]          /* r2 = ...B */
1380         ldr     ip, [r1, #0x07]         /* BE:ip = 789A  LE:ip = A987 */
1381         ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
1382         ldr     r1, [r1, #-1]           /* BE:r1 = x012  LE:r1 = 210x */
1383 #ifdef __ARMEB__
1384         orr     r2, r2, ip, lsl #8      /* r2 = 89AB */
1385         str     r2, [r0, #0x08]         /* dst bytes 8-B */
1386         mov     r2, ip, lsr #24         /* r2 = ...7 */
1387         orr     r2, r2, r3, lsl #8      /* r2 = 4567 */
1388         mov     r1, r1, lsl #8          /* r1 = 012. */
1389         orr     r1, r1, r3, lsr #24     /* r1 = 0123 */
1390 #else
1391         mov     r2, r2, lsl #24         /* r2 = B... */
1392         orr     r2, r2, ip, lsr #8      /* r2 = BA98 */
1393         str     r2, [r0, #0x08]         /* dst bytes 8-B */
1394         mov     r2, ip, lsl #24         /* r2 = 7... */
1395         orr     r2, r2, r3, lsr #8      /* r2 = 7654 */
1396         mov     r1, r1, lsr #8          /* r1 = .210 */
1397         orr     r1, r1, r3, lsl #24     /* r1 = 3210 */
1398 #endif
1399         str     r2, [r0, #0x04]         /* dst bytes 4-7 */
1400         str     r1, [r0]                /* dst bytes 0-3 */
1401         bx      lr
1402         LMEMCPY_C_PAD
1403
1404 /*
1405  * 0010: dst is 32-bit aligned, src is 16-bit aligned
1406  */
1407         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1408         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1409         ldr     ip, [r1, #0x06]         /* BE:ip = 6789  LE:ip = 9876 */
1410         ldrh    r1, [r1, #0x0a]         /* BE:r1 = ..AB  LE:r1 = ..BA */
1411 #ifdef __ARMEB__
1412         mov     r2, r2, lsl #16         /* r2 = 01.. */
1413         orr     r2, r2, r3, lsr #16     /* r2 = 0123 */
1414         str     r2, [r0]
1415         mov     r3, r3, lsl #16         /* r3 = 45.. */
1416         orr     r3, r3, ip, lsr #16     /* r3 = 4567 */
1417         orr     r1, r1, ip, lsl #16     /* r1 = 89AB */
1418 #else
1419         orr     r2, r2, r3, lsl #16     /* r2 = 3210 */
1420         str     r2, [r0]
1421         mov     r3, r3, lsr #16         /* r3 = ..54 */
1422         orr     r3, r3, ip, lsl #16     /* r3 = 7654 */
1423         mov     r1, r1, lsl #16         /* r1 = BA.. */
1424         orr     r1, r1, ip, lsr #16     /* r1 = BA98 */
1425 #endif
1426         str     r3, [r0, #0x04]
1427         str     r1, [r0, #0x08]
1428         bx      lr
1429         LMEMCPY_C_PAD
1430
1431 /*
1432  * 0011: dst is 32-bit aligned, src is 8-bit aligned (byte 3); x = loaded byte outside the 12 copied
1433  */
1434         ldrb    r2, [r1]                /* r2 = ...0 */
1435         ldr     r3, [r1, #0x01]         /* BE:r3 = 1234  LE:r3 = 4321 */
1436         ldr     ip, [r1, #0x05]         /* BE:ip = 5678  LE:ip = 8765 */
1437         ldr     r1, [r1, #0x09]         /* BE:r1 = 9ABx  LE:r1 = xBA9 */
1438 #ifdef __ARMEB__
1439         mov     r2, r2, lsl #24         /* r2 = 0... */
1440         orr     r2, r2, r3, lsr #8      /* r2 = 0123 */
1441         str     r2, [r0]                /* dst bytes 0-3 */
1442         mov     r3, r3, lsl #24         /* r3 = 4... */
1443         orr     r3, r3, ip, lsr #8      /* r3 = 4567 */
1444         mov     r1, r1, lsr #8          /* r1 = .9AB */
1445         orr     r1, r1, ip, lsl #24     /* r1 = 89AB */
1446 #else
1447         orr     r2, r2, r3, lsl #8      /* r2 = 3210 */
1448         str     r2, [r0]                /* dst bytes 0-3 */
1449         mov     r3, r3, lsr #24         /* r3 = ...4 */
1450         orr     r3, r3, ip, lsl #8      /* r3 = 7654 */
1451         mov     r1, r1, lsl #8          /* r1 = BA9. */
1452         orr     r1, r1, ip, lsr #24     /* r1 = BA98 */
1453 #endif
1454         str     r3, [r0, #0x04]         /* dst bytes 4-7 */
1455         str     r1, [r0, #0x08]         /* dst bytes 8-B */
1456         bx      lr
1457         LMEMCPY_C_PAD
1458
1459 /*
1460  * 0100: dst is 8-bit aligned (byte 1), src is 32-bit aligned
1461  */
1462         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1463         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1464         ldr     ip, [r1, #0x08]         /* BE:ip = 89AB  LE:ip = BA98 */
1465         mov     r1, r2, lsr #8          /* BE:r1 = .012  LE:r1 = .321 */
1466         strh    r1, [r0, #0x01]         /* dst bytes 1-2 (low half of r1) */
1467 #ifdef __ARMEB__
1468         mov     r1, r2, lsr #24         /* r1 = ...0 */
1469         strb    r1, [r0]                /* dst byte 0 */
1470         mov     r1, r2, lsl #24         /* r1 = 3... */
1471         orr     r2, r1, r3, lsr #8      /* r2 = 3456 */
1472         mov     r1, r3, lsl #24         /* r1 = 7... */
1473         orr     r1, r1, ip, lsr #8      /* r1 = 789A */
1474 #else
1475         strb    r2, [r0]                /* dst byte 0 (low byte of r2) */
1476         mov     r1, r2, lsr #24         /* r1 = ...3 */
1477         orr     r2, r1, r3, lsl #8      /* r2 = 6543 */
1478         mov     r1, r3, lsr #24         /* r1 = ...7 */
1479         orr     r1, r1, ip, lsl #8      /* r1 = A987 */
1480         mov     ip, ip, lsr #24         /* ip = ...B */
1481 #endif
1482         str     r2, [r0, #0x03]         /* dst bytes 3-6 */
1483         str     r1, [r0, #0x07]         /* dst bytes 7-A */
1484         strb    ip, [r0, #0x0b]         /* dst byte B (low byte of ip) */
1485         bx      lr
1486         LMEMCPY_C_PAD
1487
1488 /*
1489  * 0101: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 1); same offset, no shifting needed
1490  */
1491         ldrb    r2, [r1]                /* src byte 0 */
1492         ldrh    r3, [r1, #0x01]         /* src bytes 1-2 */
1493         ldr     ip, [r1, #0x03]         /* src bytes 3-6 */
1494         strb    r2, [r0]                /* dst byte 0; r2 is reused by the next load */
1495         ldr     r2, [r1, #0x07]         /* src bytes 7-A */
1496         ldrb    r1, [r1, #0x0b]         /* src byte B */
1497         strh    r3, [r0, #0x01]         /* dst bytes 1-2 */
1498         str     ip, [r0, #0x03]         /* dst bytes 3-6 */
1499         str     r2, [r0, #0x07]         /* dst bytes 7-A */
1500         strb    r1, [r0, #0x0b]         /* dst byte B */
1501         bx      lr
1502         LMEMCPY_C_PAD
1503
1504 /*
1505  * 0110: dst is 8-bit aligned (byte 1), src is 16-bit aligned (12 byte copy)
1506  */
1507         ldrh    r2, [r1]                /* BE:r2 = ..01  LE:r2 = ..10 */
1508         ldr     r3, [r1, #0x02]         /* BE:r3 = 2345  LE:r3 = 5432 */
1509         ldr     ip, [r1, #0x06]         /* BE:ip = 6789  LE:ip = 9876 */
1510         ldrh    r1, [r1, #0x0a]         /* BE:r1 = ..AB  LE:r1 = ..BA */
1511 #ifdef __ARMEB__
1512         mov     r2, r2, ror #8          /* r2 = 1..0 */
1513         strb    r2, [r0]                /* dst byte 0 (low byte of r2) */
1514         mov     r2, r2, lsr #16         /* r2 = ..1. */
1515         orr     r2, r2, r3, lsr #24     /* r2 = ..12 */
1516         strh    r2, [r0, #0x01]         /* dst bytes 1-2 */
1517         mov     r2, r3, lsl #8          /* r2 = 345. */
1518         orr     r3, r2, ip, lsr #24     /* r3 = 3456 */
1519         mov     r2, ip, lsl #8          /* r2 = 789. */
1520         orr     r2, r2, r1, lsr #8      /* r2 = 789A */
1521 #else
1522         strb    r2, [r0]                /* dst byte 0 (low byte of r2) */
1523         mov     r2, r2, lsr #8          /* r2 = ...1 */
1524         orr     r2, r2, r3, lsl #8      /* r2 = 4321 */
1525         strh    r2, [r0, #0x01]         /* dst bytes 1-2 (low half of r2) */
1526         mov     r2, r3, lsr #8          /* r2 = .543 */
1527         orr     r3, r2, ip, lsl #24     /* r3 = 6543 */
1528         mov     r2, ip, lsr #8          /* r2 = .987 */
1529         orr     r2, r2, r1, lsl #24     /* r2 = A987 */
1530         mov     r1, r1, lsr #8          /* r1 = ...B */
1531 #endif
1532         str     r3, [r0, #0x03]         /* dst bytes 3-6 */
1533         str     r2, [r0, #0x07]         /* dst bytes 7-A */
1534         strb    r1, [r0, #0x0b]         /* dst byte B (low byte of r1) */
1535         bx      lr
1536         LMEMCPY_C_PAD
1537
1538 /*
1539  * 0111: dst is 8-bit aligned (byte 1), src is 8-bit aligned (byte 3); x = loaded byte outside the 12 copied
1540  */
1541         ldrb    r2, [r1]                /* r2 = ...0 */
1542         ldr     r3, [r1, #0x01]         /* BE:r3 = 1234  LE:r3 = 4321 */
1543         ldr     ip, [r1, #0x05]         /* BE:ip = 5678  LE:ip = 8765 */
1544         ldr     r1, [r1, #0x09]         /* BE:r1 = 9ABx  LE:r1 = xBA9 */
1545         strb    r2, [r0]                /* dst byte 0 */
1546 #ifdef __ARMEB__
1547         mov     r2, r3, lsr #16         /* r2 = ..12 */
1548         strh    r2, [r0, #0x01]         /* dst bytes 1-2 */
1549         mov     r3, r3, lsl #16         /* r3 = 34.. */
1550         orr     r3, r3, ip, lsr #16     /* r3 = 3456 */
1551         mov     ip, ip, lsl #16         /* ip = 78.. */
1552         orr     ip, ip, r1, lsr #16     /* ip = 789A */
1553         mov     r1, r1, lsr #8          /* r1 = .9AB */
1554 #else
1555         strh    r3, [r0, #0x01]         /* dst bytes 1-2 (low half of r3) */
1556         mov     r3, r3, lsr #16         /* r3 = ..43 */
1557         orr     r3, r3, ip, lsl #16     /* r3 = 6543 */
1558         mov     ip, ip, lsr #16         /* ip = ..87 */
1559         orr     ip, ip, r1, lsl #16     /* ip = A987 */
1560         mov     r1, r1, lsr #16         /* r1 = ..xB */
1561 #endif
1562         str     r3, [r0, #0x03]         /* dst bytes 3-6 */
1563         str     ip, [r0, #0x07]         /* dst bytes 7-A */
1564         strb    r1, [r0, #0x0b]         /* dst byte B (low byte of r1) */
1565         bx      lr
1566         LMEMCPY_C_PAD
1567
1568 /*
1569  * 1000: dst is 16-bit aligned, src is 32-bit aligned (12 byte copy)
1570  */
1571         ldr     ip, [r1]                /* BE:ip = 0123  LE:ip = 3210 */
1572         ldr     r3, [r1, #0x04]         /* BE:r3 = 4567  LE:r3 = 7654 */
1573         ldr     r2, [r1, #0x08]         /* BE:r2 = 89AB  LE:r2 = BA98 */
1574         mov     r1, ip, lsr #16         /* BE:r1 = ..01  LE:r1 = ..32 */
1575 #ifdef __ARMEB__
1576         strh    r1, [r0]                /* dst bytes 0-1 */
1577         mov     r1, ip, lsl #16         /* r1 = 23.. */
1578         orr     r1, r1, r3, lsr #16     /* r1 = 2345 */
1579         mov     r3, r3, lsl #16         /* r3 = 67.. */
1580         orr     r3, r3, r2, lsr #16     /* r3 = 6789 */
1581 #else
1582         strh    ip, [r0]                /* dst bytes 0-1 (low half of ip) */
1583         orr     r1, r1, r3, lsl #16     /* r1 = 5432 */
1584         mov     r3, r3, lsr #16         /* r3 = ..76 */
1585         orr     r3, r3, r2, lsl #16     /* r3 = 9876 */
1586         mov     r2, r2, lsr #16         /* r2 = ..BA */
1587 #endif
1588         str     r1, [r0, #0x02]         /* dst bytes 2-5 */
1589         str     r3, [r0, #0x06]         /* dst bytes 6-9 */
1590         strh    r2, [r0, #0x0a]         /* dst bytes A-B (low half of r2) */
1591         bx      lr
1592         LMEMCPY_C_PAD
1593
1594 /*
1595  * 1001: dst is 16-bit aligned, src is 8-bit aligned (byte 1); x = loaded byte outside the 12 copied
1596  */
1597         ldr     r2, [r1, #-1]           /* BE:r2 = x012  LE:r2 = 210x */
1598         ldr     r3, [r1, #0x03]         /* BE:r3 = 3456  LE:r3 = 6543 */
1599         mov     ip, r2, lsr #8          /* BE:ip = .x01  LE:ip = .210 */
1600         strh    ip, [r0]                /* dst bytes 0-1 (low half of ip); ip is reused by the next load */
1601         ldr     ip, [r1, #0x07]         /* BE:ip = 789A  LE:ip = A987 */
1602         ldrb    r1, [r1, #0x0b]         /* r1 = ...B */
1603 #ifdef __ARMEB__
1604         mov     r2, r2, lsl #24         /* r2 = 2... */
1605         orr     r2, r2, r3, lsr #8      /* r2 = 2345 */
1606         mov     r3, r3, lsl #24         /* r3 = 6... */
1607         orr     r3, r3, ip, lsr #8      /* r3 = 6789 */
1608         orr     r1, r1, ip, lsl #8      /* r1 = 89AB */
1609 #else
1610         mov     r2, r2, lsr #24         /* r2 = ...2 */
1611         orr     r2, r2, r3, lsl #8      /* r2 = 5432 */
1612         mov     r3, r3, lsr #24         /* r3 = ...6 */
1613         orr     r3, r3, ip, lsl #8      /* r3 = 9876 */
1614         mov     r1, r1, lsl #8          /* r1 = ..B. */
1615         orr     r1, r1, ip, lsr #24     /* r1 = ..BA */
1616 #endif
1617         str     r2, [r0, #0x02]         /* dst bytes 2-5 */
1618         str     r3, [r0, #0x06]         /* dst bytes 6-9 */
1619         strh    r1, [r0, #0x0a]         /* dst bytes A-B (low half of r1) */
1620         bx      lr
1621         LMEMCPY_C_PAD
1622
1623 /*
1624  * 1010: dst is 16-bit aligned, src is 16-bit aligned (same offset, no shifting needed)
1625  */
1626         ldrh    r2, [r1]                /* src bytes 0-1 */
1627         ldr     r3, [r1, #0x02]         /* src bytes 2-5 */
1628         ldr     ip, [r1, #0x06]         /* src bytes 6-9 */
1629         ldrh    r1, [r1, #0x0a]         /* src bytes A-B */
1630         strh    r2, [r0]                /* dst bytes 0-1 */
1631         str     r3, [r0, #0x02]         /* dst bytes 2-5 */
1632         str     ip, [r0, #0x06]         /* dst bytes 6-9 */
1633         strh    r1, [r0, #0x0a]         /* dst bytes A-B */
1634         bx      lr
1635         LMEMCPY_C_PAD
1636
1637 /*
1638  * 1011: dst is 16-bit aligned, src is 8-bit aligned (byte 3); x = loaded byte outside the 12 copied
1639  */
1640         ldr     r2, [r1, #0x09]         /* BE:r2 = 9ABx  LE:r2 = xBA9 */
1641         ldr     r3, [r1, #0x05]         /* BE:r3 = 5678  LE:r3 = 8765 */
1642         mov     ip, r2, lsr #8          /* BE:ip = .9AB  LE:ip = .xBA */
1643         strh    ip, [r0, #0x0a]         /* dst bytes A-B (low half of ip); ip is reused by the next load */
1644         ldr     ip, [r1, #0x01]         /* BE:ip = 1234  LE:ip = 4321 */
1645         ldrb    r1, [r1]                /* r1 = ...0 */
1646 #ifdef __ARMEB__
1647         mov     r2, r2, lsr #24         /* r2 = ...9 */
1648         orr     r2, r2, r3, lsl #8      /* r2 = 6789 */
1649         mov     r3, r3, lsr #24         /* r3 = ...5 */
1650         orr     r3, r3, ip, lsl #8      /* r3 = 2345 */
1651         mov     r1, r1, lsl #8          /* r1 = ..0. */
1652         orr     r1, r1, ip, lsr #24     /* r1 = ..01 */
1653 #else
1654         mov     r2, r2, lsl #24         /* r2 = 9... */
1655         orr     r2, r2, r3, lsr #8      /* r2 = 9876 */
1656         mov     r3, r3, lsl #24         /* r3 = 5... */
1657         orr     r3, r3, ip, lsr #8      /* r3 = 5432 */
1658         orr     r1, r1, ip, lsl #8      /* r1 = 3210 */
1659 #endif
1660         str     r2, [r0, #0x06]         /* dst bytes 6-9 */
1661         str     r3, [r0, #0x02]         /* dst bytes 2-5 */
1662         strh    r1, [r0]                /* dst bytes 0-1 (low half of r1) */
1663         bx      lr
1664         LMEMCPY_C_PAD
1665
1666 /*
1667  * 1100: dst is 8-bit aligned (byte 3), src is 32-bit aligned
1668  */
1669         ldr     r2, [r1]                /* BE:r2 = 0123  LE:r2 = 3210 */
1670         ldr     ip, [r1, #0x04]         /* BE:ip = 4567  LE:ip = 7654 */
1671         ldr     r1, [r1, #0x08]         /* BE:r1 = 89AB  LE:r1 = BA98 */
1672 #ifdef __ARMEB__
1673         mov     r3, r2, lsr #24         /* r3 = ...0 */
1674         strb    r3, [r0]                /* dst byte 0 */
1675         mov     r2, r2, lsl #8          /* r2 = 123. */
1676         orr     r2, r2, ip, lsr #24     /* r2 = 1234 */
1677         str     r2, [r0, #0x01]         /* dst bytes 1-4 */
1678         mov     r2, ip, lsl #8          /* r2 = 567. */
1679         orr     r2, r2, r1, lsr #24     /* r2 = 5678 */
1680         str     r2, [r0, #0x05]         /* dst bytes 5-8 */
1681         mov     r2, r1, lsr #8          /* r2 = .89A */
1682         strh    r2, [r0, #0x09]         /* dst bytes 9-A (low half of r2) */
1683         strb    r1, [r0, #0x0b]         /* dst byte B (low byte of r1) */
1684 #else
1685         strb    r2, [r0]                /* dst byte 0 (low byte of r2) */
1686         mov     r3, r2, lsr #8          /* r3 = .321 */
1687         orr     r3, r3, ip, lsl #24     /* r3 = 4321 */
1688         str     r3, [r0, #0x01]         /* dst bytes 1-4 */
1689         mov     r3, ip, lsr #8          /* r3 = .765 */
1690         orr     r3, r3, r1, lsl #24     /* r3 = 8765 */
1691         str     r3, [r0, #0x05]         /* dst bytes 5-8 */
1692         mov     r1, r1, lsr #8          /* r1 = .BA9 */
1693         strh    r1, [r0, #0x09]         /* dst bytes 9-A (low half of r1) */
1694         mov     r1, r1, lsr #16         /* r1 = ...B */
1695         strb    r1, [r0, #0x0b]         /* dst byte B */
1696 #endif
1697         bx      lr
1698         LMEMCPY_C_PAD
1699
1700 /*
1701  * 1101: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 1); 12-byte copy
1702  */
1703         ldrb    r2, [r1, #0x0b]         /* r2 = ...B */
1704         ldr     r3, [r1, #0x07]         /* BE:r3 = 789A  LE:r3 = A987 */
1705         ldr     ip, [r1, #0x03]         /* BE:ip = 3456  LE:ip = 6543 */
1706         ldr     r1, [r1, #-1]           /* BE:r1 = x012  LE:r1 = 210x (x = byte at src-1, never stored) */
1707         strb    r2, [r0, #0x0b]         /* dst byte B */
1708 #ifdef __ARMEB__
1709         strh    r3, [r0, #0x09]         /* low halfword (9A) -> dst bytes 9-A */
1710         mov     r3, r3, lsr #16         /* r3 = ..78 */
1711         orr     r3, r3, ip, lsl #16     /* r3 = 5678 */
1712         mov     ip, ip, lsr #16         /* ip = ..34 */
1713         orr     ip, ip, r1, lsl #16     /* ip = 1234 */
1714         mov     r1, r1, lsr #16         /* r1 = ..x0 */
1715 #else   /* !__ARMEB__ */
1716         mov     r2, r3, lsr #16         /* r2 = ..A9 */
1717         strh    r2, [r0, #0x09]         /* dst bytes 9-A */
1718         mov     r3, r3, lsl #16         /* r3 = 87.. */
1719         orr     r3, r3, ip, lsr #16     /* r3 = 8765 */
1720         mov     ip, ip, lsl #16         /* ip = 43.. */
1721         orr     ip, ip, r1, lsr #16     /* ip = 4321 */
1722         mov     r1, r1, lsr #8          /* r1 = .210 */
1723 #endif  /* __ARMEB__ */
1724         str     r3, [r0, #0x05]         /* dst bytes 5-8 */
1725         str     ip, [r0, #0x01]         /* dst bytes 1-4 */
1726         strb    r1, [r0]                /* low byte of r1 (0) -> dst byte 0; x is dropped */
1727         bx      lr                      /* r0 (dst) is left unmodified by this case */
1728         LMEMCPY_C_PAD                   /* NOTE(review): presumably pads this case to a fixed slot size; macro defined outside this excerpt -- confirm */
1729
1730 /*
1731  * 1110: dst is 8-bit aligned (byte 3), src is 16-bit aligned; 12-byte copy
1732  */
1733 #ifdef __ARMEB__
1734         ldrh    r2, [r1, #0x0a]         /* r2 = ..AB */
1735         ldr     ip, [r1, #0x06]         /* ip = 6789 */
1736         ldr     r3, [r1, #0x02]         /* r3 = 2345 */
1737         ldrh    r1, [r1]                /* r1 = ..01 */
1738         strb    r2, [r0, #0x0b]         /* low byte of r2 (B) -> dst byte B */
1739         mov     r2, r2, lsr #8          /* r2 = ...A */
1740         orr     r2, r2, ip, lsl #8      /* r2 = 789A */
1741         mov     ip, ip, lsr #8          /* ip = .678 */
1742         orr     ip, ip, r3, lsl #24     /* ip = 5678 */
1743         mov     r3, r3, lsr #8          /* r3 = .234 */
1744         orr     r3, r3, r1, lsl #24     /* r3 = 1234 */
1745         mov     r1, r1, lsr #8          /* r1 = ...0 */
1746         strb    r1, [r0]                /* dst byte 0 */
1747         str     r3, [r0, #0x01]         /* dst bytes 1-4 */
1748         str     ip, [r0, #0x05]         /* dst bytes 5-8 */
1749         strh    r2, [r0, #0x09]         /* low halfword (9A) -> dst bytes 9-A */
1750 #else   /* !__ARMEB__ */
1751         ldrh    r2, [r1]                /* r2 = ..10 */
1752         ldr     r3, [r1, #0x02]         /* r3 = 5432 */
1753         ldr     ip, [r1, #0x06]         /* ip = 9876 */
1754         ldrh    r1, [r1, #0x0a]         /* r1 = ..BA */
1755         strb    r2, [r0]                /* low byte of r2 (0) -> dst byte 0 */
1756         mov     r2, r2, lsr #8          /* r2 = ...1 */
1757         orr     r2, r2, r3, lsl #8      /* r2 = 4321 */
1758         mov     r3, r3, lsr #24         /* r3 = ...5 */
1759         orr     r3, r3, ip, lsl #8      /* r3 = 8765 */
1760         mov     ip, ip, lsr #24         /* ip = ...9 */
1761         orr     ip, ip, r1, lsl #8      /* ip = .BA9 */
1762         mov     r1, r1, lsr #8          /* r1 = ...B */
1763         str     r2, [r0, #0x01]         /* dst bytes 1-4 */
1764         str     r3, [r0, #0x05]         /* dst bytes 5-8 */
1765         strh    ip, [r0, #0x09]         /* low halfword (A9) -> dst bytes 9-A */
1766         strb    r1, [r0, #0x0b]         /* dst byte B */
1767 #endif  /* __ARMEB__ */
1768         bx      lr                      /* r0 (dst) is left unmodified by this case */
1769         LMEMCPY_C_PAD                   /* NOTE(review): presumably pads this case to a fixed slot size; macro defined outside this excerpt -- confirm */
1770
1771 /*
1772  * 1111: dst is 8-bit aligned (byte 3), src is 8-bit aligned (byte 3); 12-byte copy, no shifting
1773  */
1774         ldrb    r2, [r1]                /* r2 = ...0 */
1775         ldr     r3, [r1, #0x01]         /* BE:r3 = 1234  LE:r3 = 4321 */
1776         ldr     ip, [r1, #0x05]         /* BE:ip = 5678  LE:ip = 8765 */
1777         strb    r2, [r0]                /* dst byte 0; frees r2 for the next load */
1778         ldrh    r2, [r1, #0x09]         /* BE:r2 = ..9A  LE:r2 = ..A9 */
1779         ldrb    r1, [r1, #0x0b]         /* r1 = ...B (src pointer no longer needed) */
1780         str     r3, [r0, #0x01]         /* dst bytes 1-4 */
1781         str     ip, [r0, #0x05]         /* dst bytes 5-8 */
1782         strh    r2, [r0, #0x09]         /* dst bytes 9-A */
1783         strb    r1, [r0, #0x0b]         /* dst byte B */
1784         bx      lr                      /* r0 (dst) is left unmodified by this case */
1785 #endif  /* !_STANDALONE */
1786 END(memcpy)