1 /* $NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $ */
4 * Copyright 2003 Wasabi Systems, Inc.
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
38 #include <machine/asm.h>
39 __FBSDID("$FreeBSD$");
/* GET_PCB(tmp), first visible variant: read CP15 register c13, c0, opcode2 4 into tmp, then add the TD_PCB offset to it. */
/* NOTE(review): the preprocessor conditionals that select between the two GET_PCB definitions are not visible in this excerpt. */
46 #define GET_PCB(tmp) \
47 mrc p15, 0, tmp, c13, c0, 4; \
48 add tmp, tmp, #(TD_PCB)
/* Literal word whose value is the symbol __pcpu plus the PC_CURPCB offset. */
51 .word _C_LABEL(__pcpu) + PC_CURPCB
/* Second GET_PCB variant; its continuation line is not visible in this excerpt. */
52 #define GET_PCB(tmp) \
/* copyin excerpt: copies from a user-space source (r0) to a kernel-space destination (r1); r2 is used as the remaining byte count. */
/* NOTE(review): many lines (entry label, flag-setting instructions, several loop bodies) are not visible here, so the added comments describe only the instructions shown. */
57 * r0 = user space address
58 * r1 = kernel space address
61 * Copies bytes from user space to kernel space
66 movle pc, lr /* Bail early if length is <= 0 */
72 ldr r3, .L_min_memcpy_size /* Load the literal-pool word at .L_min_memcpy_size */
76 stmfd sp!, {r0-r2, r4, lr} /* Save r0-r2, r4 and lr on the stack */
80 mov r3, #2 /* SRC_IS_USER */
85 ldmfd sp!, {r0-r2, r4, lr} /* Restore the registers saved above */
90 stmfd sp!, {r10-r11, lr} /* Save r10, r11 and lr; r10 and r11 are used for the onfault bookkeeping below */
96 adr ip, .Lcopyin_fault /* ip = address of the fault handler label */
97 ldr r11, [r10, #PCB_ONFAULT] /* r11 = previous onfault value */
98 str ip, [r10, #PCB_ONFAULT] /* Install .Lcopyin_fault as the onfault handler */
100 str r11, [r10, #PCB_ONFAULT] /* Put the previous onfault value back */
102 ldmfd sp!, {r10-r11, pc} /* Restore r10-r11 and return */
/* NOTE(review): the next four lines are presumably the body of .Lcopyin_fault (label not visible here) - confirm. */
106 str r11, [r10, #PCB_ONFAULT] /* Put the previous onfault value back */
108 ldmfdgt sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
109 ldmfdlt sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
110 ldmfd sp!, {r10-r11, pc}
114 /* Word-align the destination buffer */
115 ands ip, r1, #0x03 /* Already word aligned? */
116 beq .Lcopyin_wordaligned /* Yup */
118 cmp r2, ip /* Enough bytes left to align it? */
119 blt .Lcopyin_l4_2 /* Nope. Just copy bytewise */
122 addne pc, pc, ip, lsl #3 /* Computed jump: advance pc by ip * 8 bytes into the byte-copy sequence */
/* ldrbt loads one byte from the user source with unprivileged access and post-increments r0. */
124 ldrbt ip, [r0], #0x01
126 ldrbt ip, [r0], #0x01
128 ldrbt ip, [r0], #0x01
130 cmp r2, #0x00 /* All done? */
133 /* Destination buffer is now word aligned */
134 .Lcopyin_wordaligned:
135 ands ip, r0, #0x03 /* Is src also word-aligned? */
136 bne .Lcopyin_bad_align /* Nope. Things just got bad */
137 cmp r2, #0x08 /* Less than 8 bytes remaining? */
138 blt .Lcopyin_w_less_than8
140 /* Quad-align the destination buffer */
141 tst r1, #0x07 /* Already quad aligned? */
142 ldrtne ip, [r0], #0x04 /* If not, copy one word so that r1 becomes 8-byte aligned */
143 strne ip, [r1], #0x04
145 stmfd sp!, {r4-r9} /* Free up some registers */
146 mov r3, #-1 /* Signal restore r4-r9 */
148 /* Destination buffer quad aligned, source is word aligned */
150 blt .Lcopyin_w_lessthan128
152 /* Copy 128 bytes at a time */
/* Loads from the user source use ldrt (unprivileged access, one word each); stores to the kernel destination use strd (8 bytes each). */
154 ldrt r4, [r0], #0x04 /* LD:00-03 */
155 ldrt r5, [r0], #0x04 /* LD:04-07 */
156 pld [r0, #0x18] /* Prefetch 0x20 */
157 ldrt r6, [r0], #0x04 /* LD:08-0b */
158 ldrt r7, [r0], #0x04 /* LD:0c-0f */
159 ldrt r8, [r0], #0x04 /* LD:10-13 */
160 ldrt r9, [r0], #0x04 /* LD:14-17 */
161 strd r4, [r1], #0x08 /* ST:00-07 */
162 ldrt r4, [r0], #0x04 /* LD:18-1b */
163 ldrt r5, [r0], #0x04 /* LD:1c-1f */
164 strd r6, [r1], #0x08 /* ST:08-0f */
165 ldrt r6, [r0], #0x04 /* LD:20-23 */
166 ldrt r7, [r0], #0x04 /* LD:24-27 */
167 pld [r0, #0x18] /* Prefetch 0x40 */
168 strd r8, [r1], #0x08 /* ST:10-17 */
169 ldrt r8, [r0], #0x04 /* LD:28-2b */
170 ldrt r9, [r0], #0x04 /* LD:2c-2f */
171 strd r4, [r1], #0x08 /* ST:18-1f */
172 ldrt r4, [r0], #0x04 /* LD:30-33 */
173 ldrt r5, [r0], #0x04 /* LD:34-37 */
174 strd r6, [r1], #0x08 /* ST:20-27 */
175 ldrt r6, [r0], #0x04 /* LD:38-3b */
176 ldrt r7, [r0], #0x04 /* LD:3c-3f */
177 strd r8, [r1], #0x08 /* ST:28-2f */
178 ldrt r8, [r0], #0x04 /* LD:40-43 */
179 ldrt r9, [r0], #0x04 /* LD:44-47 */
180 pld [r0, #0x18] /* Prefetch 0x60 */
181 strd r4, [r1], #0x08 /* ST:30-37 */
182 ldrt r4, [r0], #0x04 /* LD:48-4b */
183 ldrt r5, [r0], #0x04 /* LD:4c-4f */
184 strd r6, [r1], #0x08 /* ST:38-3f */
185 ldrt r6, [r0], #0x04 /* LD:50-53 */
186 ldrt r7, [r0], #0x04 /* LD:54-57 */
187 strd r8, [r1], #0x08 /* ST:40-47 */
188 ldrt r8, [r0], #0x04 /* LD:58-5b */
189 ldrt r9, [r0], #0x04 /* LD:5c-5f */
190 strd r4, [r1], #0x08 /* ST:48-4f */
191 ldrt r4, [r0], #0x04 /* LD:60-63 */
192 ldrt r5, [r0], #0x04 /* LD:64-67 */
193 pld [r0, #0x18] /* Prefetch 0x80 */
194 strd r6, [r1], #0x08 /* ST:50-57 */
195 ldrt r6, [r0], #0x04 /* LD:68-6b */
196 ldrt r7, [r0], #0x04 /* LD:6c-6f */
197 strd r8, [r1], #0x08 /* ST:58-5f */
198 ldrt r8, [r0], #0x04 /* LD:70-73 */
199 ldrt r9, [r0], #0x04 /* LD:74-77 */
200 strd r4, [r1], #0x08 /* ST:60-67 */
201 ldrt r4, [r0], #0x04 /* LD:78-7b */
202 ldrt r5, [r0], #0x04 /* LD:7c-7f */
203 strd r6, [r1], #0x08 /* ST:68-6f */
204 strd r8, [r1], #0x08 /* ST:70-77 */
206 strd r4, [r1], #0x08 /* ST:78-7f */
207 bge .Lcopyin_w_loop128
209 .Lcopyin_w_lessthan128:
210 adds r2, r2, #0x80 /* Adjust for extra sub */
214 blt .Lcopyin_w_lessthan32
216 /* Copy 32 bytes at a time */
232 bge .Lcopyin_w_loop32
234 .Lcopyin_w_lessthan32:
235 adds r2, r2, #0x20 /* Adjust for extra sub */
237 RETeq /* Return now if done */
242 add pc, pc, r5, lsl #1 /* Computed jump by r5 * 2 bytes; r5 is set by lines not visible here */
245 /* At least 24 bytes remaining */
251 /* At least 16 bytes remaining */
257 /* At least 8 bytes remaining */
263 /* Less than 8 bytes remaining */
265 RETeq /* Return now if done */
268 .Lcopyin_w_less_than8:
270 ldrtge ip, [r0], #0x04
271 strge ip, [r1], #0x04
272 RETeq /* Return now if done */
274 ldrbt ip, [r0], #0x01
276 ldrbtge r2, [r0], #0x01
279 strbge r2, [r1], #0x01
284 * At this point, it has not been possible to word align both buffers.
285 * The destination buffer (r1) is word aligned, but the source buffer
/* NOTE(review): in the three bad-alignment loops below each shift amount appears twice (lsr, then lsl); these look like alternative endianness variants whose preprocessor guards are not visible here - confirm. */
298 .Lcopyin_bad1_loop16:
310 orr r4, r4, r5, lsr #24
312 orr r5, r5, r6, lsr #24
314 orr r6, r6, r7, lsr #24
316 orr r7, r7, ip, lsr #24
318 orr r4, r4, r5, lsl #24
320 orr r5, r5, r6, lsl #24
322 orr r6, r6, r7, lsl #24
324 orr r7, r7, ip, lsl #24
332 bge .Lcopyin_bad1_loop16
336 RETeq /* Return now if done */
350 orr r4, r4, ip, lsr #24
352 orr r4, r4, ip, lsl #24
355 bge .Lcopyin_bad1_loop4
359 .Lcopyin_bad2_loop16:
371 orr r4, r4, r5, lsr #16
373 orr r5, r5, r6, lsr #16
375 orr r6, r6, r7, lsr #16
377 orr r7, r7, ip, lsr #16
379 orr r4, r4, r5, lsl #16
381 orr r5, r5, r6, lsl #16
383 orr r6, r6, r7, lsl #16
385 orr r7, r7, ip, lsl #16
393 bge .Lcopyin_bad2_loop16
397 RETeq /* Return now if done */
411 orr r4, r4, ip, lsr #16
413 orr r4, r4, ip, lsl #16
416 bge .Lcopyin_bad2_loop4
420 .Lcopyin_bad3_loop16:
432 orr r4, r4, r5, lsr #8
434 orr r5, r5, r6, lsr #8
436 orr r6, r6, r7, lsr #8
438 orr r7, r7, ip, lsr #8
440 orr r4, r4, r5, lsl #8
442 orr r5, r5, r6, lsl #8
444 orr r6, r6, r7, lsl #8
446 orr r7, r7, ip, lsl #8
454 bge .Lcopyin_bad3_loop16
458 RETeq /* Return now if done */
472 orr r4, r4, ip, lsr #8
474 orr r4, r4, ip, lsl #8
477 bge .Lcopyin_bad3_loop4
487 addne pc, pc, r2, lsl #3 /* Computed jump: advance pc by r2 * 8 bytes into the byte-copy sequence */
489 ldrbt ip, [r0], #0x01
491 ldrbt ip, [r0], #0x01
/* copyout excerpt: copies from a kernel-space source (r0) to a user-space destination (r1); r2 is used as the remaining byte count. */
/* NOTE(review): many lines (entry label, flag-setting instructions, several loop bodies) are not visible here, so the added comments describe only the instructions shown. */
499 * r0 = kernel space address
500 * r1 = user space address
503 * Copies bytes from kernel space to user space
508 movle pc, lr /* Bail early if length is <= 0 */
510 ldr r3, .L_arm_memcpy /* Load the literal-pool word at .L_arm_memcpy */
514 ldr r3, .L_min_memcpy_size /* Load the literal-pool word at .L_min_memcpy_size */
518 stmfd sp!, {r0-r2, r4, lr} /* Save r0-r2, r4 and lr on the stack */
522 mov r3, #1 /* DST_IS_USER */
523 ldr r4, .L_arm_memcpy
527 ldmfd sp!, {r0-r2, r4, lr} /* Restore the registers saved above */
532 stmfd sp!, {r10-r11, lr} /* Save r10, r11 and lr; r10 and r11 are used for the onfault bookkeeping below */
538 adr ip, .Lcopyout_fault /* ip = address of the fault handler label */
539 ldr r11, [r10, #PCB_ONFAULT] /* r11 = previous onfault value */
540 str ip, [r10, #PCB_ONFAULT] /* Install .Lcopyout_fault as the onfault handler */
542 str r11, [r10, #PCB_ONFAULT] /* Put the previous onfault value back */
544 ldmfd sp!, {r10-r11, pc} /* Restore r10-r11 and return */
/* NOTE(review): the next four lines are presumably the body of .Lcopyout_fault (label not visible here) - confirm. */
548 str r11, [r10, #PCB_ONFAULT] /* Put the previous onfault value back */
550 ldmfdgt sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
551 ldmfdlt sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
552 ldmfd sp!, {r10-r11, pc}
556 /* Word-align the destination buffer */
557 ands ip, r1, #0x03 /* Already word aligned? */
558 beq .Lcopyout_wordaligned /* Yup */
560 cmp r2, ip /* Enough bytes left to align it? */
561 blt .Lcopyout_l4_2 /* Nope. Just copy bytewise */
564 addne pc, pc, ip, lsl #3 /* Computed jump: advance pc by ip * 8 bytes into the byte-copy sequence */
/* strbt stores one byte to the user destination with unprivileged access and post-increments r1. */
567 strbt ip, [r1], #0x01
569 strbt ip, [r1], #0x01
571 strbt ip, [r1], #0x01
572 cmp r2, #0x00 /* All done? */
575 /* Destination buffer is now word aligned */
576 .Lcopyout_wordaligned:
577 ands ip, r0, #0x03 /* Is src also word-aligned? */
578 bne .Lcopyout_bad_align /* Nope. Things just got bad */
579 cmp r2, #0x08 /* Less than 8 bytes remaining? */
580 blt .Lcopyout_w_less_than8
582 /* Quad-align the source buffer */
583 tst r0, #0x07 /* Already quad aligned? */
584 ldrne ip, [r0], #0x04 /* If not, copy one word so that r0 becomes 8-byte aligned */
586 strtne ip, [r1], #0x04
588 stmfd sp!, {r4-r9} /* Free up some registers */
589 mov r3, #-1 /* Signal restore r4-r9 */
591 /* Destination buffer word aligned, source is quad aligned */
593 blt .Lcopyout_w_lessthan128
595 /* Copy 128 bytes at a time */
/* Loads from the kernel source use ldrd (8 bytes each); stores to the user destination use strt (unprivileged access, one word each). */
597 ldrd r4, [r0], #0x08 /* LD:00-07 */
598 pld [r0, #0x18] /* Prefetch 0x20 */
599 ldrd r6, [r0], #0x08 /* LD:08-0f */
600 ldrd r8, [r0], #0x08 /* LD:10-17 */
601 strt r4, [r1], #0x04 /* ST:00-03 */
602 strt r5, [r1], #0x04 /* ST:04-07 */
603 ldrd r4, [r0], #0x08 /* LD:18-1f */
604 strt r6, [r1], #0x04 /* ST:08-0b */
605 strt r7, [r1], #0x04 /* ST:0c-0f */
606 ldrd r6, [r0], #0x08 /* LD:20-27 */
607 pld [r0, #0x18] /* Prefetch 0x40 */
608 strt r8, [r1], #0x04 /* ST:10-13 */
609 strt r9, [r1], #0x04 /* ST:14-17 */
610 ldrd r8, [r0], #0x08 /* LD:28-2f */
611 strt r4, [r1], #0x04 /* ST:18-1b */
612 strt r5, [r1], #0x04 /* ST:1c-1f */
613 ldrd r4, [r0], #0x08 /* LD:30-37 */
614 strt r6, [r1], #0x04 /* ST:20-23 */
615 strt r7, [r1], #0x04 /* ST:24-27 */
616 ldrd r6, [r0], #0x08 /* LD:38-3f */
617 strt r8, [r1], #0x04 /* ST:28-2b */
618 strt r9, [r1], #0x04 /* ST:2c-2f */
619 ldrd r8, [r0], #0x08 /* LD:40-47 */
620 pld [r0, #0x18] /* Prefetch 0x60 */
621 strt r4, [r1], #0x04 /* ST:30-33 */
622 strt r5, [r1], #0x04 /* ST:34-37 */
623 ldrd r4, [r0], #0x08 /* LD:48-4f */
624 strt r6, [r1], #0x04 /* ST:38-3b */
625 strt r7, [r1], #0x04 /* ST:3c-3f */
626 ldrd r6, [r0], #0x08 /* LD:50-57 */
627 strt r8, [r1], #0x04 /* ST:40-43 */
628 strt r9, [r1], #0x04 /* ST:44-47 */
629 ldrd r8, [r0], #0x08 /* LD:58-5f */
630 strt r4, [r1], #0x04 /* ST:48-4b */
631 strt r5, [r1], #0x04 /* ST:4c-4f */
632 ldrd r4, [r0], #0x08 /* LD:60-67 */
633 pld [r0, #0x18] /* Prefetch 0x80 */
634 strt r6, [r1], #0x04 /* ST:50-53 */
635 strt r7, [r1], #0x04 /* ST:54-57 */
636 ldrd r6, [r0], #0x08 /* LD:68-6f */
637 strt r8, [r1], #0x04 /* ST:58-5b */
638 strt r9, [r1], #0x04 /* ST:5c-5f */
639 ldrd r8, [r0], #0x08 /* LD:70-77 */
640 strt r4, [r1], #0x04 /* ST:60-63 */
641 strt r5, [r1], #0x04 /* ST:64-67 */
642 ldrd r4, [r0], #0x08 /* LD:78-7f */
643 strt r6, [r1], #0x04 /* ST:68-6b */
644 strt r7, [r1], #0x04 /* ST:6c-6f */
645 strt r8, [r1], #0x04 /* ST:70-73 */
646 strt r9, [r1], #0x04 /* ST:74-77 */
648 strt r4, [r1], #0x04 /* ST:78-7b */
649 strt r5, [r1], #0x04 /* ST:7c-7f */
650 bge .Lcopyout_w_loop128
652 .Lcopyout_w_lessthan128:
653 adds r2, r2, #0x80 /* Adjust for extra sub */
655 RETeq /* Return now if done */
657 blt .Lcopyout_w_lessthan32
659 /* Copy 32 bytes at a time */
675 bge .Lcopyout_w_loop32
677 .Lcopyout_w_lessthan32:
678 adds r2, r2, #0x20 /* Adjust for extra sub */
680 RETeq /* Return now if done */
685 add pc, pc, r5, lsl #1 /* Computed jump by r5 * 2 bytes; r5 is set by lines not visible here */
688 /* At least 24 bytes remaining */
694 /* At least 16 bytes remaining */
700 /* At least 8 bytes remaining */
706 /* Less than 8 bytes remaining */
708 RETeq /* Return now if done */
711 .Lcopyout_w_less_than8:
713 ldrge ip, [r0], #0x04
714 strtge ip, [r1], #0x04
715 RETeq /* Return now if done */
719 ldrbge r2, [r0], #0x01
720 strbt ip, [r1], #0x01
722 strbtge r2, [r1], #0x01
727 * At this point, it has not been possible to word align both buffers.
728 * The destination buffer (r1) is word aligned, but the source buffer
/* NOTE(review): in the three bad-alignment loops below each shift amount appears twice (lsr, then lsl); these look like alternative endianness variants whose preprocessor guards are not visible here - confirm. */
741 .Lcopyout_bad1_loop16:
753 orr r4, r4, r5, lsr #24
755 orr r5, r5, r6, lsr #24
757 orr r6, r6, r7, lsr #24
759 orr r7, r7, ip, lsr #24
761 orr r4, r4, r5, lsl #24
763 orr r5, r5, r6, lsl #24
765 orr r6, r6, r7, lsl #24
767 orr r7, r7, ip, lsl #24
775 bge .Lcopyout_bad1_loop16
779 RETeq /* Return now if done */
784 .Lcopyout_bad1_loop4:
793 orr r4, r4, ip, lsr #24
795 orr r4, r4, ip, lsl #24
798 bge .Lcopyout_bad1_loop4
802 .Lcopyout_bad2_loop16:
814 orr r4, r4, r5, lsr #16
816 orr r5, r5, r6, lsr #16
818 orr r6, r6, r7, lsr #16
820 orr r7, r7, ip, lsr #16
822 orr r4, r4, r5, lsl #16
824 orr r5, r5, r6, lsl #16
826 orr r6, r6, r7, lsl #16
828 orr r7, r7, ip, lsl #16
836 bge .Lcopyout_bad2_loop16
840 RETeq /* Return now if done */
845 .Lcopyout_bad2_loop4:
854 orr r4, r4, ip, lsr #16
856 orr r4, r4, ip, lsl #16
859 bge .Lcopyout_bad2_loop4
863 .Lcopyout_bad3_loop16:
875 orr r4, r4, r5, lsr #8
877 orr r5, r5, r6, lsr #8
879 orr r6, r6, r7, lsr #8
881 orr r7, r7, ip, lsr #8
883 orr r4, r4, r5, lsl #8
885 orr r5, r5, r6, lsl #8
887 orr r6, r6, r7, lsl #8
889 orr r7, r7, ip, lsl #8
897 bge .Lcopyout_bad3_loop16
901 RETeq /* Return now if done */
906 .Lcopyout_bad3_loop4:
915 orr r4, r4, ip, lsr #8
917 orr r4, r4, ip, lsl #8
920 bge .Lcopyout_bad3_loop4
930 addne pc, pc, r2, lsl #3 /* Computed jump: advance pc by r2 * 8 bytes into the byte-copy sequence */
933 strbt ip, [r1], #0x01
935 strbt ip, [r1], #0x01