1 /* $NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $ */
4 * Copyright 2003 Wasabi Systems, Inc.
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
38 #include <machine/asm.h>
39 __FBSDID("$FreeBSD$");
41 #include <machine/acle-compat.h>
/*
 * GET_PCB(tmp): read CP15 register c13, c0, opcode2 4 into tmp, then add
 * the TD_PCB offset to it.
 * NOTE(review): the CP15 value is presumably the current thread pointer,
 * which would make tmp the address of that thread's PCB field; confirm.
 */
48 #define GET_PCB(tmp) \
49 mrc p15, 0, tmp, c13, c0, 4; \
50 add tmp, tmp, #(TD_PCB)
/* Literal word holding the address of __pcpu plus the PC_CURPCB offset. */
53 .word _C_LABEL(__pcpu) + PC_CURPCB
/* Alternative GET_PCB definition; its body continues on the next line. */
54 #define GET_PCB(tmp) \
59 * r0 = user space address
60 * r1 = kernel space address
63 * Copies bytes from user space to kernel space
/*
 * copyin entry: early exit, setup around an optional hand-off that is
 * flagged SRC_IS_USER, and installation/removal of the PCB_ONFAULT
 * handler that brackets the actual copy.
 */
68 movle pc, lr /* Bail early if length is <= 0 */
74 ldr r12, =(VM_MAXUSER_ADDRESS + 1) /* r12 = VM_MAXUSER_ADDRESS + 1 (presumably the user range limit) */
83 ldr r3, .L_min_memcpy_size /* r3 = word stored at .L_min_memcpy_size */
87 stmfd sp!, {r0-r2, r4, lr} /* Save r0-r2, r4 and the return address */
91 mov r3, #2 /* SRC_IS_USER */
96 ldmfd sp!, {r0-r2, r4, lr} /* Restore what was saved above */
101 stmfd sp!, {r10-r11, lr} /* r10/r11 are used below for the onfault bookkeeping */
107 adr ip, .Lcopyin_fault /* ip = address of the fault recovery code */
108 ldr r11, [r10, #PCB_ONFAULT] /* r11 = previous onfault value */
109 str ip, [r10, #PCB_ONFAULT] /* Install .Lcopyin_fault as the handler */
111 str r11, [r10, #PCB_ONFAULT] /* Put the previous onfault value back */
113 ldmfd sp!, {r10-r11, pc} /* Pop r10-r11 and return (saved lr goes to pc) */
117 str r11, [r10, #PCB_ONFAULT] /* Put the previous onfault value back (presumably the fault path) */
119 ldmfdgt sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
120 ldmfdlt sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
121 ldmfd sp!, {r10-r11, pc} /* Pop r10-r11 and return */
125 /* Word-align the destination buffer */
126 ands ip, r1, #0x03 /* Already word aligned? */
127 beq .Lcopyin_wordaligned /* Yup */
129 cmp r2, ip /* Enough bytes left to align it? */
130 blt .Lcopyin_l4_2 /* Nope. Just copy bytewise */
133 addne pc, pc, ip, lsl #3 /* Computed branch: advance pc by ip * 8 bytes */
135 ldrbt ip, [r0], #0x01 /* Unprivileged byte load from the source, r0 += 1 */
137 ldrbt ip, [r0], #0x01 /* Unprivileged byte load from the source, r0 += 1 */
139 ldrbt ip, [r0], #0x01 /* Unprivileged byte load from the source, r0 += 1 */
141 cmp r2, #0x00 /* All done? */
144 /* Destination buffer is now word aligned */
145 .Lcopyin_wordaligned:
146 ands ip, r0, #0x03 /* Is src also word-aligned? */
147 bne .Lcopyin_bad_align /* Nope. Things just got bad */
148 cmp r2, #0x08 /* Less than 8 bytes remaining? */
149 blt .Lcopyin_w_less_than8
151 /* Quad-align the destination buffer, so strd below stores to 8-byte boundaries */
152 tst r1, #0x07 /* Already quad aligned? */
153 ldrtne ip, [r0], #0x04 /* If not: fetch one word from the user source */
154 strne ip, [r1], #0x04 /* ... and store it, advancing r1 by 4 */
156 stmfd sp!, {r4-r9} /* Free up some registers */
157 mov r3, #-1 /* Signal restore r4-r9 */
159 /* Destination buffer quad aligned, source is word aligned */
161 blt .Lcopyin_w_lessthan128 /* Skip the 128-byte loop (presumably fewer than 128 bytes left) */
/*
 * Main copyin loop body: moves 0x80 bytes per pass.  Source words are
 * fetched with ldrt (unprivileged load, r0 post-incremented by 4) into
 * r4-r9 and written with strd, which stores a register pair (r4/r5, r6/r7
 * or r8/r9) as one 8-byte store and post-increments r1 by 8.  Loads are
 * issued ahead of the stores that consume them.  The LD:/ST: tags give the
 * byte offsets within the 128-byte chunk.
 */
163 /* Copy 128 bytes at a time */
165 ldrt r4, [r0], #0x04 /* LD:00-03 */
166 ldrt r5, [r0], #0x04 /* LD:04-07 */
167 pld [r0, #0x18] /* Prefetch 0x20 */
168 ldrt r6, [r0], #0x04 /* LD:08-0b */
169 ldrt r7, [r0], #0x04 /* LD:0c-0f */
170 ldrt r8, [r0], #0x04 /* LD:10-13 */
171 ldrt r9, [r0], #0x04 /* LD:14-17 */
172 strd r4, [r1], #0x08 /* ST:00-07 */
173 ldrt r4, [r0], #0x04 /* LD:18-1b */
174 ldrt r5, [r0], #0x04 /* LD:1c-1f */
175 strd r6, [r1], #0x08 /* ST:08-0f */
176 ldrt r6, [r0], #0x04 /* LD:20-23 */
177 ldrt r7, [r0], #0x04 /* LD:24-27 */
178 pld [r0, #0x18] /* Prefetch 0x40 */
179 strd r8, [r1], #0x08 /* ST:10-17 */
180 ldrt r8, [r0], #0x04 /* LD:28-2b */
181 ldrt r9, [r0], #0x04 /* LD:2c-2f */
182 strd r4, [r1], #0x08 /* ST:18-1f */
183 ldrt r4, [r0], #0x04 /* LD:30-33 */
184 ldrt r5, [r0], #0x04 /* LD:34-37 */
185 strd r6, [r1], #0x08 /* ST:20-27 */
186 ldrt r6, [r0], #0x04 /* LD:38-3b */
187 ldrt r7, [r0], #0x04 /* LD:3c-3f */
188 strd r8, [r1], #0x08 /* ST:28-2f */
189 ldrt r8, [r0], #0x04 /* LD:40-43 */
190 ldrt r9, [r0], #0x04 /* LD:44-47 */
191 pld [r0, #0x18] /* Prefetch 0x60 */
192 strd r4, [r1], #0x08 /* ST:30-37 */
193 ldrt r4, [r0], #0x04 /* LD:48-4b */
194 ldrt r5, [r0], #0x04 /* LD:4c-4f */
195 strd r6, [r1], #0x08 /* ST:38-3f */
196 ldrt r6, [r0], #0x04 /* LD:50-53 */
197 ldrt r7, [r0], #0x04 /* LD:54-57 */
198 strd r8, [r1], #0x08 /* ST:40-47 */
199 ldrt r8, [r0], #0x04 /* LD:58-5b */
200 ldrt r9, [r0], #0x04 /* LD:5c-5f */
201 strd r4, [r1], #0x08 /* ST:48-4f */
202 ldrt r4, [r0], #0x04 /* LD:60-63 */
203 ldrt r5, [r0], #0x04 /* LD:64-67 */
204 pld [r0, #0x18] /* Prefetch 0x80 */
205 strd r6, [r1], #0x08 /* ST:50-57 */
206 ldrt r6, [r0], #0x04 /* LD:68-6b */
207 ldrt r7, [r0], #0x04 /* LD:6c-6f */
208 strd r8, [r1], #0x08 /* ST:58-5f */
209 ldrt r8, [r0], #0x04 /* LD:70-73 */
210 ldrt r9, [r0], #0x04 /* LD:74-77 */
211 strd r4, [r1], #0x08 /* ST:60-67 */
212 ldrt r4, [r0], #0x04 /* LD:78-7b */
213 ldrt r5, [r0], #0x04 /* LD:7c-7f */
214 strd r6, [r1], #0x08 /* ST:68-6f */
215 strd r8, [r1], #0x08 /* ST:70-77 */
217 strd r4, [r1], #0x08 /* ST:78-7f */
218 bge .Lcopyin_w_loop128 /* Another pass while GE holds (presumably set by the length decrement) */
/*
 * Tail handling once fewer than 128 bytes remain: a 32-byte loop, then an
 * 8-bytes-per-step section entered through a computed branch.
 */
220 .Lcopyin_w_lessthan128:
221 adds r2, r2, #0x80 /* Adjust for extra sub */
225 blt .Lcopyin_w_lessthan32 /* Skip the 32-byte loop */
227 /* Copy 32 bytes at a time */
243 bge .Lcopyin_w_loop32 /* Another 32-byte pass while GE holds */
245 .Lcopyin_w_lessthan32:
246 adds r2, r2, #0x20 /* Adjust for extra sub */
248 RETeq /* Return now if done */
253 add pc, pc, r5, lsl #1 /* Computed branch: advance pc by r5 * 2 bytes into the steps below */
256 /* At least 24 bytes remaining */
262 /* At least 16 bytes remaining */
268 /* At least 8 bytes remaining */
274 /* Less than 8 bytes remaining */
276 RETeq /* Return now if done */
/* Fewer than 8 bytes left: at most one more word, then trailing bytes. */
279 .Lcopyin_w_less_than8:
281 ldrtge ip, [r0], #0x04 /* On GE: fetch one more whole word from the user source */
282 strge ip, [r1], #0x04 /* ... and store it to the destination */
283 RETeq /* Return now if done */
285 ldrbt ip, [r0], #0x01 /* First trailing byte, unprivileged load */
287 ldrbtge r2, [r0], #0x01 /* On GE: second trailing byte; r2 is reused as a data register */
290 strbge r2, [r1], #0x01 /* On GE: store the second trailing byte */
295 * At this point, it has not been possible to word align both buffers.
296 * The destination buffer (r1) is word aligned, but the source buffer
/*
 * Merge loop using 24-bit shifts: each of r4, r5, r6, r7 is OR-ed with its
 * successor in the chain r4 -> r5 -> r6 -> r7 -> ip, shifted by 24 bits.
 * Every merge appears twice, once with lsr and once with lsl.
 * NOTE(review): presumably this rebuilds destination words from a source
 * that is not word aligned, and the lsr/lsl forms are endianness
 * alternatives chosen by a preprocessor conditional; confirm.
 */
309 .Lcopyin_bad1_loop16:
321 orr r4, r4, r5, lsr #24
323 orr r5, r5, r6, lsr #24
325 orr r6, r6, r7, lsr #24
327 orr r7, r7, ip, lsr #24
329 orr r4, r4, r5, lsl #24
331 orr r5, r5, r6, lsl #24
333 orr r6, r6, r7, lsl #24
335 orr r7, r7, ip, lsl #24
343 bge .Lcopyin_bad1_loop16 /* Another 16-byte pass while GE holds */
347 RETeq /* Return now if done */
361 orr r4, r4, ip, lsr #24 /* Single-word merge, lsr form */
363 orr r4, r4, ip, lsl #24 /* Single-word merge, lsl form */
366 bge .Lcopyin_bad1_loop4 /* Another 4-byte pass while GE holds */
/*
 * Merge loop using 16-bit shifts: each of r4, r5, r6, r7 is OR-ed with its
 * successor in the chain r4 -> r5 -> r6 -> r7 -> ip, shifted by 16 bits.
 * Every merge appears twice, once with lsr and once with lsl.
 * NOTE(review): presumably this rebuilds destination words from a source
 * that is not word aligned, and the lsr/lsl forms are endianness
 * alternatives chosen by a preprocessor conditional; confirm.
 */
370 .Lcopyin_bad2_loop16:
382 orr r4, r4, r5, lsr #16
384 orr r5, r5, r6, lsr #16
386 orr r6, r6, r7, lsr #16
388 orr r7, r7, ip, lsr #16
390 orr r4, r4, r5, lsl #16
392 orr r5, r5, r6, lsl #16
394 orr r6, r6, r7, lsl #16
396 orr r7, r7, ip, lsl #16
404 bge .Lcopyin_bad2_loop16 /* Another 16-byte pass while GE holds */
408 RETeq /* Return now if done */
422 orr r4, r4, ip, lsr #16 /* Single-word merge, lsr form */
424 orr r4, r4, ip, lsl #16 /* Single-word merge, lsl form */
427 bge .Lcopyin_bad2_loop4 /* Another 4-byte pass while GE holds */
/*
 * Merge loop using 8-bit shifts: each of r4, r5, r6, r7 is OR-ed with its
 * successor in the chain r4 -> r5 -> r6 -> r7 -> ip, shifted by 8 bits.
 * Every merge appears twice, once with lsr and once with lsl.
 * NOTE(review): presumably this rebuilds destination words from a source
 * that is not word aligned, and the lsr/lsl forms are endianness
 * alternatives chosen by a preprocessor conditional; confirm.
 */
431 .Lcopyin_bad3_loop16:
443 orr r4, r4, r5, lsr #8
445 orr r5, r5, r6, lsr #8
447 orr r6, r6, r7, lsr #8
449 orr r7, r7, ip, lsr #8
451 orr r4, r4, r5, lsl #8
453 orr r5, r5, r6, lsl #8
455 orr r6, r6, r7, lsl #8
457 orr r7, r7, ip, lsl #8
465 bge .Lcopyin_bad3_loop16 /* Another 16-byte pass while GE holds */
469 RETeq /* Return now if done */
483 orr r4, r4, ip, lsr #8 /* Single-word merge, lsr form */
485 orr r4, r4, ip, lsl #8 /* Single-word merge, lsl form */
488 bge .Lcopyin_bad3_loop4 /* Another 4-byte pass while GE holds */
/* Byte-at-a-time tail: computed branch into a run of user byte loads. */
498 addne pc, pc, r2, lsl #3 /* On NE: advance pc by r2 * 8 bytes */
500 ldrbt ip, [r0], #0x01 /* Unprivileged byte load from the source, r0 += 1 */
502 ldrbt ip, [r0], #0x01 /* Unprivileged byte load from the source, r0 += 1 */
510 * r0 = kernel space address
511 * r1 = user space address
514 * Copies bytes from kernel space to user space
/*
 * copyout entry: early exit, setup around an optional hand-off through
 * the pointer stored at .L_arm_memcpy (flagged DST_IS_USER), and
 * installation/removal of the PCB_ONFAULT handler that brackets the copy.
 */
519 movle pc, lr /* Bail early if length is <= 0 */
525 ldr r12, =(VM_MAXUSER_ADDRESS + 1) /* r12 = VM_MAXUSER_ADDRESS + 1 (presumably the user range limit) */
530 ldr r3, .L_arm_memcpy /* r3 = word stored at .L_arm_memcpy */
534 ldr r3, .L_min_memcpy_size /* r3 = word stored at .L_min_memcpy_size */
538 stmfd sp!, {r0-r2, r4, lr} /* Save r0-r2, r4 and the return address */
542 mov r3, #1 /* DST_IS_USER */
543 ldr r4, .L_arm_memcpy /* r4 = word stored at .L_arm_memcpy */
547 ldmfd sp!, {r0-r2, r4, lr} /* Restore what was saved above */
552 stmfd sp!, {r10-r11, lr} /* r10/r11 are used below for the onfault bookkeeping */
558 adr ip, .Lcopyout_fault /* ip = address of the fault recovery code */
559 ldr r11, [r10, #PCB_ONFAULT] /* r11 = previous onfault value */
560 str ip, [r10, #PCB_ONFAULT] /* Install .Lcopyout_fault as the handler */
562 str r11, [r10, #PCB_ONFAULT] /* Put the previous onfault value back */
564 ldmfd sp!, {r10-r11, pc} /* Pop r10-r11 and return (saved lr goes to pc) */
568 str r11, [r10, #PCB_ONFAULT] /* Put the previous onfault value back (presumably the fault path) */
570 ldmfdgt sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
571 ldmfdlt sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
572 ldmfd sp!, {r10-r11, pc} /* Pop r10-r11 and return */
576 /* Word-align the destination buffer */
577 ands ip, r1, #0x03 /* Already word aligned? */
578 beq .Lcopyout_wordaligned /* Yup */
580 cmp r2, ip /* Enough bytes left to align it? */
581 blt .Lcopyout_l4_2 /* Nope. Just copy bytewise */
584 addne pc, pc, ip, lsl #3 /* Computed branch: advance pc by ip * 8 bytes */
587 strbt ip, [r1], #0x01 /* Unprivileged byte store to the destination, r1 += 1 */
589 strbt ip, [r1], #0x01 /* Unprivileged byte store to the destination, r1 += 1 */
591 strbt ip, [r1], #0x01 /* Unprivileged byte store to the destination, r1 += 1 */
592 cmp r2, #0x00 /* All done? */
596 .Lcopyout_wordaligned:
597 ands ip, r0, #0x03 /* Is src also word-aligned? */
598 bne .Lcopyout_bad_align /* Nope. Things just got bad */
599 cmp r2, #0x08 /* Less than 8 bytes remaining? */
600 blt .Lcopyout_w_less_than8
602 /* Quad-align the destination buffer */
603 tst r0, #0x07 /* Already quad aligned? */
604 ldrne ip, [r0], #0x04
606 strtne ip, [r1], #0x04
608 stmfd sp!, {r4-r9} /* Free up some registers */
609 mov r3, #-1 /* Signal restore r4-r9 */
611 /* Destination buffer word aligned, source is quad aligned */
613 blt .Lcopyout_w_lessthan128
/*
 * Main copyout loop body: moves 0x80 bytes per pass.  Source data is
 * fetched with ldrd, which loads a register pair (r4/r5, r6/r7 or r8/r9)
 * as one 8-byte load and post-increments r0 by 8.  Each register is then
 * written with strt (unprivileged store, r1 post-incremented by 4).  Loads
 * are issued ahead of the stores that consume them.  The LD:/ST: tags give
 * the byte offsets within the 128-byte chunk.
 */
615 /* Copy 128 bytes at a time */
617 ldrd r4, [r0], #0x08 /* LD:00-07 */
618 pld [r0, #0x18] /* Prefetch 0x20 */
619 ldrd r6, [r0], #0x08 /* LD:08-0f */
620 ldrd r8, [r0], #0x08 /* LD:10-17 */
621 strt r4, [r1], #0x04 /* ST:00-03 */
622 strt r5, [r1], #0x04 /* ST:04-07 */
623 ldrd r4, [r0], #0x08 /* LD:18-1f */
624 strt r6, [r1], #0x04 /* ST:08-0b */
625 strt r7, [r1], #0x04 /* ST:0c-0f */
626 ldrd r6, [r0], #0x08 /* LD:20-27 */
627 pld [r0, #0x18] /* Prefetch 0x40 */
628 strt r8, [r1], #0x04 /* ST:10-13 */
629 strt r9, [r1], #0x04 /* ST:14-17 */
630 ldrd r8, [r0], #0x08 /* LD:28-2f */
631 strt r4, [r1], #0x04 /* ST:18-1b */
632 strt r5, [r1], #0x04 /* ST:1c-1f */
633 ldrd r4, [r0], #0x08 /* LD:30-37 */
634 strt r6, [r1], #0x04 /* ST:20-23 */
635 strt r7, [r1], #0x04 /* ST:24-27 */
636 ldrd r6, [r0], #0x08 /* LD:38-3f */
637 strt r8, [r1], #0x04 /* ST:28-2b */
638 strt r9, [r1], #0x04 /* ST:2c-2f */
639 ldrd r8, [r0], #0x08 /* LD:40-47 */
640 pld [r0, #0x18] /* Prefetch 0x60 */
641 strt r4, [r1], #0x04 /* ST:30-33 */
642 strt r5, [r1], #0x04 /* ST:34-37 */
643 ldrd r4, [r0], #0x08 /* LD:48-4f */
644 strt r6, [r1], #0x04 /* ST:38-3b */
645 strt r7, [r1], #0x04 /* ST:3c-3f */
646 ldrd r6, [r0], #0x08 /* LD:50-57 */
647 strt r8, [r1], #0x04 /* ST:40-43 */
648 strt r9, [r1], #0x04 /* ST:44-47 */
649 ldrd r8, [r0], #0x08 /* LD:58-5f */
650 strt r4, [r1], #0x04 /* ST:48-4b */
651 strt r5, [r1], #0x04 /* ST:4c-4f */
652 ldrd r4, [r0], #0x08 /* LD:60-67 */
653 pld [r0, #0x18] /* Prefetch 0x80 */
654 strt r6, [r1], #0x04 /* ST:50-53 */
655 strt r7, [r1], #0x04 /* ST:54-57 */
656 ldrd r6, [r0], #0x08 /* LD:68-6f */
657 strt r8, [r1], #0x04 /* ST:58-5b */
658 strt r9, [r1], #0x04 /* ST:5c-5f */
659 ldrd r8, [r0], #0x08 /* LD:70-77 */
660 strt r4, [r1], #0x04 /* ST:60-63 */
661 strt r5, [r1], #0x04 /* ST:64-67 */
662 ldrd r4, [r0], #0x08 /* LD:78-7f */
663 strt r6, [r1], #0x04 /* ST:68-6b */
664 strt r7, [r1], #0x04 /* ST:6c-6f */
665 strt r8, [r1], #0x04 /* ST:70-73 */
666 strt r9, [r1], #0x04 /* ST:74-77 */
668 strt r4, [r1], #0x04 /* ST:78-7b */
669 strt r5, [r1], #0x04 /* ST:7c-7f */
670 bge .Lcopyout_w_loop128 /* Another pass while GE holds (presumably set by the length decrement) */
/*
 * Tail handling once fewer than 128 bytes remain: a 32-byte loop, then an
 * 8-bytes-per-step section entered through a computed branch.
 */
672 .Lcopyout_w_lessthan128:
673 adds r2, r2, #0x80 /* Adjust for extra sub */
675 RETeq /* Return now if done */
677 blt .Lcopyout_w_lessthan32 /* Skip the 32-byte loop */
679 /* Copy 32 bytes at a time */
695 bge .Lcopyout_w_loop32 /* Another 32-byte pass while GE holds */
697 .Lcopyout_w_lessthan32:
698 adds r2, r2, #0x20 /* Adjust for extra sub */
700 RETeq /* Return now if done */
705 add pc, pc, r5, lsl #1 /* Computed branch: advance pc by r5 * 2 bytes into the steps below */
708 /* At least 24 bytes remaining */
714 /* At least 16 bytes remaining */
720 /* At least 8 bytes remaining */
726 /* Less than 8 bytes remaining */
728 RETeq /* Return now if done */
/* Fewer than 8 bytes left: at most one more word, then trailing bytes. */
731 .Lcopyout_w_less_than8:
733 ldrge ip, [r0], #0x04 /* On GE: fetch one more whole word from the source */
734 strtge ip, [r1], #0x04 /* ... and store it to the user destination */
735 RETeq /* Return now if done */
739 ldrbge r2, [r0], #0x01 /* On GE: second trailing byte; r2 is reused as a data register */
740 strbt ip, [r1], #0x01 /* Store the first trailing byte, unprivileged store */
742 strbtge r2, [r1], #0x01 /* On GE: store the second trailing byte */
747 * At this point, it has not been possible to word align both buffers.
748 * The destination buffer (r1) is word aligned, but the source buffer
/*
 * Merge loop using 24-bit shifts: each of r4, r5, r6, r7 is OR-ed with its
 * successor in the chain r4 -> r5 -> r6 -> r7 -> ip, shifted by 24 bits.
 * Every merge appears twice, once with lsr and once with lsl.
 * NOTE(review): presumably this rebuilds destination words from a source
 * that is not word aligned, and the lsr/lsl forms are endianness
 * alternatives chosen by a preprocessor conditional; confirm.
 */
761 .Lcopyout_bad1_loop16:
773 orr r4, r4, r5, lsr #24
775 orr r5, r5, r6, lsr #24
777 orr r6, r6, r7, lsr #24
779 orr r7, r7, ip, lsr #24
781 orr r4, r4, r5, lsl #24
783 orr r5, r5, r6, lsl #24
785 orr r6, r6, r7, lsl #24
787 orr r7, r7, ip, lsl #24
795 bge .Lcopyout_bad1_loop16 /* Another 16-byte pass while GE holds */
799 RETeq /* Return now if done */
/* Same merge, one word per pass. */
804 .Lcopyout_bad1_loop4:
813 orr r4, r4, ip, lsr #24 /* Single-word merge, lsr form */
815 orr r4, r4, ip, lsl #24 /* Single-word merge, lsl form */
818 bge .Lcopyout_bad1_loop4 /* Another 4-byte pass while GE holds */
/*
 * Merge loop using 16-bit shifts: each of r4, r5, r6, r7 is OR-ed with its
 * successor in the chain r4 -> r5 -> r6 -> r7 -> ip, shifted by 16 bits.
 * Every merge appears twice, once with lsr and once with lsl.
 * NOTE(review): presumably this rebuilds destination words from a source
 * that is not word aligned, and the lsr/lsl forms are endianness
 * alternatives chosen by a preprocessor conditional; confirm.
 */
822 .Lcopyout_bad2_loop16:
834 orr r4, r4, r5, lsr #16
836 orr r5, r5, r6, lsr #16
838 orr r6, r6, r7, lsr #16
840 orr r7, r7, ip, lsr #16
842 orr r4, r4, r5, lsl #16
844 orr r5, r5, r6, lsl #16
846 orr r6, r6, r7, lsl #16
848 orr r7, r7, ip, lsl #16
856 bge .Lcopyout_bad2_loop16 /* Another 16-byte pass while GE holds */
860 RETeq /* Return now if done */
/* Same merge, one word per pass. */
865 .Lcopyout_bad2_loop4:
874 orr r4, r4, ip, lsr #16 /* Single-word merge, lsr form */
876 orr r4, r4, ip, lsl #16 /* Single-word merge, lsl form */
879 bge .Lcopyout_bad2_loop4 /* Another 4-byte pass while GE holds */
/*
 * Merge loop using 8-bit shifts: each of r4, r5, r6, r7 is OR-ed with its
 * successor in the chain r4 -> r5 -> r6 -> r7 -> ip, shifted by 8 bits.
 * Every merge appears twice, once with lsr and once with lsl.
 * NOTE(review): presumably this rebuilds destination words from a source
 * that is not word aligned, and the lsr/lsl forms are endianness
 * alternatives chosen by a preprocessor conditional; confirm.
 */
883 .Lcopyout_bad3_loop16:
895 orr r4, r4, r5, lsr #8
897 orr r5, r5, r6, lsr #8
899 orr r6, r6, r7, lsr #8
901 orr r7, r7, ip, lsr #8
903 orr r4, r4, r5, lsl #8
905 orr r5, r5, r6, lsl #8
907 orr r6, r6, r7, lsl #8
909 orr r7, r7, ip, lsl #8
917 bge .Lcopyout_bad3_loop16 /* Another 16-byte pass while GE holds */
921 RETeq /* Return now if done */
/* Same merge, one word per pass. */
926 .Lcopyout_bad3_loop4:
935 orr r4, r4, ip, lsr #8 /* Single-word merge, lsr form */
937 orr r4, r4, ip, lsl #8 /* Single-word merge, lsl form */
940 bge .Lcopyout_bad3_loop4 /* Another 4-byte pass while GE holds */
/* Byte-at-a-time tail: computed branch into a run of user byte stores. */
950 addne pc, pc, r2, lsl #3 /* On NE: advance pc by r2 * 8 bytes */
953 strbt ip, [r1], #0x01 /* Unprivileged byte store to the destination, r1 += 1 */
955 strbt ip, [r1], #0x01 /* Unprivileged byte store to the destination, r1 += 1 */