1 /* $NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $ */
4 * Copyright 2003 Wasabi Systems, Inc.
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
38 #include <machine/asm.h>
39 __FBSDID("$FreeBSD$");
46 .word _C_LABEL(cpu_info)
49 .word _C_LABEL(__pcpu) + PC_CURPCB
53 * r0 = user space address
54 * r1 = kernel space address
57 * Copies bytes from user space to kernel space
62 movle pc, lr /* Bail early if length is <= 0 */
68 ldr r3, .L_min_memcpy_size
72 stmfd sp!, {r0-r2, r4, lr}
76 mov r3, #2 /* SRC_IS_USER */
81 ldmfd sp!, {r0-r2, r4, lr}
86 stmfd sp!, {r10-r11, lr}
89 /* XXX Probably not appropriate for non-Hydra SMPs */
91 bl _C_LABEL(cpu_number)
94 ldr r10, [r10, r0, lsl #2]
95 ldr r10, [r10, #CI_CURPCB]
102 adr ip, .Lcopyin_fault
103 ldr r11, [r10, #PCB_ONFAULT]
104 str ip, [r10, #PCB_ONFAULT]
106 str r11, [r10, #PCB_ONFAULT]
108 ldmfd sp!, {r10-r11, pc}
111 str r11, [r10, #PCB_ONFAULT]
113 ldmgtfd sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
114 ldmltfd sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
115 ldmfd sp!, {r10-r11, pc}
119 /* Word-align the destination buffer */
120 ands ip, r1, #0x03 /* Already word aligned? */
121 beq .Lcopyin_wordaligned /* Yup */
123 cmp r2, ip /* Enough bytes left to align it? */
124 blt .Lcopyin_l4_2 /* Nope. Just copy bytewise */
127 addne pc, pc, ip, lsl #3
129 ldrbt ip, [r0], #0x01
131 ldrbt ip, [r0], #0x01
133 ldrbt ip, [r0], #0x01
135 cmp r2, #0x00 /* All done? */
138 /* Destination buffer is now word aligned */
139 .Lcopyin_wordaligned:
140 ands ip, r0, #0x03 /* Is src also word-aligned? */
141 bne .Lcopyin_bad_align /* Nope. Things just got bad */
142 cmp r2, #0x08 /* Less than 8 bytes remaining? */
143 blt .Lcopyin_w_less_than8
/*
 * "Quad" here means 8 bytes (the test mask is 0x07).  The destination is
 * the side that gets aligned because the bulk loops store to r1 with strd.
 */
145 /* Quad-align the destination buffer */
146 tst r1, #0x07 /* Already quad aligned? */
147 ldrnet ip, [r0], #0x04 /* NE: fetch one word from the user source */
148 strne ip, [r1], #0x04 /* NE: store it, advancing r1 by 4 */
150 stmfd sp!, {r4-r9} /* Free up some registers */
151 mov r3, #-1 /* Signal restore r4-r9 */
153 /* Destination buffer quad aligned, source is word aligned */
155 blt .Lcopyin_w_lessthan128
157 /* Copy 128 bytes at a time */
/*
 * One pass moves a 128-byte chunk: 32 word loads from the user source (r0)
 * using post-incremented ldrt, interleaved with 16 doubleword stores to the
 * kernel destination (r1) using post-incremented strd on the register pairs
 * r4/r5, r6/r7 and r8/r9.  The LD:/ST: tags give the byte offsets within
 * the chunk that each instruction handles.  Each pld is issued 0x18 past
 * the current r0, which targets chunk offsets 0x20, 0x40, 0x60 and 0x80
 * (the last one being the start of the following chunk).
 */
159 ldrt r4, [r0], #0x04 /* LD:00-03 */
160 ldrt r5, [r0], #0x04 /* LD:04-07 */
161 pld [r0, #0x18] /* Prefetch 0x20 */
162 ldrt r6, [r0], #0x04 /* LD:08-0b */
163 ldrt r7, [r0], #0x04 /* LD:0c-0f */
164 ldrt r8, [r0], #0x04 /* LD:10-13 */
165 ldrt r9, [r0], #0x04 /* LD:14-17 */
166 strd r4, [r1], #0x08 /* ST:00-07 */
167 ldrt r4, [r0], #0x04 /* LD:18-1b */
168 ldrt r5, [r0], #0x04 /* LD:1c-1f */
169 strd r6, [r1], #0x08 /* ST:08-0f */
170 ldrt r6, [r0], #0x04 /* LD:20-23 */
171 ldrt r7, [r0], #0x04 /* LD:24-27 */
172 pld [r0, #0x18] /* Prefetch 0x40 */
173 strd r8, [r1], #0x08 /* ST:10-17 */
174 ldrt r8, [r0], #0x04 /* LD:28-2b */
175 ldrt r9, [r0], #0x04 /* LD:2c-2f */
176 strd r4, [r1], #0x08 /* ST:18-1f */
177 ldrt r4, [r0], #0x04 /* LD:30-33 */
178 ldrt r5, [r0], #0x04 /* LD:34-37 */
179 strd r6, [r1], #0x08 /* ST:20-27 */
180 ldrt r6, [r0], #0x04 /* LD:38-3b */
181 ldrt r7, [r0], #0x04 /* LD:3c-3f */
182 strd r8, [r1], #0x08 /* ST:28-2f */
183 ldrt r8, [r0], #0x04 /* LD:40-43 */
184 ldrt r9, [r0], #0x04 /* LD:44-47 */
185 pld [r0, #0x18] /* Prefetch 0x60 */
186 strd r4, [r1], #0x08 /* ST:30-37 */
187 ldrt r4, [r0], #0x04 /* LD:48-4b */
188 ldrt r5, [r0], #0x04 /* LD:4c-4f */
189 strd r6, [r1], #0x08 /* ST:38-3f */
190 ldrt r6, [r0], #0x04 /* LD:50-53 */
191 ldrt r7, [r0], #0x04 /* LD:54-57 */
192 strd r8, [r1], #0x08 /* ST:40-47 */
193 ldrt r8, [r0], #0x04 /* LD:58-5b */
194 ldrt r9, [r0], #0x04 /* LD:5c-5f */
195 strd r4, [r1], #0x08 /* ST:48-4f */
196 ldrt r4, [r0], #0x04 /* LD:60-63 */
197 ldrt r5, [r0], #0x04 /* LD:64-67 */
198 pld [r0, #0x18] /* Prefetch 0x80 */
199 strd r6, [r1], #0x08 /* ST:50-57 */
200 ldrt r6, [r0], #0x04 /* LD:68-6b */
201 ldrt r7, [r0], #0x04 /* LD:6c-6f */
202 strd r8, [r1], #0x08 /* ST:58-5f */
203 ldrt r8, [r0], #0x04 /* LD:70-73 */
204 ldrt r9, [r0], #0x04 /* LD:74-77 */
205 strd r4, [r1], #0x08 /* ST:60-67 */
206 ldrt r4, [r0], #0x04 /* LD:78-7b */
207 ldrt r5, [r0], #0x04 /* LD:7c-7f */
208 strd r6, [r1], #0x08 /* ST:68-6f */
209 strd r8, [r1], #0x08 /* ST:70-77 */
211 strd r4, [r1], #0x08 /* ST:78-7f */
212 bge .Lcopyin_w_loop128
214 .Lcopyin_w_lessthan128:
215 adds r2, r2, #0x80 /* Adjust for extra sub */
219 blt .Lcopyin_w_lessthan32
221 /* Copy 32 bytes at a time */
237 bge .Lcopyin_w_loop32
239 .Lcopyin_w_lessthan32:
240 adds r2, r2, #0x20 /* Adjust for extra sub */
242 RETeq /* Return now if done */
247 add pc, pc, r5, lsl #1
250 /* At least 24 bytes remaining */
256 /* At least 16 bytes remaining */
262 /* At least 8 bytes remaining */
268 /* Less than 8 bytes remaining */
270 RETeq /* Return now if done */
273 .Lcopyin_w_less_than8:
275 ldrget ip, [r0], #0x04
276 strge ip, [r1], #0x04
277 RETeq /* Return now if done */
279 ldrbt ip, [r0], #0x01
281 ldrgebt r2, [r0], #0x01
284 strgeb r2, [r1], #0x01
289 * At this point, it has not been possible to word align both buffers.
290 * The destination buffer (r1) is word aligned, but the source buffer
303 .Lcopyin_bad1_loop16:
315 orr r4, r4, r5, lsr #24
317 orr r5, r5, r6, lsr #24
319 orr r6, r6, r7, lsr #24
321 orr r7, r7, ip, lsr #24
323 orr r4, r4, r5, lsl #24
325 orr r5, r5, r6, lsl #24
327 orr r6, r6, r7, lsl #24
329 orr r7, r7, ip, lsl #24
337 bge .Lcopyin_bad1_loop16
341 RETeq /* Return now if done */
355 orr r4, r4, ip, lsr #24
357 orr r4, r4, ip, lsl #24
360 bge .Lcopyin_bad1_loop4
364 .Lcopyin_bad2_loop16:
376 orr r4, r4, r5, lsr #16
378 orr r5, r5, r6, lsr #16
380 orr r6, r6, r7, lsr #16
382 orr r7, r7, ip, lsr #16
384 orr r4, r4, r5, lsl #16
386 orr r5, r5, r6, lsl #16
388 orr r6, r6, r7, lsl #16
390 orr r7, r7, ip, lsl #16
398 bge .Lcopyin_bad2_loop16
402 RETeq /* Return now if done */
416 orr r4, r4, ip, lsr #16
418 orr r4, r4, ip, lsl #16
421 bge .Lcopyin_bad2_loop4
425 .Lcopyin_bad3_loop16:
437 orr r4, r4, r5, lsr #8
439 orr r5, r5, r6, lsr #8
441 orr r6, r6, r7, lsr #8
443 orr r7, r7, ip, lsr #8
445 orr r4, r4, r5, lsl #8
447 orr r5, r5, r6, lsl #8
449 orr r6, r6, r7, lsl #8
451 orr r7, r7, ip, lsl #8
459 bge .Lcopyin_bad3_loop16
463 RETeq /* Return now if done */
477 orr r4, r4, ip, lsr #8
479 orr r4, r4, ip, lsl #8
482 bge .Lcopyin_bad3_loop4
492 addne pc, pc, r2, lsl #3
494 ldrbt ip, [r0], #0x01
496 ldrbt ip, [r0], #0x01
504 * r0 = kernel space address
505 * r1 = user space address
508 * Copies bytes from kernel space to user space
513 movle pc, lr /* Bail early if length is <= 0 */
515 ldr r3, .L_arm_memcpy
519 ldr r3, .L_min_memcpy_size
523 stmfd sp!, {r0-r2, r4, lr}
527 mov r3, #1 /* DST_IS_USER */
528 ldr r4, .L_arm_memcpy
532 ldmfd sp!, {r0-r2, r4, lr}
537 stmfd sp!, {r10-r11, lr}
539 #ifdef MULTIPROCESSOR
540 /* XXX Probably not appropriate for non-Hydra SMPs */
542 bl _C_LABEL(cpu_number)
545 ldr r10, [r10, r0, lsl #2]
546 ldr r10, [r10, #CI_CURPCB]
553 adr ip, .Lcopyout_fault
554 ldr r11, [r10, #PCB_ONFAULT]
555 str ip, [r10, #PCB_ONFAULT]
557 str r11, [r10, #PCB_ONFAULT]
559 ldmfd sp!, {r10-r11, pc}
562 str r11, [r10, #PCB_ONFAULT]
564 ldmgtfd sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
565 ldmltfd sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
566 ldmfd sp!, {r10-r11, pc}
570 /* Word-align the destination buffer */
571 ands ip, r1, #0x03 /* Already word aligned? */
572 beq .Lcopyout_wordaligned /* Yup */
574 cmp r2, ip /* Enough bytes left to align it? */
575 blt .Lcopyout_l4_2 /* Nope. Just copy bytewise */
578 addne pc, pc, ip, lsl #3
581 strbt ip, [r1], #0x01
583 strbt ip, [r1], #0x01
585 strbt ip, [r1], #0x01
586 cmp r2, #0x00 /* All done? */
589 /* Destination buffer is now word aligned */
590 .Lcopyout_wordaligned:
591 ands ip, r0, #0x03 /* Is src also word-aligned? */
592 bne .Lcopyout_bad_align /* Nope. Things just got bad */
593 cmp r2, #0x08 /* Less than 8 bytes remaining? */
594 blt .Lcopyout_w_less_than8
/*
 * "Quad" here means 8 bytes (the test mask is 0x07).  For copyout it is the
 * kernel source (r0) that gets aligned, because the bulk loops read it with
 * ldrd; the user destination (r1) is only written a word at a time.
 */
596 /* Quad-align the source buffer */
597 tst r0, #0x07 /* Already quad aligned? */
598 ldrne ip, [r0], #0x04 /* NE: fetch one word, advancing r0 by 4 */
600 strnet ip, [r1], #0x04 /* NE: store it to the user destination */
602 stmfd sp!, {r4-r9} /* Free up some registers */
603 mov r3, #-1 /* Signal restore r4-r9 */
605 /* Destination buffer word aligned, source is quad aligned */
607 blt .Lcopyout_w_lessthan128
609 /* Copy 128 bytes at a time */
/*
 * One pass moves a 128-byte chunk: 16 doubleword loads from the kernel
 * source (r0) using post-incremented ldrd into the register pairs r4/r5,
 * r6/r7 and r8/r9, interleaved with 32 word stores to the user destination
 * (r1) using post-incremented strt.  The LD:/ST: tags give the byte offsets
 * within the chunk that each instruction handles.  Each pld is issued 0x18
 * past the current r0, which targets chunk offsets 0x20, 0x40, 0x60 and
 * 0x80 (the last one being the start of the following chunk).
 */
611 ldrd r4, [r0], #0x08 /* LD:00-07 */
612 pld [r0, #0x18] /* Prefetch 0x20 */
613 ldrd r6, [r0], #0x08 /* LD:08-0f */
614 ldrd r8, [r0], #0x08 /* LD:10-17 */
615 strt r4, [r1], #0x04 /* ST:00-03 */
616 strt r5, [r1], #0x04 /* ST:04-07 */
617 ldrd r4, [r0], #0x08 /* LD:18-1f */
618 strt r6, [r1], #0x04 /* ST:08-0b */
619 strt r7, [r1], #0x04 /* ST:0c-0f */
620 ldrd r6, [r0], #0x08 /* LD:20-27 */
621 pld [r0, #0x18] /* Prefetch 0x40 */
622 strt r8, [r1], #0x04 /* ST:10-13 */
623 strt r9, [r1], #0x04 /* ST:14-17 */
624 ldrd r8, [r0], #0x08 /* LD:28-2f */
625 strt r4, [r1], #0x04 /* ST:18-1b */
626 strt r5, [r1], #0x04 /* ST:1c-1f */
627 ldrd r4, [r0], #0x08 /* LD:30-37 */
628 strt r6, [r1], #0x04 /* ST:20-23 */
629 strt r7, [r1], #0x04 /* ST:24-27 */
630 ldrd r6, [r0], #0x08 /* LD:38-3f */
631 strt r8, [r1], #0x04 /* ST:28-2b */
632 strt r9, [r1], #0x04 /* ST:2c-2f */
633 ldrd r8, [r0], #0x08 /* LD:40-47 */
634 pld [r0, #0x18] /* Prefetch 0x60 */
635 strt r4, [r1], #0x04 /* ST:30-33 */
636 strt r5, [r1], #0x04 /* ST:34-37 */
637 ldrd r4, [r0], #0x08 /* LD:48-4f */
638 strt r6, [r1], #0x04 /* ST:38-3b */
639 strt r7, [r1], #0x04 /* ST:3c-3f */
640 ldrd r6, [r0], #0x08 /* LD:50-57 */
641 strt r8, [r1], #0x04 /* ST:40-43 */
642 strt r9, [r1], #0x04 /* ST:44-47 */
643 ldrd r8, [r0], #0x08 /* LD:58-5f */
644 strt r4, [r1], #0x04 /* ST:48-4b */
645 strt r5, [r1], #0x04 /* ST:4c-4f */
646 ldrd r4, [r0], #0x08 /* LD:60-67 */
647 pld [r0, #0x18] /* Prefetch 0x80 */
648 strt r6, [r1], #0x04 /* ST:50-53 */
649 strt r7, [r1], #0x04 /* ST:54-57 */
650 ldrd r6, [r0], #0x08 /* LD:68-6f */
651 strt r8, [r1], #0x04 /* ST:58-5b */
652 strt r9, [r1], #0x04 /* ST:5c-5f */
653 ldrd r8, [r0], #0x08 /* LD:70-77 */
654 strt r4, [r1], #0x04 /* ST:60-63 */
655 strt r5, [r1], #0x04 /* ST:64-67 */
656 ldrd r4, [r0], #0x08 /* LD:78-7f */
657 strt r6, [r1], #0x04 /* ST:68-6b */
658 strt r7, [r1], #0x04 /* ST:6c-6f */
659 strt r8, [r1], #0x04 /* ST:70-73 */
660 strt r9, [r1], #0x04 /* ST:74-77 */
662 strt r4, [r1], #0x04 /* ST:78-7b */
663 strt r5, [r1], #0x04 /* ST:7c-7f */
664 bge .Lcopyout_w_loop128
666 .Lcopyout_w_lessthan128:
667 adds r2, r2, #0x80 /* Adjust for extra sub */
669 RETeq /* Return now if done */
671 blt .Lcopyout_w_lessthan32
673 /* Copy 32 bytes at a time */
689 bge .Lcopyout_w_loop32
691 .Lcopyout_w_lessthan32:
692 adds r2, r2, #0x20 /* Adjust for extra sub */
694 RETeq /* Return now if done */
699 add pc, pc, r5, lsl #1
702 /* At least 24 bytes remaining */
708 /* At least 16 bytes remaining */
714 /* At least 8 bytes remaining */
720 /* Less than 8 bytes remaining */
722 RETeq /* Return now if done */
725 .Lcopyout_w_less_than8:
727 ldrge ip, [r0], #0x04
728 strget ip, [r1], #0x04
729 RETeq /* Return now if done */
733 ldrgeb r2, [r0], #0x01
734 strbt ip, [r1], #0x01
736 strgebt r2, [r1], #0x01
741 * At this point, it has not been possible to word align both buffers.
742 * The destination buffer (r1) is word aligned, but the source buffer
755 .Lcopyout_bad1_loop16:
767 orr r4, r4, r5, lsr #24
769 orr r5, r5, r6, lsr #24
771 orr r6, r6, r7, lsr #24
773 orr r7, r7, ip, lsr #24
775 orr r4, r4, r5, lsl #24
777 orr r5, r5, r6, lsl #24
779 orr r6, r6, r7, lsl #24
781 orr r7, r7, ip, lsl #24
789 bge .Lcopyout_bad1_loop16
793 RETeq /* Return now if done */
798 .Lcopyout_bad1_loop4:
807 orr r4, r4, ip, lsr #24
809 orr r4, r4, ip, lsl #24
812 bge .Lcopyout_bad1_loop4
816 .Lcopyout_bad2_loop16:
828 orr r4, r4, r5, lsr #16
830 orr r5, r5, r6, lsr #16
832 orr r6, r6, r7, lsr #16
834 orr r7, r7, ip, lsr #16
836 orr r4, r4, r5, lsl #16
838 orr r5, r5, r6, lsl #16
840 orr r6, r6, r7, lsl #16
842 orr r7, r7, ip, lsl #16
850 bge .Lcopyout_bad2_loop16
854 RETeq /* Return now if done */
859 .Lcopyout_bad2_loop4:
868 orr r4, r4, ip, lsr #16
870 orr r4, r4, ip, lsl #16
873 bge .Lcopyout_bad2_loop4
877 .Lcopyout_bad3_loop16:
889 orr r4, r4, r5, lsr #8
891 orr r5, r5, r6, lsr #8
893 orr r6, r6, r7, lsr #8
895 orr r7, r7, ip, lsr #8
897 orr r4, r4, r5, lsl #8
899 orr r5, r5, r6, lsl #8
901 orr r6, r6, r7, lsl #8
903 orr r7, r7, ip, lsl #8
911 bge .Lcopyout_bad3_loop16
915 RETeq /* Return now if done */
920 .Lcopyout_bad3_loop4:
929 orr r4, r4, ip, lsr #8
931 orr r4, r4, ip, lsl #8
934 bge .Lcopyout_bad3_loop4
944 addne pc, pc, r2, lsl #3
947 strbt ip, [r1], #0x01
949 strbt ip, [r1], #0x01