1 /* $NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $ */
4 * Copyright 2003 Wasabi Systems, Inc.
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
38 #include <machine/asm.h>
39 __FBSDID("$FreeBSD$");
/*
 * Literal words: the address of cpu_info, and the address of __pcpu plus
 * the PC_CURPCB offset.
 * NOTE(review): the labels naming these two words are not visible here;
 * presumably the copy routines load them PC-relative to find the current
 * PCB. Confirm against the complete file.
 */
46 .word _C_LABEL(cpu_info)
49 .word _C_LABEL(__pcpu) + PC_CURPCB
53 * r0 = user space address
54 * r1 = kernel space address
57 * Copies bytes from user space to kernel space
62 movle pc, lr /* Bail early if length is <= 0 */
/* Push r10, r11 and the return address; r10 and r11 are scratch below. */
64 stmfd sp!, {r10-r11, lr}
67 /* XXX Probably not appropriate for non-Hydra SMPs */
/*
 * Call cpu_number(). The value left in r0 is then used as a word index
 * (scaled by 4) into the table addressed by r10, and the CI_CURPCB member
 * of the selected entry is loaded into r10.
 * NOTE(review): the instruction that first points r10 at the table is not
 * visible here.
 */
69 bl _C_LABEL(cpu_number)
72 ldr r10, [r10, r0, lsl #2]
73 ldr r10, [r10, #CI_CURPCB]
/*
 * Fault set-up: ip receives the address of .Lcopyin_fault, r11 keeps the
 * current PCB_ONFAULT value, and the new address is stored in its place.
 */
80 adr ip, .Lcopyin_fault
81 ldr r11, [r10, #PCB_ONFAULT]
82 str ip, [r10, #PCB_ONFAULT]
/*
 * Normal exit: put the saved PCB_ONFAULT value back, then pop r10-r11 and
 * load pc with the return address pushed on entry.
 */
84 str r11, [r10, #PCB_ONFAULT]
86 ldmfd sp!, {r10-r11, pc}
/*
 * Second exit path, presumably the .Lcopyin_fault target (the label line
 * is not visible here): put the saved PCB_ONFAULT value back, pop whichever
 * set of work registers the copy code pushed (the existing comments tie
 * the choice to the sign of r3), then pop r10-r11 and return.
 */
89 str r11, [r10, #PCB_ONFAULT]
91 ldmgtfd sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
92 ldmltfd sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
93 ldmfd sp!, {r10-r11, pc}
97 /* Word-align the destination buffer */
98 ands ip, r1, #0x03 /* Already word aligned? */
99 beq .Lcopyin_wordaligned /* Yup */
101 cmp r2, ip /* Enough bytes left to align it? */
102 blt .Lcopyin_l4_2 /* Nope. Just copy bytewise */
/*
 * Computed jump, taken when the Z flag is clear: ip * 8 is added to pc,
 * i.e. two 4-byte instructions are skipped per unit of ip. Presumably this
 * enters the byte-copy sequence below part-way through.
 */
105 addne pc, pc, ip, lsl #3
/*
 * Each ldrbt reads one source byte using user-mode access permissions and
 * post-increments r0 by 1.
 */
107 ldrbt ip, [r0], #0x01
109 ldrbt ip, [r0], #0x01
111 ldrbt ip, [r0], #0x01
113 cmp r2, #0x00 /* All done? */
116 /* Destination buffer is now word aligned */
117 .Lcopyin_wordaligned:
/*
 * Dispatch on source alignment and remaining length: a source address with
 * either of its two low bits set goes to .Lcopyin_bad_align, and a count
 * below 8 goes to .Lcopyin_w_less_than8.
 */
118 ands ip, r0, #0x03 /* Is src also word-aligned? */
119 bne .Lcopyin_bad_align /* Nope. Things just got bad */
120 cmp r2, #0x08 /* Less than 8 bytes remaining? */
121 blt .Lcopyin_w_less_than8
123 /* Quad-align the destination buffer */
124 tst r1, #0x07 /* Already quad aligned? */
/*
 * When r1 is not 8-byte aligned, fetch one source word with user-mode
 * access permissions (r0 advances by 4); it is stored further down.
 */
125 ldrnet ip, [r0], #0x04
126 stmfd sp!, {r4-r9} /* Free up some registers */
127 mov r3, #-1 /* Signal restore r4-r9 */
128 tst r1, #0x07 /* XXX: bug work-around */
/*
 * Store the word fetched above and advance r1 by 4. As r1 was word aligned
 * but not 8-byte aligned, it is 8-byte aligned afterwards.
 */
130 strne ip, [r1], #0x04
132 /* Destination buffer quad aligned, source is word aligned */
/* Skip the 128-byte loop when the condition flags read LT. */
134 blt .Lcopyin_w_lessthan128
136 /* Copy 128 bytes at a time */
/*
 * Unrolled body: 32 word loads and 16 doubleword stores per pass.
 * ldrt reads one source word using user-mode access permissions and
 * post-increments r0 by 4. strd writes a register pair (r4/r5, r6/r7 or
 * r8/r9) to the destination and post-increments r1 by 8. Loads are issued
 * ahead of the stores that consume them, so the three pairs rotate.
 * The LD: and ST: tags give byte offsets within the 128-byte chunk, and
 * each pld hints a prefetch of the source 0x18 bytes beyond the current r0.
 */
138 ldrt r4, [r0], #0x04 /* LD:00-03 */
139 ldrt r5, [r0], #0x04 /* LD:04-07 */
140 pld [r0, #0x18] /* Prefetch 0x20 */
141 ldrt r6, [r0], #0x04 /* LD:08-0b */
142 ldrt r7, [r0], #0x04 /* LD:0c-0f */
143 ldrt r8, [r0], #0x04 /* LD:10-13 */
144 ldrt r9, [r0], #0x04 /* LD:14-17 */
145 strd r4, [r1], #0x08 /* ST:00-07 */
146 ldrt r4, [r0], #0x04 /* LD:18-1b */
147 ldrt r5, [r0], #0x04 /* LD:1c-1f */
148 strd r6, [r1], #0x08 /* ST:08-0f */
149 ldrt r6, [r0], #0x04 /* LD:20-23 */
150 ldrt r7, [r0], #0x04 /* LD:24-27 */
151 pld [r0, #0x18] /* Prefetch 0x40 */
152 strd r8, [r1], #0x08 /* ST:10-17 */
153 ldrt r8, [r0], #0x04 /* LD:28-2b */
154 ldrt r9, [r0], #0x04 /* LD:2c-2f */
155 strd r4, [r1], #0x08 /* ST:18-1f */
156 ldrt r4, [r0], #0x04 /* LD:30-33 */
157 ldrt r5, [r0], #0x04 /* LD:34-37 */
158 strd r6, [r1], #0x08 /* ST:20-27 */
159 ldrt r6, [r0], #0x04 /* LD:38-3b */
160 ldrt r7, [r0], #0x04 /* LD:3c-3f */
161 strd r8, [r1], #0x08 /* ST:28-2f */
162 ldrt r8, [r0], #0x04 /* LD:40-43 */
163 ldrt r9, [r0], #0x04 /* LD:44-47 */
164 pld [r0, #0x18] /* Prefetch 0x60 */
165 strd r4, [r1], #0x08 /* ST:30-37 */
166 ldrt r4, [r0], #0x04 /* LD:48-4b */
167 ldrt r5, [r0], #0x04 /* LD:4c-4f */
168 strd r6, [r1], #0x08 /* ST:38-3f */
169 ldrt r6, [r0], #0x04 /* LD:50-53 */
170 ldrt r7, [r0], #0x04 /* LD:54-57 */
171 strd r8, [r1], #0x08 /* ST:40-47 */
172 ldrt r8, [r0], #0x04 /* LD:58-5b */
173 ldrt r9, [r0], #0x04 /* LD:5c-5f */
174 strd r4, [r1], #0x08 /* ST:48-4f */
175 ldrt r4, [r0], #0x04 /* LD:60-63 */
176 ldrt r5, [r0], #0x04 /* LD:64-67 */
177 pld [r0, #0x18] /* Prefetch 0x80 */
178 strd r6, [r1], #0x08 /* ST:50-57 */
179 ldrt r6, [r0], #0x04 /* LD:68-6b */
180 ldrt r7, [r0], #0x04 /* LD:6c-6f */
181 strd r8, [r1], #0x08 /* ST:58-5f */
182 ldrt r8, [r0], #0x04 /* LD:70-73 */
183 ldrt r9, [r0], #0x04 /* LD:74-77 */
184 strd r4, [r1], #0x08 /* ST:60-67 */
185 ldrt r4, [r0], #0x04 /* LD:78-7b */
186 ldrt r5, [r0], #0x04 /* LD:7c-7f */
187 strd r6, [r1], #0x08 /* ST:68-6f */
188 strd r8, [r1], #0x08 /* ST:70-77 */
190 strd r4, [r1], #0x08 /* ST:78-7f */
/* Go round again while the condition flags read GE. */
191 bge .Lcopyin_w_loop128
193 .Lcopyin_w_lessthan128:
/*
 * Add 0x80 back onto the count in r2, updating the flags. Presumably this
 * undoes the final subtraction made by the 128-byte loop; confirm against
 * the complete file.
 */
194 adds r2, r2, #0x80 /* Adjust for extra sub */
/* Skip the 32-byte loop when the condition flags read LT. */
198 blt .Lcopyin_w_lessthan32
200 /* Copy 32 bytes at a time */
/* Repeat the 32-byte loop while the condition flags read GE. */
216 bge .Lcopyin_w_loop32
218 .Lcopyin_w_lessthan32:
/* Add 0x20 back onto the count in r2 and return if that leaves zero. */
219 adds r2, r2, #0x20 /* Adjust for extra sub */
221 RETeq /* Return now if done */
/*
 * Computed jump: r5 * 2 is added to pc. Presumably this selects one of the
 * cases announced by the comments below; how r5 is derived is not visible
 * here. TODO confirm.
 */
226 add pc, pc, r5, lsl #1
229 /* At least 24 bytes remaining */
235 /* At least 16 bytes remaining */
241 /* At least 8 bytes remaining */
247 /* Less than 8 bytes remaining */
249 RETeq /* Return now if done */
252 .Lcopyin_w_less_than8:
/*
 * When the flags read GE, move one word: the load uses user-mode access
 * permissions, the store is an ordinary one, and both pointers advance by 4.
 */
254 ldrget ip, [r0], #0x04
255 strge ip, [r1], #0x04
256 RETeq /* Return now if done */
/*
 * Trailing bytes. The first byte is read into ip; when the flags read GE a
 * second byte is read into r2 (reusing the count register) and stored to
 * the destination. Source reads use user-mode access permissions.
 */
258 ldrbt ip, [r0], #0x01
260 ldrgebt r2, [r0], #0x01
263 strgeb r2, [r1], #0x01
268 * At this point, it has not been possible to word align both buffers.
269 * The destination buffer (r1) is word aligned, but the source buffer
282 .Lcopyin_bad1_loop16:
/*
 * Merge step of the 16-byte loop: each orr folds the following register
 * (r5, r6, r7, ip), shifted by 24 bits, into the one before it, so r4-r7
 * each combine data from two neighbouring registers.
 * Two variants are listed, one shifting right and one shifting left.
 * NOTE(review): presumably only one of them is assembled, chosen by byte
 * order. Confirm against the complete file.
 */
294 orr r4, r4, r5, lsr #24
296 orr r5, r5, r6, lsr #24
298 orr r6, r6, r7, lsr #24
300 orr r7, r7, ip, lsr #24
302 orr r4, r4, r5, lsl #24
304 orr r5, r5, r6, lsl #24
306 orr r6, r6, r7, lsl #24
308 orr r7, r7, ip, lsl #24
/* Repeat the 16-byte loop while the condition flags read GE. */
316 bge .Lcopyin_bad1_loop16
320 RETeq /* Return now if done */
/* Single-word version of the same merge, again in both shift directions. */
334 orr r4, r4, ip, lsr #24
336 orr r4, r4, ip, lsl #24
/* Repeat the 4-byte loop while the condition flags read GE. */
339 bge .Lcopyin_bad1_loop4
343 .Lcopyin_bad2_loop16:
/*
 * Merge step of the 16-byte loop: each orr folds the following register
 * (r5, r6, r7, ip), shifted by 16 bits, into the one before it.
 * Two variants are listed, one shifting right and one shifting left.
 * NOTE(review): presumably only one of them is assembled, chosen by byte
 * order. Confirm against the complete file.
 */
355 orr r4, r4, r5, lsr #16
357 orr r5, r5, r6, lsr #16
359 orr r6, r6, r7, lsr #16
361 orr r7, r7, ip, lsr #16
363 orr r4, r4, r5, lsl #16
365 orr r5, r5, r6, lsl #16
367 orr r6, r6, r7, lsl #16
369 orr r7, r7, ip, lsl #16
/* Repeat the 16-byte loop while the condition flags read GE. */
377 bge .Lcopyin_bad2_loop16
381 RETeq /* Return now if done */
/* Single-word version of the same merge, again in both shift directions. */
395 orr r4, r4, ip, lsr #16
397 orr r4, r4, ip, lsl #16
/* Repeat the 4-byte loop while the condition flags read GE. */
400 bge .Lcopyin_bad2_loop4
404 .Lcopyin_bad3_loop16:
/*
 * Merge step of the 16-byte loop: each orr folds the following register
 * (r5, r6, r7, ip), shifted by 8 bits, into the one before it.
 * Two variants are listed, one shifting right and one shifting left.
 * NOTE(review): presumably only one of them is assembled, chosen by byte
 * order. Confirm against the complete file.
 */
416 orr r4, r4, r5, lsr #8
418 orr r5, r5, r6, lsr #8
420 orr r6, r6, r7, lsr #8
422 orr r7, r7, ip, lsr #8
424 orr r4, r4, r5, lsl #8
426 orr r5, r5, r6, lsl #8
428 orr r6, r6, r7, lsl #8
430 orr r7, r7, ip, lsl #8
/* Repeat the 16-byte loop while the condition flags read GE. */
438 bge .Lcopyin_bad3_loop16
442 RETeq /* Return now if done */
/* Single-word version of the same merge, again in both shift directions. */
456 orr r4, r4, ip, lsr #8
458 orr r4, r4, ip, lsl #8
/* Repeat the 4-byte loop while the condition flags read GE. */
461 bge .Lcopyin_bad3_loop4
/*
 * Final byte-wise copy. When the Z flag is clear, r2 * 8 is added to pc,
 * i.e. two 4-byte instructions are skipped per unit of r2. Presumably this
 * enters the byte sequence below part-way through. Each ldrbt reads one
 * source byte using user-mode access permissions and advances r0 by 1.
 * NOTE(review): the label for this sequence is not visible here; the
 * name .Lcopyin_l4_2 is taken from the branch in the alignment code.
 */
471 addne pc, pc, r2, lsl #3
473 ldrbt ip, [r0], #0x01
475 ldrbt ip, [r0], #0x01
483 * r0 = kernel space address
484 * r1 = user space address
487 * Copies bytes from kernel space to user space
492 movle pc, lr /* Bail early if length is <= 0 */
/* Push r10, r11 and the return address; r10 and r11 are scratch below. */
494 stmfd sp!, {r10-r11, lr}
496 #ifdef MULTIPROCESSOR
497 /* XXX Probably not appropriate for non-Hydra SMPs */
/*
 * Call cpu_number(). The value left in r0 is then used as a word index
 * (scaled by 4) into the table addressed by r10, and the CI_CURPCB member
 * of the selected entry is loaded into r10.
 * NOTE(review): the instruction that first points r10 at the table, and
 * the rest of this conditional, are not visible here.
 */
499 bl _C_LABEL(cpu_number)
502 ldr r10, [r10, r0, lsl #2]
503 ldr r10, [r10, #CI_CURPCB]
/*
 * Fault set-up: ip receives the address of .Lcopyout_fault, r11 keeps the
 * current PCB_ONFAULT value, and the new address is stored in its place.
 */
510 adr ip, .Lcopyout_fault
511 ldr r11, [r10, #PCB_ONFAULT]
512 str ip, [r10, #PCB_ONFAULT]
/*
 * Normal exit: put the saved PCB_ONFAULT value back, then pop r10-r11 and
 * load pc with the return address pushed on entry.
 */
514 str r11, [r10, #PCB_ONFAULT]
516 ldmfd sp!, {r10-r11, pc}
/*
 * Second exit path, presumably the .Lcopyout_fault target (the label line
 * is not visible here): put the saved PCB_ONFAULT value back, pop whichever
 * set of work registers the copy code pushed (the existing comments tie
 * the choice to the sign of r3), then pop r10-r11 and return.
 */
519 str r11, [r10, #PCB_ONFAULT]
521 ldmgtfd sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
522 ldmltfd sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
523 ldmfd sp!, {r10-r11, pc}
527 /* Word-align the destination buffer */
528 ands ip, r1, #0x03 /* Already word aligned? */
529 beq .Lcopyout_wordaligned /* Yup */
531 cmp r2, ip /* Enough bytes left to align it? */
532 blt .Lcopyout_l4_2 /* Nope. Just copy bytewise */
/*
 * Computed jump, taken when the Z flag is clear: ip * 8 is added to pc,
 * i.e. two 4-byte instructions are skipped per unit of ip. Presumably this
 * enters the byte-copy sequence below part-way through.
 */
535 addne pc, pc, ip, lsl #3
/*
 * Each strbt writes the low byte of ip to the destination using user-mode
 * access permissions and post-increments r1 by 1.
 */
538 strbt ip, [r1], #0x01
540 strbt ip, [r1], #0x01
542 strbt ip, [r1], #0x01
543 cmp r2, #0x00 /* All done? */
546 /* Destination buffer is now word aligned */
547 .Lcopyout_wordaligned:
/*
 * Dispatch on source alignment and remaining length: a source address with
 * either of its two low bits set goes to .Lcopyout_bad_align, and a count
 * below 8 goes to .Lcopyout_w_less_than8.
 */
548 ands ip, r0, #0x03 /* Is src also word-aligned? */
549 bne .Lcopyout_bad_align /* Nope. Things just got bad */
550 cmp r2, #0x08 /* Less than 8 bytes remaining? */
551 blt .Lcopyout_w_less_than8
553 /* Quad-align the destination buffer */
554 tst r1, #0x07 /* Already quad aligned? */
/*
 * When r1 is not 8-byte aligned, fetch one source word with an ordinary
 * load (r0 advances by 4); it is stored further down.
 */
555 ldrne ip, [r0], #0x04
556 stmfd sp!, {r4-r9} /* Free up some registers */
557 mov r3, #-1 /* Signal restore r4-r9 */
558 tst r1, #0x07 /* XXX: bug work-around */
/*
 * Store the word fetched above using user-mode access permissions and
 * advance r1 by 4. As r1 was word aligned but not 8-byte aligned, it is
 * 8-byte aligned afterwards.
 */
560 strnet ip, [r1], #0x04
562 /* Destination buffer quad aligned, source is word aligned */
/* Skip the 128-byte loop when the condition flags read LT. */
564 blt .Lcopyout_w_lessthan128
566 /* Copy 128 bytes at a time */
/*
 * Unrolled body: 32 word loads and 32 word stores per pass.
 * ldr reads one source word with an ordinary load and post-increments r0
 * by 4. strt writes one word to the destination using user-mode access
 * permissions and post-increments r1 by 4. Loads are issued ahead of the
 * stores that consume them, so r4/r5, r6/r7 and r8/r9 rotate.
 * The LD: and ST: tags give byte offsets within the 128-byte chunk, and
 * each pld hints a prefetch of the source 0x18 bytes beyond the current r0.
 */
568 ldr r4, [r0], #0x04 /* LD:00-03 */
569 ldr r5, [r0], #0x04 /* LD:04-07 */
570 pld [r0, #0x18] /* Prefetch 0x20 */
571 ldr r6, [r0], #0x04 /* LD:08-0b */
572 ldr r7, [r0], #0x04 /* LD:0c-0f */
573 ldr r8, [r0], #0x04 /* LD:10-13 */
574 ldr r9, [r0], #0x04 /* LD:14-17 */
575 strt r4, [r1], #0x04 /* ST:00-03 */
576 strt r5, [r1], #0x04 /* ST:04-07 */
577 ldr r4, [r0], #0x04 /* LD:18-1b */
578 ldr r5, [r0], #0x04 /* LD:1c-1f */
579 strt r6, [r1], #0x04 /* ST:08-0b */
580 strt r7, [r1], #0x04 /* ST:0c-0f */
581 ldr r6, [r0], #0x04 /* LD:20-23 */
582 ldr r7, [r0], #0x04 /* LD:24-27 */
583 pld [r0, #0x18] /* Prefetch 0x40 */
584 strt r8, [r1], #0x04 /* ST:10-13 */
585 strt r9, [r1], #0x04 /* ST:14-17 */
586 ldr r8, [r0], #0x04 /* LD:28-2b */
587 ldr r9, [r0], #0x04 /* LD:2c-2f */
588 strt r4, [r1], #0x04 /* ST:18-1b */
589 strt r5, [r1], #0x04 /* ST:1c-1f */
590 ldr r4, [r0], #0x04 /* LD:30-33 */
591 ldr r5, [r0], #0x04 /* LD:34-37 */
592 strt r6, [r1], #0x04 /* ST:20-23 */
593 strt r7, [r1], #0x04 /* ST:24-27 */
594 ldr r6, [r0], #0x04 /* LD:38-3b */
595 ldr r7, [r0], #0x04 /* LD:3c-3f */
596 strt r8, [r1], #0x04 /* ST:28-2b */
597 strt r9, [r1], #0x04 /* ST:2c-2f */
598 ldr r8, [r0], #0x04 /* LD:40-43 */
599 ldr r9, [r0], #0x04 /* LD:44-47 */
600 pld [r0, #0x18] /* Prefetch 0x60 */
601 strt r4, [r1], #0x04 /* ST:30-33 */
602 strt r5, [r1], #0x04 /* ST:34-37 */
603 ldr r4, [r0], #0x04 /* LD:48-4b */
604 ldr r5, [r0], #0x04 /* LD:4c-4f */
605 strt r6, [r1], #0x04 /* ST:38-3b */
606 strt r7, [r1], #0x04 /* ST:3c-3f */
607 ldr r6, [r0], #0x04 /* LD:50-53 */
608 ldr r7, [r0], #0x04 /* LD:54-57 */
609 strt r8, [r1], #0x04 /* ST:40-43 */
610 strt r9, [r1], #0x04 /* ST:44-47 */
611 ldr r8, [r0], #0x04 /* LD:58-5b */
612 ldr r9, [r0], #0x04 /* LD:5c-5f */
613 strt r4, [r1], #0x04 /* ST:48-4b */
614 strt r5, [r1], #0x04 /* ST:4c-4f */
615 ldr r4, [r0], #0x04 /* LD:60-63 */
616 ldr r5, [r0], #0x04 /* LD:64-67 */
617 pld [r0, #0x18] /* Prefetch 0x80 */
618 strt r6, [r1], #0x04 /* ST:50-53 */
619 strt r7, [r1], #0x04 /* ST:54-57 */
620 ldr r6, [r0], #0x04 /* LD:68-6b */
621 ldr r7, [r0], #0x04 /* LD:6c-6f */
622 strt r8, [r1], #0x04 /* ST:58-5b */
623 strt r9, [r1], #0x04 /* ST:5c-5f */
624 ldr r8, [r0], #0x04 /* LD:70-73 */
625 ldr r9, [r0], #0x04 /* LD:74-77 */
626 strt r4, [r1], #0x04 /* ST:60-63 */
627 strt r5, [r1], #0x04 /* ST:64-67 */
628 ldr r4, [r0], #0x04 /* LD:78-7b */
629 ldr r5, [r0], #0x04 /* LD:7c-7f */
630 strt r6, [r1], #0x04 /* ST:68-6b */
631 strt r7, [r1], #0x04 /* ST:6c-6f */
632 strt r8, [r1], #0x04 /* ST:70-73 */
633 strt r9, [r1], #0x04 /* ST:74-77 */
635 strt r4, [r1], #0x04 /* ST:78-7b */
636 strt r5, [r1], #0x04 /* ST:7c-7f */
/* Go round again while the condition flags read GE. */
637 bge .Lcopyout_w_loop128
639 .Lcopyout_w_lessthan128:
/*
 * Add 0x80 back onto the count in r2, updating the flags, and return if
 * that leaves zero. Presumably this undoes the final subtraction made by
 * the 128-byte loop; confirm against the complete file.
 */
640 adds r2, r2, #0x80 /* Adjust for extra sub */
642 RETeq /* Return now if done */
/* Skip the 32-byte loop when the condition flags read LT. */
644 blt .Lcopyout_w_lessthan32
646 /* Copy 32 bytes at a time */
/* Repeat the 32-byte loop while the condition flags read GE. */
666 bge .Lcopyout_w_loop32
668 .Lcopyout_w_lessthan32:
/* Add 0x20 back onto the count in r2 and return if that leaves zero. */
669 adds r2, r2, #0x20 /* Adjust for extra sub */
671 RETeq /* Return now if done */
/*
 * Computed jump: r5 * 2 is added to pc. Presumably this selects one of the
 * cases announced by the comments below; how r5 is derived is not visible
 * here. TODO confirm.
 */
676 add pc, pc, r5, lsl #1
679 /* At least 24 bytes remaining */
685 /* At least 16 bytes remaining */
691 /* At least 8 bytes remaining */
697 /* Less than 8 bytes remaining */
699 RETeq /* Return now if done */
702 .Lcopyout_w_less_than8:
/*
 * When the flags read GE, move one word: the load is an ordinary one, the
 * store uses user-mode access permissions, and both pointers advance by 4.
 */
704 ldrge ip, [r0], #0x04
705 strget ip, [r1], #0x04
706 RETeq /* Return now if done */
/*
 * Trailing bytes. When the flags read GE a second source byte is read into
 * r2 (reusing the count register). The byte held in ip is stored first,
 * then the r2 byte when GE. Destination writes use user-mode access
 * permissions.
 */
710 ldrgeb r2, [r0], #0x01
711 strbt ip, [r1], #0x01
713 strgebt r2, [r1], #0x01
718 * At this point, it has not been possible to word align both buffers.
719 * The destination buffer (r1) is word aligned, but the source buffer
732 .Lcopyout_bad1_loop16:
/*
 * Merge step of the 16-byte loop: each orr folds the following register
 * (r5, r6, r7, ip), shifted by 24 bits, into the one before it, so r4-r7
 * each combine data from two neighbouring registers.
 * Two variants are listed, one shifting right and one shifting left.
 * NOTE(review): presumably only one of them is assembled, chosen by byte
 * order. Confirm against the complete file.
 */
744 orr r4, r4, r5, lsr #24
746 orr r5, r5, r6, lsr #24
748 orr r6, r6, r7, lsr #24
750 orr r7, r7, ip, lsr #24
752 orr r4, r4, r5, lsl #24
754 orr r5, r5, r6, lsl #24
756 orr r6, r6, r7, lsl #24
758 orr r7, r7, ip, lsl #24
/* Repeat the 16-byte loop while the condition flags read GE. */
766 bge .Lcopyout_bad1_loop16
770 RETeq /* Return now if done */
775 .Lcopyout_bad1_loop4:
/* Single-word version of the same merge, again in both shift directions. */
784 orr r4, r4, ip, lsr #24
786 orr r4, r4, ip, lsl #24
/* Repeat the 4-byte loop while the condition flags read GE. */
789 bge .Lcopyout_bad1_loop4
793 .Lcopyout_bad2_loop16:
/*
 * Merge step of the 16-byte loop: each orr folds the following register
 * (r5, r6, r7, ip), shifted by 16 bits, into the one before it.
 * Two variants are listed, one shifting right and one shifting left.
 * NOTE(review): presumably only one of them is assembled, chosen by byte
 * order. Confirm against the complete file.
 */
805 orr r4, r4, r5, lsr #16
807 orr r5, r5, r6, lsr #16
809 orr r6, r6, r7, lsr #16
811 orr r7, r7, ip, lsr #16
813 orr r4, r4, r5, lsl #16
815 orr r5, r5, r6, lsl #16
817 orr r6, r6, r7, lsl #16
819 orr r7, r7, ip, lsl #16
/* Repeat the 16-byte loop while the condition flags read GE. */
827 bge .Lcopyout_bad2_loop16
831 RETeq /* Return now if done */
836 .Lcopyout_bad2_loop4:
/* Single-word version of the same merge, again in both shift directions. */
845 orr r4, r4, ip, lsr #16
847 orr r4, r4, ip, lsl #16
/* Repeat the 4-byte loop while the condition flags read GE. */
850 bge .Lcopyout_bad2_loop4
854 .Lcopyout_bad3_loop16:
/*
 * Merge step of the 16-byte loop: each orr folds the following register
 * (r5, r6, r7, ip), shifted by 8 bits, into the one before it.
 * Two variants are listed, one shifting right and one shifting left.
 * NOTE(review): presumably only one of them is assembled, chosen by byte
 * order. Confirm against the complete file.
 */
866 orr r4, r4, r5, lsr #8
868 orr r5, r5, r6, lsr #8
870 orr r6, r6, r7, lsr #8
872 orr r7, r7, ip, lsr #8
874 orr r4, r4, r5, lsl #8
876 orr r5, r5, r6, lsl #8
878 orr r6, r6, r7, lsl #8
880 orr r7, r7, ip, lsl #8
/* Repeat the 16-byte loop while the condition flags read GE. */
888 bge .Lcopyout_bad3_loop16
892 RETeq /* Return now if done */
897 .Lcopyout_bad3_loop4:
/* Single-word version of the same merge, again in both shift directions. */
906 orr r4, r4, ip, lsr #8
908 orr r4, r4, ip, lsl #8
/* Repeat the 4-byte loop while the condition flags read GE. */
911 bge .Lcopyout_bad3_loop4
/*
 * Final byte-wise copy. When the Z flag is clear, r2 * 8 is added to pc,
 * i.e. two 4-byte instructions are skipped per unit of r2. Presumably this
 * enters the byte sequence below part-way through. Each strbt writes the
 * low byte of ip to the destination using user-mode access permissions and
 * advances r1 by 1.
 * NOTE(review): the label for this sequence is not visible here; the
 * name .Lcopyout_l4_2 is taken from the branch in the alignment code.
 */
921 addne pc, pc, r2, lsl #3
924 strbt ip, [r1], #0x01
926 strbt ip, [r1], #0x01