1 /* $NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $ */
4 * Copyright 2003 Wasabi Systems, Inc.
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
38 #include <machine/asm.h>
39 __FBSDID("$FreeBSD$");
46 #define GET_PCB(tmp) \
47 mrc p15, 0, tmp, c13, c0, 4; \
48 add tmp, tmp, #(TD_PCB)
51 .word _C_LABEL(__pcpu) + PC_CURPCB
52 #define GET_PCB(tmp) \
57 * r0 = user space address
58 * r1 = kernel space address
61 * Copies bytes from user space to kernel space
66 movle pc, lr /* Bail early if length is <= 0 */
72 ldr r12, =(VM_MAXUSER_ADDRESS + 1)
81 ldr r3, .L_min_memcpy_size
85 stmfd sp!, {r0-r2, r4, lr}
89 mov r3, #2 /* SRC_IS_USER */
94 ldmfd sp!, {r0-r2, r4, lr}
99 stmfd sp!, {r10-r11, lr} /* Push r10, r11 and lr */
105 adr ip, .Lcopyin_fault /* ip = address of .Lcopyin_fault */
106 ldr r11, [r10, #PCB_ONFAULT] /* r11 = current PCB_ONFAULT value */
107 str ip, [r10, #PCB_ONFAULT] /* Point PCB_ONFAULT at .Lcopyin_fault */
109 str r11, [r10, #PCB_ONFAULT] /* Put the saved value back in PCB_ONFAULT */
111 ldmfd sp!, {r10-r11, pc} /* Pop r10-r11; return by loading pc from the stack */
115 str r11, [r10, #PCB_ONFAULT] /* Put the saved value back in PCB_ONFAULT */
117 ldmfdgt sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
118 ldmfdlt sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
119 ldmfd sp!, {r10-r11, pc} /* Pop r10-r11; return by loading pc from the stack */
123 /* Word-align the destination buffer */
124 ands ip, r1, #0x03 /* Already word aligned? */
125 beq .Lcopyin_wordaligned /* Yup */
127 cmp r2, ip /* Enough bytes left to align it? */
128 blt .Lcopyin_l4_2 /* Nope. Just copy bytewise */
131 addne pc, pc, ip, lsl #3
133 ldrbt ip, [r0], #0x01
135 ldrbt ip, [r0], #0x01
137 ldrbt ip, [r0], #0x01
139 cmp r2, #0x00 /* All done? */
142 /* Destination buffer is now word aligned */
143 .Lcopyin_wordaligned:
144 ands ip, r0, #0x03 /* Is src also word-aligned? */
145 bne .Lcopyin_bad_align /* Nope. Things just got bad */
146 cmp r2, #0x08 /* Less than 8 bytes remaining? */
147 blt .Lcopyin_w_less_than8
149 /* Quad-align the destination buffer */
150 tst r1, #0x07 /* Already quad aligned? */
151 ldrtne ip, [r0], #0x04
152 strne ip, [r1], #0x04
154 stmfd sp!, {r4-r9} /* Free up some registers */
155 mov r3, #-1 /* Signal restore r4-r9 */
157 /* Destination buffer quad aligned, source is word aligned */
159 blt .Lcopyin_w_lessthan128
161 /* Copy 128 bytes at a time */
163 ldrt r4, [r0], #0x04 /* LD:00-03 */
164 ldrt r5, [r0], #0x04 /* LD:04-07 */
165 pld [r0, #0x18] /* Prefetch 0x20 */
166 ldrt r6, [r0], #0x04 /* LD:08-0b */
167 ldrt r7, [r0], #0x04 /* LD:0c-0f */
168 ldrt r8, [r0], #0x04 /* LD:10-13 */
169 ldrt r9, [r0], #0x04 /* LD:14-17 */
170 strd r4, [r1], #0x08 /* ST:00-07 */
171 ldrt r4, [r0], #0x04 /* LD:18-1b */
172 ldrt r5, [r0], #0x04 /* LD:1c-1f */
173 strd r6, [r1], #0x08 /* ST:08-0f */
174 ldrt r6, [r0], #0x04 /* LD:20-23 */
175 ldrt r7, [r0], #0x04 /* LD:24-27 */
176 pld [r0, #0x18] /* Prefetch 0x40 */
177 strd r8, [r1], #0x08 /* ST:10-17 */
178 ldrt r8, [r0], #0x04 /* LD:28-2b */
179 ldrt r9, [r0], #0x04 /* LD:2c-2f */
180 strd r4, [r1], #0x08 /* ST:18-1f */
181 ldrt r4, [r0], #0x04 /* LD:30-33 */
182 ldrt r5, [r0], #0x04 /* LD:34-37 */
183 strd r6, [r1], #0x08 /* ST:20-27 */
184 ldrt r6, [r0], #0x04 /* LD:38-3b */
185 ldrt r7, [r0], #0x04 /* LD:3c-3f */
186 strd r8, [r1], #0x08 /* ST:28-2f */
187 ldrt r8, [r0], #0x04 /* LD:40-43 */
188 ldrt r9, [r0], #0x04 /* LD:44-47 */
189 pld [r0, #0x18] /* Prefetch 0x60 */
190 strd r4, [r1], #0x08 /* ST:30-37 */
191 ldrt r4, [r0], #0x04 /* LD:48-4b */
192 ldrt r5, [r0], #0x04 /* LD:4c-4f */
193 strd r6, [r1], #0x08 /* ST:38-3f */
194 ldrt r6, [r0], #0x04 /* LD:50-53 */
195 ldrt r7, [r0], #0x04 /* LD:54-57 */
196 strd r8, [r1], #0x08 /* ST:40-47 */
197 ldrt r8, [r0], #0x04 /* LD:58-5b */
198 ldrt r9, [r0], #0x04 /* LD:5c-5f */
199 strd r4, [r1], #0x08 /* ST:48-4f */
200 ldrt r4, [r0], #0x04 /* LD:60-63 */
201 ldrt r5, [r0], #0x04 /* LD:64-67 */
202 pld [r0, #0x18] /* Prefetch 0x80 */
203 strd r6, [r1], #0x08 /* ST:50-57 */
204 ldrt r6, [r0], #0x04 /* LD:68-6b */
205 ldrt r7, [r0], #0x04 /* LD:6c-6f */
206 strd r8, [r1], #0x08 /* ST:58-5f */
207 ldrt r8, [r0], #0x04 /* LD:70-73 */
208 ldrt r9, [r0], #0x04 /* LD:74-77 */
209 strd r4, [r1], #0x08 /* ST:60-67 */
210 ldrt r4, [r0], #0x04 /* LD:78-7b */
211 ldrt r5, [r0], #0x04 /* LD:7c-7f */
212 strd r6, [r1], #0x08 /* ST:68-6f */
213 strd r8, [r1], #0x08 /* ST:70-77 */
215 strd r4, [r1], #0x08 /* ST:78-7f */
216 bge .Lcopyin_w_loop128
218 .Lcopyin_w_lessthan128:
219 adds r2, r2, #0x80 /* Adjust for extra sub */
223 blt .Lcopyin_w_lessthan32
225 /* Copy 32 bytes at a time */
241 bge .Lcopyin_w_loop32
243 .Lcopyin_w_lessthan32:
244 adds r2, r2, #0x20 /* Adjust for extra sub */
246 RETeq /* Return now if done */
251 add pc, pc, r5, lsl #1
254 /* At least 24 bytes remaining */
260 /* At least 16 bytes remaining */
266 /* At least 8 bytes remaining */
272 /* Less than 8 bytes remaining */
274 RETeq /* Return now if done */
277 .Lcopyin_w_less_than8:
279 ldrtge ip, [r0], #0x04
280 strge ip, [r1], #0x04
281 RETeq /* Return now if done */
283 ldrbt ip, [r0], #0x01
285 ldrbtge r2, [r0], #0x01
288 strbge r2, [r1], #0x01
293 * At this point, it has not been possible to word align both buffers.
294 * The destination buffer (r1) is word aligned, but the source buffer
307 .Lcopyin_bad1_loop16:
319 orr r4, r4, r5, lsr #24
321 orr r5, r5, r6, lsr #24
323 orr r6, r6, r7, lsr #24
325 orr r7, r7, ip, lsr #24
327 orr r4, r4, r5, lsl #24
329 orr r5, r5, r6, lsl #24
331 orr r6, r6, r7, lsl #24
333 orr r7, r7, ip, lsl #24
341 bge .Lcopyin_bad1_loop16
345 RETeq /* Return now if done */
359 orr r4, r4, ip, lsr #24
361 orr r4, r4, ip, lsl #24
364 bge .Lcopyin_bad1_loop4
368 .Lcopyin_bad2_loop16:
380 orr r4, r4, r5, lsr #16
382 orr r5, r5, r6, lsr #16
384 orr r6, r6, r7, lsr #16
386 orr r7, r7, ip, lsr #16
388 orr r4, r4, r5, lsl #16
390 orr r5, r5, r6, lsl #16
392 orr r6, r6, r7, lsl #16
394 orr r7, r7, ip, lsl #16
402 bge .Lcopyin_bad2_loop16
406 RETeq /* Return now if done */
420 orr r4, r4, ip, lsr #16
422 orr r4, r4, ip, lsl #16
425 bge .Lcopyin_bad2_loop4
429 .Lcopyin_bad3_loop16:
441 orr r4, r4, r5, lsr #8
443 orr r5, r5, r6, lsr #8
445 orr r6, r6, r7, lsr #8
447 orr r7, r7, ip, lsr #8
449 orr r4, r4, r5, lsl #8
451 orr r5, r5, r6, lsl #8
453 orr r6, r6, r7, lsl #8
455 orr r7, r7, ip, lsl #8
463 bge .Lcopyin_bad3_loop16
467 RETeq /* Return now if done */
481 orr r4, r4, ip, lsr #8
483 orr r4, r4, ip, lsl #8
486 bge .Lcopyin_bad3_loop4
496 addne pc, pc, r2, lsl #3
498 ldrbt ip, [r0], #0x01
500 ldrbt ip, [r0], #0x01
508 * r0 = kernel space address
509 * r1 = user space address
512 * Copies bytes from kernel space to user space
517 movle pc, lr /* Bail early if length is <= 0 */
523 ldr r12, =(VM_MAXUSER_ADDRESS + 1)
528 ldr r3, .L_arm_memcpy
532 ldr r3, .L_min_memcpy_size
536 stmfd sp!, {r0-r2, r4, lr}
540 mov r3, #1 /* DST_IS_USER */
541 ldr r4, .L_arm_memcpy
545 ldmfd sp!, {r0-r2, r4, lr}
550 stmfd sp!, {r10-r11, lr} /* Push r10, r11 and lr */
556 adr ip, .Lcopyout_fault /* ip = address of .Lcopyout_fault */
557 ldr r11, [r10, #PCB_ONFAULT] /* r11 = current PCB_ONFAULT value */
558 str ip, [r10, #PCB_ONFAULT] /* Point PCB_ONFAULT at .Lcopyout_fault */
560 str r11, [r10, #PCB_ONFAULT] /* Put the saved value back in PCB_ONFAULT */
562 ldmfd sp!, {r10-r11, pc} /* Pop r10-r11; return by loading pc from the stack */
566 str r11, [r10, #PCB_ONFAULT] /* Put the saved value back in PCB_ONFAULT */
568 ldmfdgt sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
569 ldmfdlt sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
570 ldmfd sp!, {r10-r11, pc} /* Pop r10-r11; return by loading pc from the stack */
574 /* Word-align the destination buffer */
575 ands ip, r1, #0x03 /* Already word aligned? */
576 beq .Lcopyout_wordaligned /* Yup */
578 cmp r2, ip /* Enough bytes left to align it? */
579 blt .Lcopyout_l4_2 /* Nope. Just copy bytewise */
582 addne pc, pc, ip, lsl #3
585 strbt ip, [r1], #0x01
587 strbt ip, [r1], #0x01
589 strbt ip, [r1], #0x01
590 cmp r2, #0x00 /* All done? */
593 /* Destination buffer is now word aligned */
594 .Lcopyout_wordaligned:
595 ands ip, r0, #0x03 /* Is src also word-aligned? */
596 bne .Lcopyout_bad_align /* Nope. Things just got bad */
597 cmp r2, #0x08 /* Less than 8 bytes remaining? */
598 blt .Lcopyout_w_less_than8
600 /* Quad-align the source buffer */
601 tst r0, #0x07 /* Already quad aligned? */
602 ldrne ip, [r0], #0x04
604 strtne ip, [r1], #0x04
606 stmfd sp!, {r4-r9} /* Free up some registers */
607 mov r3, #-1 /* Signal restore r4-r9 */
609 /* Destination buffer word aligned, source is quad aligned */
611 blt .Lcopyout_w_lessthan128
613 /* Copy 128 bytes at a time */
615 ldrd r4, [r0], #0x08 /* LD:00-07 */
616 pld [r0, #0x18] /* Prefetch 0x20 */
617 ldrd r6, [r0], #0x08 /* LD:08-0f */
618 ldrd r8, [r0], #0x08 /* LD:10-17 */
619 strt r4, [r1], #0x04 /* ST:00-03 */
620 strt r5, [r1], #0x04 /* ST:04-07 */
621 ldrd r4, [r0], #0x08 /* LD:18-1f */
622 strt r6, [r1], #0x04 /* ST:08-0b */
623 strt r7, [r1], #0x04 /* ST:0c-0f */
624 ldrd r6, [r0], #0x08 /* LD:20-27 */
625 pld [r0, #0x18] /* Prefetch 0x40 */
626 strt r8, [r1], #0x04 /* ST:10-13 */
627 strt r9, [r1], #0x04 /* ST:14-17 */
628 ldrd r8, [r0], #0x08 /* LD:28-2f */
629 strt r4, [r1], #0x04 /* ST:18-1b */
630 strt r5, [r1], #0x04 /* ST:1c-1f */
631 ldrd r4, [r0], #0x08 /* LD:30-37 */
632 strt r6, [r1], #0x04 /* ST:20-23 */
633 strt r7, [r1], #0x04 /* ST:24-27 */
634 ldrd r6, [r0], #0x08 /* LD:38-3f */
635 strt r8, [r1], #0x04 /* ST:28-2b */
636 strt r9, [r1], #0x04 /* ST:2c-2f */
637 ldrd r8, [r0], #0x08 /* LD:40-47 */
638 pld [r0, #0x18] /* Prefetch 0x60 */
639 strt r4, [r1], #0x04 /* ST:30-33 */
640 strt r5, [r1], #0x04 /* ST:34-37 */
641 ldrd r4, [r0], #0x08 /* LD:48-4f */
642 strt r6, [r1], #0x04 /* ST:38-3b */
643 strt r7, [r1], #0x04 /* ST:3c-3f */
644 ldrd r6, [r0], #0x08 /* LD:50-57 */
645 strt r8, [r1], #0x04 /* ST:40-43 */
646 strt r9, [r1], #0x04 /* ST:44-47 */
647 ldrd r8, [r0], #0x08 /* LD:58-5f */
648 strt r4, [r1], #0x04 /* ST:48-4b */
649 strt r5, [r1], #0x04 /* ST:4c-4f */
650 ldrd r4, [r0], #0x08 /* LD:60-67 */
651 pld [r0, #0x18] /* Prefetch 0x80 */
652 strt r6, [r1], #0x04 /* ST:50-53 */
653 strt r7, [r1], #0x04 /* ST:54-57 */
654 ldrd r6, [r0], #0x08 /* LD:68-6f */
655 strt r8, [r1], #0x04 /* ST:58-5b */
656 strt r9, [r1], #0x04 /* ST:5c-5f */
657 ldrd r8, [r0], #0x08 /* LD:70-77 */
658 strt r4, [r1], #0x04 /* ST:60-63 */
659 strt r5, [r1], #0x04 /* ST:64-67 */
660 ldrd r4, [r0], #0x08 /* LD:78-7f */
661 strt r6, [r1], #0x04 /* ST:68-6b */
662 strt r7, [r1], #0x04 /* ST:6c-6f */
663 strt r8, [r1], #0x04 /* ST:70-73 */
664 strt r9, [r1], #0x04 /* ST:74-77 */
666 strt r4, [r1], #0x04 /* ST:78-7b */
667 strt r5, [r1], #0x04 /* ST:7c-7f */
668 bge .Lcopyout_w_loop128
670 .Lcopyout_w_lessthan128:
671 adds r2, r2, #0x80 /* Adjust for extra sub */
673 RETeq /* Return now if done */
675 blt .Lcopyout_w_lessthan32
677 /* Copy 32 bytes at a time */
693 bge .Lcopyout_w_loop32
695 .Lcopyout_w_lessthan32:
696 adds r2, r2, #0x20 /* Adjust for extra sub */
698 RETeq /* Return now if done */
703 add pc, pc, r5, lsl #1
706 /* At least 24 bytes remaining */
712 /* At least 16 bytes remaining */
718 /* At least 8 bytes remaining */
724 /* Less than 8 bytes remaining */
726 RETeq /* Return now if done */
729 .Lcopyout_w_less_than8:
731 ldrge ip, [r0], #0x04
732 strtge ip, [r1], #0x04
733 RETeq /* Return now if done */
737 ldrbge r2, [r0], #0x01
738 strbt ip, [r1], #0x01
740 strbtge r2, [r1], #0x01
745 * At this point, it has not been possible to word align both buffers.
746 * The destination buffer (r1) is word aligned, but the source buffer
759 .Lcopyout_bad1_loop16:
771 orr r4, r4, r5, lsr #24
773 orr r5, r5, r6, lsr #24
775 orr r6, r6, r7, lsr #24
777 orr r7, r7, ip, lsr #24
779 orr r4, r4, r5, lsl #24
781 orr r5, r5, r6, lsl #24
783 orr r6, r6, r7, lsl #24
785 orr r7, r7, ip, lsl #24
793 bge .Lcopyout_bad1_loop16
797 RETeq /* Return now if done */
802 .Lcopyout_bad1_loop4:
811 orr r4, r4, ip, lsr #24
813 orr r4, r4, ip, lsl #24
816 bge .Lcopyout_bad1_loop4
820 .Lcopyout_bad2_loop16:
832 orr r4, r4, r5, lsr #16
834 orr r5, r5, r6, lsr #16
836 orr r6, r6, r7, lsr #16
838 orr r7, r7, ip, lsr #16
840 orr r4, r4, r5, lsl #16
842 orr r5, r5, r6, lsl #16
844 orr r6, r6, r7, lsl #16
846 orr r7, r7, ip, lsl #16
854 bge .Lcopyout_bad2_loop16
858 RETeq /* Return now if done */
863 .Lcopyout_bad2_loop4:
872 orr r4, r4, ip, lsr #16
874 orr r4, r4, ip, lsl #16
877 bge .Lcopyout_bad2_loop4
881 .Lcopyout_bad3_loop16:
893 orr r4, r4, r5, lsr #8
895 orr r5, r5, r6, lsr #8
897 orr r6, r6, r7, lsr #8
899 orr r7, r7, ip, lsr #8
901 orr r4, r4, r5, lsl #8
903 orr r5, r5, r6, lsl #8
905 orr r6, r6, r7, lsl #8
907 orr r7, r7, ip, lsl #8
915 bge .Lcopyout_bad3_loop16
919 RETeq /* Return now if done */
924 .Lcopyout_bad3_loop4:
933 orr r4, r4, ip, lsr #8
935 orr r4, r4, ip, lsl #8
938 bge .Lcopyout_bad3_loop4
948 addne pc, pc, r2, lsl #3
951 strbt ip, [r1], #0x01
953 strbt ip, [r1], #0x01