1 /* $NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $ */
4 * Copyright 2003 Wasabi Systems, Inc.
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
38 #include <machine/asm.h>
/*
 * GET_PCB(tmp): read coprocessor 15 register c13, c0 (opcode2 4) into
 * 'tmp', then add the TD_PCB constant to it.  The two instructions are
 * joined with ';' so the macro expands on a single logical line.
 * NOTE(review): the CP15 register presumably holds the current thread
 * pointer, which would leave 'tmp' holding the address of that thread's
 * PCB pointer field -- confirm against the definition of TD_PCB.
 */
43 #define GET_PCB(tmp) \
44 mrc p15, 0, tmp, c13, c0, 4; \
45 add tmp, tmp, #(TD_PCB)
48 * r0 = user space address
49 * r1 = kernel space address
52 * Copies bytes from user space to kernel space
/*
 * copyin: early exit, onfault bookkeeping and destination alignment.
 * r0 is the user-space source and r1 the kernel-space destination (see
 * the register notes above); r2 is the value compared against byte
 * counts below.  r3 records which scratch registers are on the stack so
 * the r3-keyed pops can unwind them before returning.
 */
57 movle pc, lr /* Bail early if length is <= 0 */
63 ldr r12, =(VM_MAXUSER_ADDRESS + 1) /* r12 = VM_MAXUSER_ADDRESS + 1 */
68 stmfd sp!, {r10-r11, lr} /* Save r10, r11 and the return address */
74 adr ip, .Lcopyin_fault /* ip = address of .Lcopyin_fault */
75 ldr r11, [r10, #PCB_ONFAULT] /* r11 = current onfault value */
76 str ip, [r10, #PCB_ONFAULT] /* Point onfault at .Lcopyin_fault */
78 str r11, [r10, #PCB_ONFAULT] /* Put the saved onfault value back */
80 ldmfd sp!, {r10-r11, pc} /* Restore r10-r11 and return */
84 str r11, [r10, #PCB_ONFAULT] /* Put the saved onfault value back */
86 ldmfdgt sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
87 ldmfdlt sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
88 ldmfd sp!, {r10-r11, pc} /* Restore r10-r11 and return */
92 /* Word-align the destination buffer */
93 ands ip, r1, #0x03 /* Already word aligned? */
94 beq .Lcopyin_wordaligned /* Yup */
96 cmp r2, ip /* Enough bytes left to align it? */
97 blt .Lcopyin_l4_2 /* Nope. Just copy bytewise */
100 addne pc, pc, ip, lsl #3 /* Computed jump: pc += ip * 8 */
102 ldrbt ip, [r0], #0x01 /* Unprivileged byte load from [r0], r0 += 1 */
104 ldrbt ip, [r0], #0x01
106 ldrbt ip, [r0], #0x01
108 cmp r2, #0x00 /* All done? */
/*
 * Word-aligned path.  If the source (r0) is not word aligned as well,
 * control leaves for .Lcopyin_bad_align; fewer than 8 bytes goes to
 * .Lcopyin_w_less_than8.  Otherwise one extra word is copied when r1 is
 * not 8-byte aligned, r4-r9 are pushed (r3 = -1 records that), and the
 * unrolled loop moves 128 bytes per pass: single-word unprivileged loads
 * (ldrt) from [r0] interleaved with double-word stores (strd) to [r1].
 * The LD:/ST: tags give the byte offsets within the 128-byte chunk.
 */
111 /* Destination buffer is now word aligned */
112 .Lcopyin_wordaligned:
113 ands ip, r0, #0x03 /* Is src also word-aligned? */
114 bne .Lcopyin_bad_align /* Nope. Things just got bad */
115 cmp r2, #0x08 /* Less than 8 bytes remaining? */
116 blt .Lcopyin_w_less_than8
118 /* Quad-align the destination buffer */
119 tst r1, #0x07 /* Already quad aligned? */
120 ldrtne ip, [r0], #0x04 /* No: copy a single word first */
121 strne ip, [r1], #0x04
123 stmfd sp!, {r4-r9} /* Free up some registers */
124 mov r3, #-1 /* Signal restore r4-r9 */
126 /* Destination buffer quad aligned, source is word aligned */
128 blt .Lcopyin_w_lessthan128 /* Take the sub-128-byte path */
130 /* Copy 128 bytes at a time */
132 ldrt r4, [r0], #0x04 /* LD:00-03 */
133 ldrt r5, [r0], #0x04 /* LD:04-07 */
134 pld [r0, #0x18] /* Prefetch 0x20 */
135 ldrt r6, [r0], #0x04 /* LD:08-0b */
136 ldrt r7, [r0], #0x04 /* LD:0c-0f */
137 ldrt r8, [r0], #0x04 /* LD:10-13 */
138 ldrt r9, [r0], #0x04 /* LD:14-17 */
139 strd r4, [r1], #0x08 /* ST:00-07 */
140 ldrt r4, [r0], #0x04 /* LD:18-1b */
141 ldrt r5, [r0], #0x04 /* LD:1c-1f */
142 strd r6, [r1], #0x08 /* ST:08-0f */
143 ldrt r6, [r0], #0x04 /* LD:20-23 */
144 ldrt r7, [r0], #0x04 /* LD:24-27 */
145 pld [r0, #0x18] /* Prefetch 0x40 */
146 strd r8, [r1], #0x08 /* ST:10-17 */
147 ldrt r8, [r0], #0x04 /* LD:28-2b */
148 ldrt r9, [r0], #0x04 /* LD:2c-2f */
149 strd r4, [r1], #0x08 /* ST:18-1f */
150 ldrt r4, [r0], #0x04 /* LD:30-33 */
151 ldrt r5, [r0], #0x04 /* LD:34-37 */
152 strd r6, [r1], #0x08 /* ST:20-27 */
153 ldrt r6, [r0], #0x04 /* LD:38-3b */
154 ldrt r7, [r0], #0x04 /* LD:3c-3f */
155 strd r8, [r1], #0x08 /* ST:28-2f */
156 ldrt r8, [r0], #0x04 /* LD:40-43 */
157 ldrt r9, [r0], #0x04 /* LD:44-47 */
158 pld [r0, #0x18] /* Prefetch 0x60 */
159 strd r4, [r1], #0x08 /* ST:30-37 */
160 ldrt r4, [r0], #0x04 /* LD:48-4b */
161 ldrt r5, [r0], #0x04 /* LD:4c-4f */
162 strd r6, [r1], #0x08 /* ST:38-3f */
163 ldrt r6, [r0], #0x04 /* LD:50-53 */
164 ldrt r7, [r0], #0x04 /* LD:54-57 */
165 strd r8, [r1], #0x08 /* ST:40-47 */
166 ldrt r8, [r0], #0x04 /* LD:58-5b */
167 ldrt r9, [r0], #0x04 /* LD:5c-5f */
168 strd r4, [r1], #0x08 /* ST:48-4f */
169 ldrt r4, [r0], #0x04 /* LD:60-63 */
170 ldrt r5, [r0], #0x04 /* LD:64-67 */
171 pld [r0, #0x18] /* Prefetch 0x80 */
172 strd r6, [r1], #0x08 /* ST:50-57 */
173 ldrt r6, [r0], #0x04 /* LD:68-6b */
174 ldrt r7, [r0], #0x04 /* LD:6c-6f */
175 strd r8, [r1], #0x08 /* ST:58-5f */
176 ldrt r8, [r0], #0x04 /* LD:70-73 */
177 ldrt r9, [r0], #0x04 /* LD:74-77 */
178 strd r4, [r1], #0x08 /* ST:60-67 */
179 ldrt r4, [r0], #0x04 /* LD:78-7b */
180 ldrt r5, [r0], #0x04 /* LD:7c-7f */
181 strd r6, [r1], #0x08 /* ST:68-6f */
182 strd r8, [r1], #0x08 /* ST:70-77 */
184 strd r4, [r1], #0x08 /* ST:78-7f */
185 bge .Lcopyin_w_loop128 /* Back to the top of the 128-byte loop */
/*
 * Tail of the word-aligned copyin path.  r2 is re-biased by 0x80 and
 * later by 0x20 to undo an earlier subtraction (per the existing
 * comments), stepping down from 32-byte chunks to 8-byte chunks.
 */
187 .Lcopyin_w_lessthan128:
188 adds r2, r2, #0x80 /* Adjust for extra sub */
192 blt .Lcopyin_w_lessthan32 /* Take the sub-32-byte path */
194 /* Copy 32 bytes at a time */
210 bge .Lcopyin_w_loop32 /* Back to the top of the 32-byte loop */
212 .Lcopyin_w_lessthan32:
213 adds r2, r2, #0x20 /* Adjust for extra sub */
215 RETeq /* Return now if done */
220 add pc, pc, r5, lsl #1 /* Computed jump: pc += r5 * 2 */
223 /* At least 24 bytes remaining */
229 /* At least 16 bytes remaining */
235 /* At least 8 bytes remaining */
241 /* Less than 8 bytes remaining */
243 RETeq /* Return now if done */
/*
 * Fewer than 8 bytes left with both buffers word aligned: a single
 * conditional word copy (ldrt/str), then individual bytes via
 * ldrbt/strb.  Note that r2 is reused as a data register for one of
 * the trailing bytes.
 */
246 .Lcopyin_w_less_than8:
248 ldrtge ip, [r0], #0x04 /* Unprivileged word load, r0 += 4 */
249 strge ip, [r1], #0x04 /* Store it to the destination, r1 += 4 */
250 RETeq /* Return now if done */
252 ldrbt ip, [r0], #0x01 /* Unprivileged byte load, r0 += 1 */
254 ldrbtge r2, [r0], #0x01 /* Further byte, loaded into r2 */
257 strbge r2, [r1], #0x01 /* ...and stored from r2 */
262 * At this point, it has not been possible to word align both buffers.
263 * The destination buffer (r1) is word aligned, but the source buffer
/*
 * copyin paths for a source that could not be word aligned.  The three
 * variants (bad1/bad2/bad3) differ only in the left-shift amount -- 24,
 * 16 or 8 bits -- applied to the following word before it is OR-ed into
 * the current one.  Each variant has a 16-byte loop (r4-r7, ip) and a
 * 4-byte loop (r4, ip).
 * NOTE(review): presumably the shift amount corresponds to a source
 * misalignment of 1, 2 or 3 bytes respectively -- confirm against the
 * dispatch code at .Lcopyin_bad_align.
 */
276 .Lcopyin_bad1_loop16:
283 orr r4, r4, r5, lsl #24 /* r4 |= r5 << 24 */
285 orr r5, r5, r6, lsl #24 /* r5 |= r6 << 24 */
287 orr r6, r6, r7, lsl #24 /* r6 |= r7 << 24 */
289 orr r7, r7, ip, lsl #24 /* r7 |= ip << 24 */
296 bge .Lcopyin_bad1_loop16 /* Another 16 bytes */
300 RETeq /* Return now if done */
309 orr r4, r4, ip, lsl #24 /* r4 |= ip << 24 */
311 bge .Lcopyin_bad1_loop4 /* Another 4 bytes */
315 .Lcopyin_bad2_loop16:
322 orr r4, r4, r5, lsl #16 /* r4 |= r5 << 16 */
324 orr r5, r5, r6, lsl #16 /* r5 |= r6 << 16 */
326 orr r6, r6, r7, lsl #16 /* r6 |= r7 << 16 */
328 orr r7, r7, ip, lsl #16 /* r7 |= ip << 16 */
335 bge .Lcopyin_bad2_loop16 /* Another 16 bytes */
339 RETeq /* Return now if done */
348 orr r4, r4, ip, lsl #16 /* r4 |= ip << 16 */
350 bge .Lcopyin_bad2_loop4 /* Another 4 bytes */
354 .Lcopyin_bad3_loop16:
361 orr r4, r4, r5, lsl #8 /* r4 |= r5 << 8 */
363 orr r5, r5, r6, lsl #8 /* r5 |= r6 << 8 */
365 orr r6, r6, r7, lsl #8 /* r6 |= r7 << 8 */
367 orr r7, r7, ip, lsl #8 /* r7 |= ip << 8 */
374 bge .Lcopyin_bad3_loop16 /* Another 16 bytes */
378 RETeq /* Return now if done */
387 orr r4, r4, ip, lsl #8 /* r4 |= ip << 8 */
389 bge .Lcopyin_bad3_loop4 /* Another 4 bytes */
399 addne pc, pc, r2, lsl #3 /* Computed jump: pc += r2 * 8 */
401 ldrbt ip, [r0], #0x01 /* Unprivileged byte load, r0 += 1 */
403 ldrbt ip, [r0], #0x01
411 * r0 = kernel space address
412 * r1 = user space address
415 * Copies bytes from kernel space to user space
/*
 * copyout: early exit, onfault bookkeeping and destination alignment.
 * r0 is the kernel-space source and r1 the user-space destination (see
 * the register notes above); r2 is the value compared against byte
 * counts below.  r3 records which scratch registers are on the stack so
 * the r3-keyed pops can unwind them before returning.
 */
420 movle pc, lr /* Bail early if length is <= 0 */
426 ldr r12, =(VM_MAXUSER_ADDRESS + 1) /* r12 = VM_MAXUSER_ADDRESS + 1 */
431 stmfd sp!, {r10-r11, lr} /* Save r10, r11 and the return address */
437 adr ip, .Lcopyout_fault /* ip = address of .Lcopyout_fault */
438 ldr r11, [r10, #PCB_ONFAULT] /* r11 = current onfault value */
439 str ip, [r10, #PCB_ONFAULT] /* Point onfault at .Lcopyout_fault */
441 str r11, [r10, #PCB_ONFAULT] /* Put the saved onfault value back */
443 ldmfd sp!, {r10-r11, pc} /* Restore r10-r11 and return */
447 str r11, [r10, #PCB_ONFAULT] /* Put the saved onfault value back */
449 ldmfdgt sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
450 ldmfdlt sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
451 ldmfd sp!, {r10-r11, pc} /* Restore r10-r11 and return */
455 /* Word-align the destination buffer */
456 ands ip, r1, #0x03 /* Already word aligned? */
457 beq .Lcopyout_wordaligned /* Yup */
459 cmp r2, ip /* Enough bytes left to align it? */
460 blt .Lcopyout_l4_2 /* Nope. Just copy bytewise */
463 addne pc, pc, ip, lsl #3 /* Computed jump: pc += ip * 8 */
466 strbt ip, [r1], #0x01 /* Unprivileged byte store to [r1], r1 += 1 */
468 strbt ip, [r1], #0x01
470 strbt ip, [r1], #0x01
471 cmp r2, #0x00 /* All done? */
/*
 * Word-aligned path.  If the source (r0) is not word aligned as well,
 * control leaves for .Lcopyout_bad_align; fewer than 8 bytes goes to
 * .Lcopyout_w_less_than8.  Otherwise one extra word is copied when r0 is
 * not 8-byte aligned, r4-r9 are pushed (r3 = -1 records that), and the
 * unrolled loop moves 128 bytes per pass: double-word loads (ldrd) from
 * [r0] interleaved with single-word unprivileged stores (strt) to [r1].
 * The LD:/ST: tags give the byte offsets within the 128-byte chunk.
 */
474 /* Destination buffer is now word aligned */
475 .Lcopyout_wordaligned:
476 ands ip, r0, #0x03 /* Is src also word-aligned? */
477 bne .Lcopyout_bad_align /* Nope. Things just got bad */
478 cmp r2, #0x08 /* Less than 8 bytes remaining? */
479 blt .Lcopyout_w_less_than8
481 /* Quad-align the source buffer */
482 tst r0, #0x07 /* Already quad aligned? */
483 ldrne ip, [r0], #0x04 /* No: copy a single word first */
485 strtne ip, [r1], #0x04
487 stmfd sp!, {r4-r9} /* Free up some registers */
488 mov r3, #-1 /* Signal restore r4-r9 */
490 /* Destination buffer word aligned, source is quad aligned */
492 blt .Lcopyout_w_lessthan128 /* Take the sub-128-byte path */
494 /* Copy 128 bytes at a time */
496 ldrd r4, [r0], #0x08 /* LD:00-07 */
497 pld [r0, #0x18] /* Prefetch 0x20 */
498 ldrd r6, [r0], #0x08 /* LD:08-0f */
499 ldrd r8, [r0], #0x08 /* LD:10-17 */
500 strt r4, [r1], #0x04 /* ST:00-03 */
501 strt r5, [r1], #0x04 /* ST:04-07 */
502 ldrd r4, [r0], #0x08 /* LD:18-1f */
503 strt r6, [r1], #0x04 /* ST:08-0b */
504 strt r7, [r1], #0x04 /* ST:0c-0f */
505 ldrd r6, [r0], #0x08 /* LD:20-27 */
506 pld [r0, #0x18] /* Prefetch 0x40 */
507 strt r8, [r1], #0x04 /* ST:10-13 */
508 strt r9, [r1], #0x04 /* ST:14-17 */
509 ldrd r8, [r0], #0x08 /* LD:28-2f */
510 strt r4, [r1], #0x04 /* ST:18-1b */
511 strt r5, [r1], #0x04 /* ST:1c-1f */
512 ldrd r4, [r0], #0x08 /* LD:30-37 */
513 strt r6, [r1], #0x04 /* ST:20-23 */
514 strt r7, [r1], #0x04 /* ST:24-27 */
515 ldrd r6, [r0], #0x08 /* LD:38-3f */
516 strt r8, [r1], #0x04 /* ST:28-2b */
517 strt r9, [r1], #0x04 /* ST:2c-2f */
518 ldrd r8, [r0], #0x08 /* LD:40-47 */
519 pld [r0, #0x18] /* Prefetch 0x60 */
520 strt r4, [r1], #0x04 /* ST:30-33 */
521 strt r5, [r1], #0x04 /* ST:34-37 */
522 ldrd r4, [r0], #0x08 /* LD:48-4f */
523 strt r6, [r1], #0x04 /* ST:38-3b */
524 strt r7, [r1], #0x04 /* ST:3c-3f */
525 ldrd r6, [r0], #0x08 /* LD:50-57 */
526 strt r8, [r1], #0x04 /* ST:40-43 */
527 strt r9, [r1], #0x04 /* ST:44-47 */
528 ldrd r8, [r0], #0x08 /* LD:58-5f */
529 strt r4, [r1], #0x04 /* ST:48-4b */
530 strt r5, [r1], #0x04 /* ST:4c-4f */
531 ldrd r4, [r0], #0x08 /* LD:60-67 */
532 pld [r0, #0x18] /* Prefetch 0x80 */
533 strt r6, [r1], #0x04 /* ST:50-53 */
534 strt r7, [r1], #0x04 /* ST:54-57 */
535 ldrd r6, [r0], #0x08 /* LD:68-6f */
536 strt r8, [r1], #0x04 /* ST:58-5b */
537 strt r9, [r1], #0x04 /* ST:5c-5f */
538 ldrd r8, [r0], #0x08 /* LD:70-77 */
539 strt r4, [r1], #0x04 /* ST:60-63 */
540 strt r5, [r1], #0x04 /* ST:64-67 */
541 ldrd r4, [r0], #0x08 /* LD:78-7f */
542 strt r6, [r1], #0x04 /* ST:68-6b */
543 strt r7, [r1], #0x04 /* ST:6c-6f */
544 strt r8, [r1], #0x04 /* ST:70-73 */
545 strt r9, [r1], #0x04 /* ST:74-77 */
547 strt r4, [r1], #0x04 /* ST:78-7b */
548 strt r5, [r1], #0x04 /* ST:7c-7f */
549 bge .Lcopyout_w_loop128 /* Back to the top of the 128-byte loop */
/*
 * Tail of the word-aligned copyout path.  r2 is re-biased by 0x80 and
 * later by 0x20 to undo an earlier subtraction (per the existing
 * comments), stepping down from 32-byte chunks to 8-byte chunks.
 */
551 .Lcopyout_w_lessthan128:
552 adds r2, r2, #0x80 /* Adjust for extra sub */
554 RETeq /* Return now if done */
556 blt .Lcopyout_w_lessthan32 /* Take the sub-32-byte path */
558 /* Copy 32 bytes at a time */
574 bge .Lcopyout_w_loop32 /* Back to the top of the 32-byte loop */
576 .Lcopyout_w_lessthan32:
577 adds r2, r2, #0x20 /* Adjust for extra sub */
579 RETeq /* Return now if done */
584 add pc, pc, r5, lsl #1 /* Computed jump: pc += r5 * 2 */
587 /* At least 24 bytes remaining */
593 /* At least 16 bytes remaining */
599 /* At least 8 bytes remaining */
605 /* Less than 8 bytes remaining */
607 RETeq /* Return now if done */
/*
 * Fewer than 8 bytes left with both buffers word aligned: a single
 * conditional word copy (ldr/strt), then individual bytes via
 * ldrb/strbt.  Note that r2 is reused as a data register for one of
 * the trailing bytes.
 */
610 .Lcopyout_w_less_than8:
612 ldrge ip, [r0], #0x04 /* Word load from the source, r0 += 4 */
613 strtge ip, [r1], #0x04 /* Unprivileged word store, r1 += 4 */
614 RETeq /* Return now if done */
618 ldrbge r2, [r0], #0x01 /* Further byte, loaded into r2 */
619 strbt ip, [r1], #0x01 /* Unprivileged byte store, r1 += 1 */
621 strbtge r2, [r1], #0x01 /* ...and the byte held in r2 */
626 * At this point, it has not been possible to word align both buffers.
627 * The destination buffer (r1) is word aligned, but the source buffer
/*
 * copyout paths for a source that could not be word aligned.  The three
 * variants (bad1/bad2/bad3) differ only in the left-shift amount -- 24,
 * 16 or 8 bits -- applied to the following word before it is OR-ed into
 * the current one.  Each variant has a 16-byte loop (r4-r7, ip) and a
 * 4-byte loop (r4, ip).
 * NOTE(review): presumably the shift amount corresponds to a source
 * misalignment of 1, 2 or 3 bytes respectively -- confirm against the
 * dispatch code at .Lcopyout_bad_align.
 */
640 .Lcopyout_bad1_loop16:
647 orr r4, r4, r5, lsl #24 /* r4 |= r5 << 24 */
649 orr r5, r5, r6, lsl #24 /* r5 |= r6 << 24 */
651 orr r6, r6, r7, lsl #24 /* r6 |= r7 << 24 */
653 orr r7, r7, ip, lsl #24 /* r7 |= ip << 24 */
660 bge .Lcopyout_bad1_loop16 /* Another 16 bytes */
664 RETeq /* Return now if done */
669 .Lcopyout_bad1_loop4:
673 orr r4, r4, ip, lsl #24 /* r4 |= ip << 24 */
675 bge .Lcopyout_bad1_loop4 /* Another 4 bytes */
679 .Lcopyout_bad2_loop16:
686 orr r4, r4, r5, lsl #16 /* r4 |= r5 << 16 */
688 orr r5, r5, r6, lsl #16 /* r5 |= r6 << 16 */
690 orr r6, r6, r7, lsl #16 /* r6 |= r7 << 16 */
692 orr r7, r7, ip, lsl #16 /* r7 |= ip << 16 */
699 bge .Lcopyout_bad2_loop16 /* Another 16 bytes */
703 RETeq /* Return now if done */
708 .Lcopyout_bad2_loop4:
712 orr r4, r4, ip, lsl #16 /* r4 |= ip << 16 */
714 bge .Lcopyout_bad2_loop4 /* Another 4 bytes */
718 .Lcopyout_bad3_loop16:
725 orr r4, r4, r5, lsl #8 /* r4 |= r5 << 8 */
727 orr r5, r5, r6, lsl #8 /* r5 |= r6 << 8 */
729 orr r6, r6, r7, lsl #8 /* r6 |= r7 << 8 */
731 orr r7, r7, ip, lsl #8 /* r7 |= ip << 8 */
738 bge .Lcopyout_bad3_loop16 /* Another 16 bytes */
742 RETeq /* Return now if done */
747 .Lcopyout_bad3_loop4:
751 orr r4, r4, ip, lsl #8 /* r4 |= ip << 8 */
753 bge .Lcopyout_bad3_loop4 /* Another 4 bytes */
763 addne pc, pc, r2, lsl #3 /* Computed jump: pc += r2 * 8 */
766 strbt ip, [r1], #0x01 /* Unprivileged byte store, r1 += 1 */
768 strbt ip, [r1], #0x01