1 /* $NetBSD: bcopyinout_xscale.S,v 1.3 2003/12/15 09:27:18 scw Exp $ */
4 * Copyright 2003 Wasabi Systems, Inc.
7 * Written by Steve C. Woodford for Wasabi Systems, Inc.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
38 #include <machine/asm.h>
39 __FBSDID("$FreeBSD$");
/*
 * GET_PCB(tmp): read CP15 register c13, c0 (opcode2 4) into tmp, then add
 * the TD_PCB offset to it.
 * NOTE(review): presumably that CP15 register holds the current thread
 * pointer, so tmp ends up addressing the thread's PCB field -- confirm.
 */
45 #define GET_PCB(tmp) \
46 mrc p15, 0, tmp, c13, c0, 4; \
47 add tmp, tmp, #(TD_PCB)
50 * r0 = user space address
51 * r1 = kernel space address
54 * Copies bytes from user space to kernel space
/*
 * copyin entry, fault-handler installation and exit paths (excerpt).
 * r11 keeps the previous PCB_ONFAULT value so it can be written back on
 * every exit path.
 * NOTE(review): r10 is assumed to address the PCB; the code that loads it
 * is not visible here -- confirm.
 */
59 movle pc, lr /* Bail early if length is <= 0 */
65 ldr r12, =(VM_MAXUSER_ADDRESS + 1) /* r12 = VM_MAXUSER_ADDRESS + 1; presumably the limit for a user-range check -- confirm */
70 stmfd sp!, {r10-r11, lr} /* Save r10, r11 and the return address */
76 adr ip, .Lcopyin_fault /* ip = address of .Lcopyin_fault */
77 ldr r11, [r10, #PCB_ONFAULT] /* r11 = previous onfault value */
78 str ip, [r10, #PCB_ONFAULT] /* Install .Lcopyin_fault as onfault */
80 str r11, [r10, #PCB_ONFAULT] /* Write the previous onfault value back */
82 ldmfd sp!, {r10-r11, pc} /* Restore r10, r11 and return */
86 str r11, [r10, #PCB_ONFAULT] /* Write the previous onfault value back */
88 ldmfdgt sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
89 ldmfdlt sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
90 ldmfd sp!, {r10-r11, pc} /* Restore r10, r11 and return */
/*
 * Copy leading bytes until the destination (r1) is word aligned.
 * ldrbt performs the byte load with user-mode permissions.
 */
94 /* Word-align the destination buffer */
95 ands ip, r1, #0x03 /* Already word aligned? */
96 beq .Lcopyin_wordaligned /* Yup */
98 cmp r2, ip /* Enough bytes left to align it? */
99 blt .Lcopyin_l4_2 /* Nope. Just copy bytewise */
102 addne pc, pc, ip, lsl #3 /* Computed jump: pc += ip << 3 (8 bytes, i.e. two instructions, per unit of ip) */
104 ldrbt ip, [r0], #0x01 /* Fetch one user byte, r0 += 1 */
106 ldrbt ip, [r0], #0x01 /* Fetch one user byte, r0 += 1 */
108 ldrbt ip, [r0], #0x01 /* Fetch one user byte, r0 += 1 */
110 cmp r2, #0x00 /* All done? */
/*
 * Destination is word aligned: pick the copy strategy from the source
 * alignment and remaining length, then 8-byte align the destination.
 */
113 /* Destination buffer is now word aligned */
114 .Lcopyin_wordaligned:
115 ands ip, r0, #0x03 /* Is src also word-aligned? */
116 bne .Lcopyin_bad_align /* Nope. Things just got bad */
117 cmp r2, #0x08 /* Less than 8 bytes remaining? */
118 blt .Lcopyin_w_less_than8 /* Yes: use the short-copy path */
120 /* Quad-align the destination buffer */
121 tst r1, #0x07 /* Already quad aligned? */
122 ldrtne ip, [r0], #0x04 /* No: fetch one word from user space, r0 += 4 */
123 strne ip, [r1], #0x04 /* ...and store it, r1 += 4 (now 8-byte aligned) */
125 stmfd sp!, {r4-r9} /* Free up some registers */
126 mov r3, #-1 /* Signal restore r4-r9 */
/*
 * Bulk copy, 128 bytes per iteration.
 * Words are fetched from user space with ldrt (user-mode permissions) into
 * r4-r9 and written to the destination with strd, which stores a register
 * pair (r4/r5, r6/r7 or r8/r9). Loads and stores are interleaved, and pld
 * prefetches the source ahead of the loads.
 * NOTE(review): the loop label and the flag-setting length update are not
 * visible in this excerpt.
 */
128 /* Destination buffer quad aligned, source is word aligned */
130 blt .Lcopyin_w_lessthan128
132 /* Copy 128 bytes at a time */
134 ldrt r4, [r0], #0x04 /* LD:00-03 */
135 ldrt r5, [r0], #0x04 /* LD:04-07 */
136 pld [r0, #0x18] /* Prefetch 0x20 */
137 ldrt r6, [r0], #0x04 /* LD:08-0b */
138 ldrt r7, [r0], #0x04 /* LD:0c-0f */
139 ldrt r8, [r0], #0x04 /* LD:10-13 */
140 ldrt r9, [r0], #0x04 /* LD:14-17 */
141 strd r4, [r1], #0x08 /* ST:00-07 */
142 ldrt r4, [r0], #0x04 /* LD:18-1b */
143 ldrt r5, [r0], #0x04 /* LD:1c-1f */
144 strd r6, [r1], #0x08 /* ST:08-0f */
145 ldrt r6, [r0], #0x04 /* LD:20-23 */
146 ldrt r7, [r0], #0x04 /* LD:24-27 */
147 pld [r0, #0x18] /* Prefetch 0x40 */
148 strd r8, [r1], #0x08 /* ST:10-17 */
149 ldrt r8, [r0], #0x04 /* LD:28-2b */
150 ldrt r9, [r0], #0x04 /* LD:2c-2f */
151 strd r4, [r1], #0x08 /* ST:18-1f */
152 ldrt r4, [r0], #0x04 /* LD:30-33 */
153 ldrt r5, [r0], #0x04 /* LD:34-37 */
154 strd r6, [r1], #0x08 /* ST:20-27 */
155 ldrt r6, [r0], #0x04 /* LD:38-3b */
156 ldrt r7, [r0], #0x04 /* LD:3c-3f */
157 strd r8, [r1], #0x08 /* ST:28-2f */
158 ldrt r8, [r0], #0x04 /* LD:40-43 */
159 ldrt r9, [r0], #0x04 /* LD:44-47 */
160 pld [r0, #0x18] /* Prefetch 0x60 */
161 strd r4, [r1], #0x08 /* ST:30-37 */
162 ldrt r4, [r0], #0x04 /* LD:48-4b */
163 ldrt r5, [r0], #0x04 /* LD:4c-4f */
164 strd r6, [r1], #0x08 /* ST:38-3f */
165 ldrt r6, [r0], #0x04 /* LD:50-53 */
166 ldrt r7, [r0], #0x04 /* LD:54-57 */
167 strd r8, [r1], #0x08 /* ST:40-47 */
168 ldrt r8, [r0], #0x04 /* LD:58-5b */
169 ldrt r9, [r0], #0x04 /* LD:5c-5f */
170 strd r4, [r1], #0x08 /* ST:48-4f */
171 ldrt r4, [r0], #0x04 /* LD:60-63 */
172 ldrt r5, [r0], #0x04 /* LD:64-67 */
173 pld [r0, #0x18] /* Prefetch 0x80 */
174 strd r6, [r1], #0x08 /* ST:50-57 */
175 ldrt r6, [r0], #0x04 /* LD:68-6b */
176 ldrt r7, [r0], #0x04 /* LD:6c-6f */
177 strd r8, [r1], #0x08 /* ST:58-5f */
178 ldrt r8, [r0], #0x04 /* LD:70-73 */
179 ldrt r9, [r0], #0x04 /* LD:74-77 */
180 strd r4, [r1], #0x08 /* ST:60-67 */
181 ldrt r4, [r0], #0x04 /* LD:78-7b */
182 ldrt r5, [r0], #0x04 /* LD:7c-7f */
183 strd r6, [r1], #0x08 /* ST:68-6f */
184 strd r8, [r1], #0x08 /* ST:70-77 */
186 strd r4, [r1], #0x08 /* ST:78-7f */
187 bge .Lcopyin_w_loop128 /* Repeat while the GE condition holds */
/*
 * Tail handling once fewer than 128 bytes remain: a 32-byte loop, then a
 * computed jump into the sub-32-byte cases.
 * NOTE(review): the loop bodies and the computation of r5 are not visible
 * in this excerpt.
 */
189 .Lcopyin_w_lessthan128:
190 adds r2, r2, #0x80 /* Adjust for extra sub */
194 blt .Lcopyin_w_lessthan32 /* LT: go to the sub-32-byte tail */
196 /* Copy 32 bytes at a time */
212 bge .Lcopyin_w_loop32 /* Repeat while the GE condition holds */
214 .Lcopyin_w_lessthan32:
215 adds r2, r2, #0x20 /* Adjust for extra sub */
217 RETeq /* Return now if done */
222 add pc, pc, r5, lsl #1 /* Computed jump: pc += r5 << 1; presumably selects one of the cases below -- confirm */
225 /* At least 24 bytes remaining */
231 /* At least 16 bytes remaining */
237 /* At least 8 bytes remaining */
243 /* Less than 8 bytes remaining */
245 RETeq /* Return now if done */
/*
 * Short tail: optionally one word, then trailing bytes.
 * NOTE(review): the instructions that set the GE/EQ conditions are not
 * visible in this excerpt.
 */
248 .Lcopyin_w_less_than8:
250 ldrtge ip, [r0], #0x04 /* GE: fetch one word from user space */
251 strge ip, [r1], #0x04 /* GE: store it to the destination */
252 RETeq /* Return now if done */
254 ldrbt ip, [r0], #0x01 /* Fetch one user byte into ip */
256 ldrbtge r2, [r0], #0x01 /* GE: fetch another user byte into r2 */
259 strbge r2, [r1], #0x01 /* GE: store the byte held in r2 */
264 * At this point, it has not been possible to word align both buffers.
265 * The destination buffer (r1) is word aligned, but the source buffer
/*
 * Misaligned-source copy loops (excerpt). Three variants are visible; they
 * merge adjacent source words with a left shift of 24, 16 or 8 bits.
 * NOTE(review): label names suggest a 16-byte and a 4-byte loop per
 * variant; the loads, the complementary right shifts and the stores are
 * not visible here.
 */
278 .Lcopyin_bad1_loop16:
285 orr r4, r4, r5, lsl #24 /* r4 |= r5 << 24 */
287 orr r5, r5, r6, lsl #24
289 orr r6, r6, r7, lsl #24
291 orr r7, r7, ip, lsl #24
298 bge .Lcopyin_bad1_loop16 /* Repeat while the GE condition holds */
302 RETeq /* Return now if done */
311 orr r4, r4, ip, lsl #24
313 bge .Lcopyin_bad1_loop4 /* Repeat while the GE condition holds */
317 .Lcopyin_bad2_loop16:
324 orr r4, r4, r5, lsl #16 /* r4 |= r5 << 16 */
326 orr r5, r5, r6, lsl #16
328 orr r6, r6, r7, lsl #16
330 orr r7, r7, ip, lsl #16
337 bge .Lcopyin_bad2_loop16 /* Repeat while the GE condition holds */
341 RETeq /* Return now if done */
350 orr r4, r4, ip, lsl #16
352 bge .Lcopyin_bad2_loop4 /* Repeat while the GE condition holds */
356 .Lcopyin_bad3_loop16:
363 orr r4, r4, r5, lsl #8 /* r4 |= r5 << 8 */
365 orr r5, r5, r6, lsl #8
367 orr r6, r6, r7, lsl #8
369 orr r7, r7, ip, lsl #8
376 bge .Lcopyin_bad3_loop16 /* Repeat while the GE condition holds */
380 RETeq /* Return now if done */
389 orr r4, r4, ip, lsl #8
391 bge .Lcopyin_bad3_loop4 /* Repeat while the GE condition holds */
401 addne pc, pc, r2, lsl #3 /* Computed jump: pc += r2 << 3 (two instructions per unit of r2) */
403 ldrbt ip, [r0], #0x01 /* Fetch one user byte, r0 += 1 */
405 ldrbt ip, [r0], #0x01 /* Fetch one user byte, r0 += 1 */
413 * r0 = kernel space address
414 * r1 = user space address
417 * Copies bytes from kernel space to user space
/*
 * copyout entry, fault-handler installation and exit paths (excerpt).
 * r11 keeps the previous PCB_ONFAULT value so it can be written back on
 * every exit path.
 * NOTE(review): r10 is assumed to address the PCB; the code that loads it
 * is not visible here -- confirm.
 */
422 movle pc, lr /* Bail early if length is <= 0 */
428 ldr r12, =(VM_MAXUSER_ADDRESS + 1) /* r12 = VM_MAXUSER_ADDRESS + 1; presumably the limit for a user-range check -- confirm */
433 stmfd sp!, {r10-r11, lr} /* Save r10, r11 and the return address */
439 adr ip, .Lcopyout_fault /* ip = address of .Lcopyout_fault */
440 ldr r11, [r10, #PCB_ONFAULT] /* r11 = previous onfault value */
441 str ip, [r10, #PCB_ONFAULT] /* Install .Lcopyout_fault as onfault */
443 str r11, [r10, #PCB_ONFAULT] /* Write the previous onfault value back */
445 ldmfd sp!, {r10-r11, pc} /* Restore r10, r11 and return */
449 str r11, [r10, #PCB_ONFAULT] /* Write the previous onfault value back */
451 ldmfdgt sp!, {r4-r7} /* r3 > 0 Restore r4-r7 */
452 ldmfdlt sp!, {r4-r9} /* r3 < 0 Restore r4-r9 */
453 ldmfd sp!, {r10-r11, pc} /* Restore r10, r11 and return */
/*
 * Copy leading bytes until the user-space destination (r1) is word aligned.
 * strbt performs the byte store with user-mode permissions.
 */
457 /* Word-align the destination buffer */
458 ands ip, r1, #0x03 /* Already word aligned? */
459 beq .Lcopyout_wordaligned /* Yup */
461 cmp r2, ip /* Enough bytes left to align it? */
462 blt .Lcopyout_l4_2 /* Nope. Just copy bytewise */
465 addne pc, pc, ip, lsl #3 /* Computed jump: pc += ip << 3 (8 bytes, i.e. two instructions, per unit of ip) */
468 strbt ip, [r1], #0x01 /* Store one byte to user space, r1 += 1 */
470 strbt ip, [r1], #0x01 /* Store one byte to user space, r1 += 1 */
472 strbt ip, [r1], #0x01 /* Store one byte to user space, r1 += 1 */
473 cmp r2, #0x00 /* All done? */
/*
 * Destination is word aligned: pick the copy strategy from the source
 * alignment and remaining length, then 8-byte align the source (r0) so
 * that it can be read in register pairs.
 */
476 /* Destination buffer is now word aligned */
477 .Lcopyout_wordaligned:
478 ands ip, r0, #0x03 /* Is src also word-aligned? */
479 bne .Lcopyout_bad_align /* Nope. Things just got bad */
480 cmp r2, #0x08 /* Less than 8 bytes remaining? */
481 blt .Lcopyout_w_less_than8 /* Yes: use the short-copy path */
483 /* Quad-align the source buffer */
484 tst r0, #0x07 /* Already quad aligned? */
485 ldrne ip, [r0], #0x04 /* No: fetch one word from the source, r0 += 4 (now 8-byte aligned) */
487 strtne ip, [r1], #0x04 /* ...and store it to user space, r1 += 4 */
489 stmfd sp!, {r4-r9} /* Free up some registers */
490 mov r3, #-1 /* Signal restore r4-r9 */
/*
 * Bulk copy, 128 bytes per iteration.
 * Register pairs (r4/r5, r6/r7, r8/r9) are loaded from the source with
 * ldrd and written to user space one word at a time with strt (user-mode
 * permissions). Loads and stores are interleaved, and pld prefetches the
 * source ahead of the loads.
 * NOTE(review): the loop label and the flag-setting length update are not
 * visible in this excerpt.
 */
492 /* Destination buffer word aligned, source is quad aligned */
494 blt .Lcopyout_w_lessthan128
496 /* Copy 128 bytes at a time */
498 ldrd r4, [r0], #0x08 /* LD:00-07 */
499 pld [r0, #0x18] /* Prefetch 0x20 */
500 ldrd r6, [r0], #0x08 /* LD:08-0f */
501 ldrd r8, [r0], #0x08 /* LD:10-17 */
502 strt r4, [r1], #0x04 /* ST:00-03 */
503 strt r5, [r1], #0x04 /* ST:04-07 */
504 ldrd r4, [r0], #0x08 /* LD:18-1f */
505 strt r6, [r1], #0x04 /* ST:08-0b */
506 strt r7, [r1], #0x04 /* ST:0c-0f */
507 ldrd r6, [r0], #0x08 /* LD:20-27 */
508 pld [r0, #0x18] /* Prefetch 0x40 */
509 strt r8, [r1], #0x04 /* ST:10-13 */
510 strt r9, [r1], #0x04 /* ST:14-17 */
511 ldrd r8, [r0], #0x08 /* LD:28-2f */
512 strt r4, [r1], #0x04 /* ST:18-1b */
513 strt r5, [r1], #0x04 /* ST:1c-1f */
514 ldrd r4, [r0], #0x08 /* LD:30-37 */
515 strt r6, [r1], #0x04 /* ST:20-23 */
516 strt r7, [r1], #0x04 /* ST:24-27 */
517 ldrd r6, [r0], #0x08 /* LD:38-3f */
518 strt r8, [r1], #0x04 /* ST:28-2b */
519 strt r9, [r1], #0x04 /* ST:2c-2f */
520 ldrd r8, [r0], #0x08 /* LD:40-47 */
521 pld [r0, #0x18] /* Prefetch 0x60 */
522 strt r4, [r1], #0x04 /* ST:30-33 */
523 strt r5, [r1], #0x04 /* ST:34-37 */
524 ldrd r4, [r0], #0x08 /* LD:48-4f */
525 strt r6, [r1], #0x04 /* ST:38-3b */
526 strt r7, [r1], #0x04 /* ST:3c-3f */
527 ldrd r6, [r0], #0x08 /* LD:50-57 */
528 strt r8, [r1], #0x04 /* ST:40-43 */
529 strt r9, [r1], #0x04 /* ST:44-47 */
530 ldrd r8, [r0], #0x08 /* LD:58-5f */
531 strt r4, [r1], #0x04 /* ST:48-4b */
532 strt r5, [r1], #0x04 /* ST:4c-4f */
533 ldrd r4, [r0], #0x08 /* LD:60-67 */
534 pld [r0, #0x18] /* Prefetch 0x80 */
535 strt r6, [r1], #0x04 /* ST:50-53 */
536 strt r7, [r1], #0x04 /* ST:54-57 */
537 ldrd r6, [r0], #0x08 /* LD:68-6f */
538 strt r8, [r1], #0x04 /* ST:58-5b */
539 strt r9, [r1], #0x04 /* ST:5c-5f */
540 ldrd r8, [r0], #0x08 /* LD:70-77 */
541 strt r4, [r1], #0x04 /* ST:60-63 */
542 strt r5, [r1], #0x04 /* ST:64-67 */
543 ldrd r4, [r0], #0x08 /* LD:78-7f */
544 strt r6, [r1], #0x04 /* ST:68-6b */
545 strt r7, [r1], #0x04 /* ST:6c-6f */
546 strt r8, [r1], #0x04 /* ST:70-73 */
547 strt r9, [r1], #0x04 /* ST:74-77 */
549 strt r4, [r1], #0x04 /* ST:78-7b */
550 strt r5, [r1], #0x04 /* ST:7c-7f */
551 bge .Lcopyout_w_loop128 /* Repeat while the GE condition holds */
/*
 * Tail handling once fewer than 128 bytes remain: a 32-byte loop, then a
 * computed jump into the sub-32-byte cases.
 * NOTE(review): the loop bodies and the computation of r5 are not visible
 * in this excerpt.
 */
553 .Lcopyout_w_lessthan128:
554 adds r2, r2, #0x80 /* Adjust for extra sub */
556 RETeq /* Return now if done */
558 blt .Lcopyout_w_lessthan32 /* LT: go to the sub-32-byte tail */
560 /* Copy 32 bytes at a time */
576 bge .Lcopyout_w_loop32 /* Repeat while the GE condition holds */
578 .Lcopyout_w_lessthan32:
579 adds r2, r2, #0x20 /* Adjust for extra sub */
581 RETeq /* Return now if done */
586 add pc, pc, r5, lsl #1 /* Computed jump: pc += r5 << 1; presumably selects one of the cases below -- confirm */
589 /* At least 24 bytes remaining */
595 /* At least 16 bytes remaining */
601 /* At least 8 bytes remaining */
607 /* Less than 8 bytes remaining */
609 RETeq /* Return now if done */
/*
 * Short tail: optionally one word, then trailing bytes.
 * NOTE(review): the instructions that set the GE/EQ conditions, and the
 * load that fills ip for the first byte store, are not visible here.
 */
612 .Lcopyout_w_less_than8:
614 ldrge ip, [r0], #0x04 /* GE: fetch one word from the source */
615 strtge ip, [r1], #0x04 /* GE: store it to user space */
616 RETeq /* Return now if done */
620 ldrbge r2, [r0], #0x01 /* GE: fetch another source byte into r2 */
621 strbt ip, [r1], #0x01 /* Store the byte held in ip to user space */
623 strbtge r2, [r1], #0x01 /* GE: store the byte held in r2 to user space */
628 * At this point, it has not been possible to word align both buffers.
629 * The destination buffer (r1) is word aligned, but the source buffer
/*
 * Misaligned-source copy loops (excerpt). Three variants are visible; they
 * merge adjacent source words with a left shift of 24, 16 or 8 bits.
 * NOTE(review): label names suggest a 16-byte and a 4-byte loop per
 * variant; the loads, the complementary right shifts and the stores are
 * not visible here.
 */
642 .Lcopyout_bad1_loop16:
649 orr r4, r4, r5, lsl #24 /* r4 |= r5 << 24 */
651 orr r5, r5, r6, lsl #24
653 orr r6, r6, r7, lsl #24
655 orr r7, r7, ip, lsl #24
662 bge .Lcopyout_bad1_loop16 /* Repeat while the GE condition holds */
666 RETeq /* Return now if done */
671 .Lcopyout_bad1_loop4:
675 orr r4, r4, ip, lsl #24
677 bge .Lcopyout_bad1_loop4 /* Repeat while the GE condition holds */
681 .Lcopyout_bad2_loop16:
688 orr r4, r4, r5, lsl #16 /* r4 |= r5 << 16 */
690 orr r5, r5, r6, lsl #16
692 orr r6, r6, r7, lsl #16
694 orr r7, r7, ip, lsl #16
701 bge .Lcopyout_bad2_loop16 /* Repeat while the GE condition holds */
705 RETeq /* Return now if done */
710 .Lcopyout_bad2_loop4:
714 orr r4, r4, ip, lsl #16
716 bge .Lcopyout_bad2_loop4 /* Repeat while the GE condition holds */
720 .Lcopyout_bad3_loop16:
727 orr r4, r4, r5, lsl #8 /* r4 |= r5 << 8 */
729 orr r5, r5, r6, lsl #8
731 orr r6, r6, r7, lsl #8
733 orr r7, r7, ip, lsl #8
740 bge .Lcopyout_bad3_loop16 /* Repeat while the GE condition holds */
744 RETeq /* Return now if done */
749 .Lcopyout_bad3_loop4:
753 orr r4, r4, ip, lsl #8
755 bge .Lcopyout_bad3_loop4 /* Repeat while the GE condition holds */
765 addne pc, pc, r2, lsl #3 /* Computed jump: pc += r2 << 3 (two instructions per unit of r2) */
768 strbt ip, [r1], #0x01 /* Store one byte to user space, r1 += 1 */
770 strbt ip, [r1], #0x01 /* Store one byte to user space, r1 += 1 */