1 /* $NetBSD: bcopyinout.S,v 1.11 2003/10/13 21:22:40 scw Exp $ */
4 * Copyright (c) 2002 Wasabi Systems, Inc.
7 * Written by Allen Briggs for Wasabi Systems, Inc.
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 * 3. All advertising materials mentioning features or use of this software
18 * must display the following acknowledgement:
19 * This product includes software developed for the NetBSD Project by
20 * Wasabi Systems, Inc.
21 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22 * or promote products derived from this software without specific prior
25 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
29 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35 * POSSIBILITY OF SUCH DAMAGE.
41 #include <machine/asm.h>
/*
 * Literal-pool entries (their .L_* labels fall outside this sampled chunk):
 * addresses of the optional platform memcpy helper and its size threshold.
 * NOTE(review): chunk is a sampled fragment; confirm labels in the full file.
 */
44 .word _C_LABEL(_arm_memcpy)
46 .word _C_LABEL(_min_memcpy_size)
48 __FBSDID("$FreeBSD$");
50 #include <arm/arm/bcopyinout_xscale.S>
/* Per-CPU lookup data: cpu_info array (MULTIPROCESSOR) / pcpu curpcb slot. */
58 .word _C_LABEL(cpu_info)
61 .word _C_LABEL(__pcpu) + PC_CURPCB
/* Save/restore the callee-saved registers the copy loops use as scratch. */
64 #define SAVE_REGS stmfd sp!, {r4-r11}
65 #define RESTORE_REGS ldmfd sp!, {r4-r11}
/*
 * PREFETCH(rx, o): issue a cache-preload hint at [rx + o] on ARMv5E+,
 * and compile to nothing on older cores that lack "pld".
 * Fix: the macro body had a garbled token ("HELLOCPP (o)") where the
 * immediate-offset spelling "#(o)" belongs; as written it would not assemble.
 */
67 #if defined(_ARM_ARCH_5E)
69 #define PREFETCH(rx,o) pld [ rx , #(o) ]
71 #define PREFETCH(rx,o)
/*
 * copyin fast path (fragment — many instructions fall between the visible
 * lines of this sampled chunk; NOTE(review): read alongside the full file).
 */
75 * r0 = user space address
76 * r1 = kernel space address
79 * Copies bytes from user space to kernel space
81 * We save/restore r4-r11:
85 /* Quick exit if length is zero */
/*
 * If a platform memcpy helper (_arm_memcpy) is registered and the length
 * reaches _min_memcpy_size, delegate to it; r3 = 2 tags the *source*
 * operand as a user-space address for the helper.
 */
94 ldr r3, .L_min_memcpy_size
98 stmfd sp!, {r0-r2, r4, lr}
102 mov r3, #2 /* SRC_IS_USER */
103 ldr r4, .L_arm_memcpy
107 ldmfd sp!, {r0-r2, r4, lr}
/* Locate the current PCB (r4) so a fault handler can be installed. */
113 #ifdef MULTIPROCESSOR
114 /* XXX Probably not appropriate for non-Hydra SMPs */
115 stmfd sp!, {r0-r2, r14}
116 bl _C_LABEL(cpu_number)
118 ldr r4, [r4, r0, lsl #2]
119 ldr r4, [r4, #CI_CURPCB]
120 ldmfd sp!, {r0-r2, r14}
/* Save the previous onfault hook in r5, then install the copy fault handler. */
126 ldr r5, [r4, #PCB_ONFAULT]
128 str r3, [r4, #PCB_ONFAULT]
134 * If not too many bytes, take the slow path.
140 * Align destination to word boundary.
/* Computed jump: r6 selects how many leading bytes are needed to word-align. */
143 ldr pc, [pc, r6, lsl #2]
/* ldrbt = unprivileged byte load: user pages are accessed with user rights. */
149 .Lial3: ldrbt r6, [r0], #1
152 .Lial2: ldrbt r7, [r0], #1
155 .Lial1: ldrbt r6, [r0], #1
161 * If few bytes left, finish slow.
167 * If source is not aligned, finish slow.
172 cmp r2, #0x60 /* Must be > 0x5f for unrolled cacheline */
176 * Align destination to cacheline boundary.
177 * If source and destination are nicely aligned, this can be a big
178 * win. If not, it's still cheaper to copy in groups of 32 even if
179 * we don't get the nice cacheline alignment.
/* Jump-table targets: copy 28/24/.../4 leading bytes a word at a time
 * (ldrt = unprivileged word load from the user source). */
192 .Lical28:ldrt r6, [r0], #4
195 .Lical24:ldrt r7, [r0], #4
198 .Lical20:ldrt r6, [r0], #4
201 .Lical16:ldrt r7, [r0], #4
204 .Lical12:ldrt r6, [r0], #4
207 .Lical8:ldrt r7, [r0], #4
210 .Lical4:ldrt r6, [r0], #4
215 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
216 * part of the code, and we may have knocked that down by as much
217 * as 0x1c getting aligned).
219 * This loop basically works out to:
221 * prefetch-next-cacheline(s)
224 * } while (bytes >= 0x40);
234 /* Copy a cacheline */
251 /* Copy a cacheline */
276 * If we're done, bail.
/* Trailing (< 4) bytes: dispatch into the byte-copy ladder below. */
283 ldr pc, [pc, r6, lsl #2]
289 .Lic4: ldrbt r6, [r0], #1
292 .Lic3: ldrbt r7, [r0], #1
295 .Lic2: ldrbt r6, [r0], #1
298 .Lic1: ldrbt r7, [r0], #1
/* Success path: restore the saved onfault hook. */
307 str r5, [r4, #PCB_ONFAULT]
/* Fault path: return EFAULT (14) after restoring the saved onfault hook. */
313 mov r0, #14 /* EFAULT */
314 str r5, [r4, #PCB_ONFAULT]
/*
 * copyout fast path (fragment — mirrors copyin above with the user/kernel
 * roles swapped; NOTE(review): sampled chunk, read alongside the full file).
 */
320 * r0 = kernel space address
321 * r1 = user space address
324 * Copies bytes from kernel space to user space
326 * We save/restore r4-r11:
331 /* Quick exit if length is zero */
/*
 * Delegate to the registered memcpy helper when the copy is large enough;
 * r3 = 1 tags the *destination* operand as a user-space address.
 */
336 ldr r3, .L_arm_memcpy
340 ldr r3, .L_min_memcpy_size
344 stmfd sp!, {r0-r2, r4, lr}
348 mov r3, #1 /* DST_IS_USER */
349 ldr r4, .L_arm_memcpy
353 ldmfd sp!, {r0-r2, r4, lr}
/* Locate the current PCB (r4) so a fault handler can be installed. */
359 #ifdef MULTIPROCESSOR
360 /* XXX Probably not appropriate for non-Hydra SMPs */
361 stmfd sp!, {r0-r2, r14}
362 bl _C_LABEL(cpu_number)
364 ldr r4, [r4, r0, lsl #2]
365 ldr r4, [r4, #CI_CURPCB]
366 ldmfd sp!, {r0-r2, r14}
/* Save the previous onfault hook in r5, then install the copy fault handler. */
372 ldr r5, [r4, #PCB_ONFAULT]
374 str r3, [r4, #PCB_ONFAULT]
380 * If not too many bytes, take the slow path.
386 * Align destination to word boundary.
/* Computed jump: r6 selects how many leading bytes are needed to word-align. */
389 ldr pc, [pc, r6, lsl #2]
/* Plain ldrb here: the *source* is kernel memory (stores to user memory use
 * unprivileged forms on lines not visible in this chunk — verify). */
395 .Lal3: ldrb r6, [r0], #1
398 .Lal2: ldrb r7, [r0], #1
401 .Lal1: ldrb r6, [r0], #1
407 * If few bytes left, finish slow.
413 * If source is not aligned, finish slow.
418 cmp r2, #0x60 /* Must be > 0x5f for unrolled cacheline */
422 * Align source & destination to cacheline boundary.
/* Jump-table targets: copy 28/24/.../4 leading bytes a word at a time. */
435 .Lcal28:ldr r6, [r0], #4
438 .Lcal24:ldr r7, [r0], #4
441 .Lcal20:ldr r6, [r0], #4
444 .Lcal16:ldr r7, [r0], #4
447 .Lcal12:ldr r6, [r0], #4
450 .Lcal8: ldr r7, [r0], #4
453 .Lcal4: ldr r6, [r0], #4
458 * We start with > 0x40 bytes to copy (>= 0x60 got us into this
459 * part of the code, and we may have knocked that down by as much
460 * as 0x1c getting aligned).
462 * This loop basically works out to:
464 * prefetch-next-cacheline(s)
467 * } while (bytes >= 0x40);
477 /* Copy a cacheline */
494 /* Copy a cacheline */
519 * If we're done, bail.
/* Trailing (< 4) bytes: dispatch into the byte-copy ladder below. */
526 ldr pc, [pc, r6, lsl #2]
532 .Lc4: ldrb r6, [r0], #1
535 .Lc3: ldrb r7, [r0], #1
538 .Lc2: ldrb r6, [r0], #1
541 .Lc1: ldrb r7, [r0], #1
/* Success path: restore the saved onfault hook. */
550 str r5, [r4, #PCB_ONFAULT]
557 * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
559 * Copies a single 8-bit value from src to dest, returning 0 on success,
560 * else EFAULT if a page fault occurred.
562 ENTRY(badaddr_read_1)
/* Locate the current PCB (r2) so a temporary fault handler can be installed. */
563 #ifdef MULTIPROCESSOR
564 /* XXX Probably not appropriate for non-Hydra SMPs */
565 stmfd sp!, {r0-r1, r14}
566 bl _C_LABEL(cpu_number)
568 ldr r2, [r2, r0, lsl #2]
569 ldr r2, [r2, #CI_CURPCB]
570 ldmfd sp!, {r0-r1, r14}
/* Save the previous onfault hook in ip, install the probe fault handler. */
575 ldr ip, [r2, #PCB_ONFAULT]
577 str r3, [r2, #PCB_ONFAULT]
/* Common exit: r0 = 0 on success; both paths restore the saved hook at 1:.
 * NOTE(review): the probe load and fault-path return fall between the
 * sampled lines of this chunk. */
586 mov r0, #0 /* No fault */
587 1: str ip, [r2, #PCB_ONFAULT]
591 * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
593 * Copies a single 16-bit value from src to dest, returning 0 on success,
594 * else EFAULT if a page fault occurred.
596 ENTRY(badaddr_read_2)
/* Locate the current PCB (r2) so a temporary fault handler can be installed. */
597 #ifdef MULTIPROCESSOR
598 /* XXX Probably not appropriate for non-Hydra SMPs */
599 stmfd sp!, {r0-r1, r14}
600 bl _C_LABEL(cpu_number)
602 ldr r2, [r2, r0, lsl #2]
603 ldr r2, [r2, #CI_CURPCB]
604 ldmfd sp!, {r0-r1, r14}
/* Save the previous onfault hook in ip, install the probe fault handler. */
609 ldr ip, [r2, #PCB_ONFAULT]
611 str r3, [r2, #PCB_ONFAULT]
/* Common exit: r0 = 0 on success; both paths restore the saved hook at 1:.
 * NOTE(review): the probe load and fault-path return fall between the
 * sampled lines of this chunk. */
620 mov r0, #0 /* No fault */
621 1: str ip, [r2, #PCB_ONFAULT]
625 * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
627 * Copies a single 32-bit value from src to dest, returning 0 on success,
628 * else EFAULT if a page fault occurred.
630 ENTRY(badaddr_read_4)
/* Locate the current PCB (r2) so a temporary fault handler can be installed. */
631 #ifdef MULTIPROCESSOR
632 /* XXX Probably not appropriate for non-Hydra SMPs */
633 stmfd sp!, {r0-r1, r14}
634 bl _C_LABEL(cpu_number)
636 ldr r2, [r2, r0, lsl #2]
637 ldr r2, [r2, #CI_CURPCB]
638 ldmfd sp!, {r0-r1, r14}
/* Save the previous onfault hook in ip, install the probe fault handler. */
643 ldr ip, [r2, #PCB_ONFAULT]
645 str r3, [r2, #PCB_ONFAULT]
/* Common exit: r0 = 0 on success; both paths restore the saved hook at 1:.
 * NOTE(review): the probe load and fault-path return fall between the
 * sampled lines of this chunk. */
654 mov r0, #0 /* No fault */
655 1: str ip, [r2, #PCB_ONFAULT]