sys/arm/arm/bcopyinout.S

   1 /*      $NetBSD: bcopyinout.S,v 1.11 2003/10/13 21:22:40 scw Exp $      */
   2
   3 /*-
   4  * Copyright (c) 2002 Wasabi Systems, Inc.
   5  * All rights reserved.
   6  *
   7  * Written by Allen Briggs for Wasabi Systems, Inc.
   8  *
   9  * Redistribution and use in source and binary forms, with or without
  10  * modification, are permitted provided that the following conditions
  11  * are met:
  12  * 1. Redistributions of source code must retain the above copyright
  13  *    notice, this list of conditions and the following disclaimer.
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in the
  16  *    documentation and/or other materials provided with the distribution.
  17  * 3. All advertising materials mentioning features or use of this software
  18  *    must display the following acknowledgement:
  19  *      This product includes software developed for the NetBSD Project by
  20  *      Wasabi Systems, Inc.
  21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
  22  *    or promote products derived from this software without specific prior
  23  *    written permission.
  24  *
  25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
  26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
  29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  35  * POSSIBILITY OF SUCH DAMAGE.
  36  */
  37
  38
  39 #include "assym.inc"
  40
  41 #include <machine/asm.h>
  42 #include <sys/errno.h>
  43
  44 .L_arm_memcpy:
  45         .word   _C_LABEL(_arm_memcpy)
  46 .L_min_memcpy_size:
  47         .word   _C_LABEL(_min_memcpy_size)
  48
  49 __FBSDID("$FreeBSD$");
  50 #ifdef _ARM_ARCH_5E
  51 #include <arm/arm/bcopyinout_xscale.S>
  52 #else
  53
  54         .text
  55         .align  2
  56
  57 #define GET_PCB(tmp) \
  58         mrc p15, 0, tmp, c13, c0, 4; \
  59         add     tmp, tmp, #(TD_PCB)
  60
  61 #define SAVE_REGS       stmfd   sp!, {r4-r11}; _SAVE({r4-r11})
  62 #define RESTORE_REGS    ldmfd   sp!, {r4-r11}
  63
  64 #if defined(_ARM_ARCH_5E)
  65 #define HELLOCPP #
  66 #define PREFETCH(rx,o)  pld     [ rx , HELLOCPP (o) ]
  67 #else
  68 #define PREFETCH(rx,o)
  69 #endif
  70
  71 /*
  72  * r0 = user space address
  73  * r1 = kernel space address
  74  * r2 = length
  75  *
  76  * Copies bytes from user space to kernel space
  77  *
  78  * We save/restore r4-r11:
  79  * r4-r11 are scratch
  80  */
  81 ENTRY(copyin)
  82         /* Quick exit if length is zero */
  83         teq     r2, #0
  84         moveq   r0, #0
  85         RETeq
  86
  87         adds    r3, r0, r2
  88         movcs   r0, #EFAULT
  89         RETc(cs)
  90
  91         ldr     r12, =(VM_MAXUSER_ADDRESS + 1)
  92         cmp     r3, r12
  93         movcs   r0, #EFAULT
  94         RETc(cs)
  95
  96         ldr     r3, .L_arm_memcpy
  97         ldr     r3, [r3]
  98         cmp     r3, #0
  99         beq     .Lnormal
 100         ldr     r3, .L_min_memcpy_size
 101         ldr     r3, [r3]
 102         cmp     r2, r3
 103         blt     .Lnormal
 104         stmfd   sp!, {r0-r2, r4, lr}
 105         mov     r3, r0
 106         mov     r0, r1
 107         mov     r1, r3
 108         mov     r3, #2 /* SRC_IS_USER */
 109         ldr     r4, .L_arm_memcpy
 110         mov     lr, pc
 111         ldr     pc, [r4]
 112         cmp     r0, #0
 113         ldmfd   sp!, {r0-r2, r4, lr}
 114         moveq   r0, #0
 115         RETeq
 116
 117 .Lnormal:
 118         SAVE_REGS
 119         GET_PCB(r4)
 120         ldr     r4, [r4]
 121
 122
 123         ldr     r5, [r4, #PCB_ONFAULT]
 124         adr     r3, .Lcopyfault
 125         str     r3, [r4, #PCB_ONFAULT]
 126
 127         PREFETCH(r0, 0)
 128         PREFETCH(r1, 0)
 129
 130         /*
 131          * If not too many bytes, take the slow path.
 132          */
 133         cmp     r2, #0x08
 134         blt     .Licleanup
 135
 136         /*
 137          * Align destination to word boundary.
 138          */
 139         and     r6, r1, #0x3
 140         ldr     pc, [pc, r6, lsl #2]
 141         b       .Lialend
 142         .word   .Lialend
 143         .word   .Lial3
 144         .word   .Lial2
 145         .word   .Lial1
 146 .Lial3: ldrbt   r6, [r0], #1
 147         sub     r2, r2, #1
 148         strb    r6, [r1], #1
 149 .Lial2: ldrbt   r7, [r0], #1
 150         sub     r2, r2, #1
 151         strb    r7, [r1], #1
 152 .Lial1: ldrbt   r6, [r0], #1
 153         sub     r2, r2, #1
 154         strb    r6, [r1], #1
 155 .Lialend:
 156
 157         /*
 158          * If few bytes left, finish slow.
 159          */
 160         cmp     r2, #0x08
 161         blt     .Licleanup
 162
 163         /*
 164          * If source is not aligned, finish slow.
 165          */
 166         ands    r3, r0, #0x03
 167         bne     .Licleanup
 168
 169         cmp     r2, #0x60       /* Must be > 0x5f for unrolled cacheline */
 170         blt     .Licleanup8
 171
 172         /*
 173          * Align destination to cacheline boundary.
 174          * If source and destination are nicely aligned, this can be a big
 175          * win.  If not, it's still cheaper to copy in groups of 32 even if
 176          * we don't get the nice cacheline alignment.
 177          */
 178         and     r6, r1, #0x1f
 179         ldr     pc, [pc, r6]
 180         b       .Licaligned
 181         .word   .Licaligned
 182         .word   .Lical28
 183         .word   .Lical24
 184         .word   .Lical20
 185         .word   .Lical16
 186         .word   .Lical12
 187         .word   .Lical8
 188         .word   .Lical4
 189 .Lical28:ldrt   r6, [r0], #4
 190         sub     r2, r2, #4
 191         str     r6, [r1], #4
 192 .Lical24:ldrt   r7, [r0], #4
 193         sub     r2, r2, #4
 194         str     r7, [r1], #4
 195 .Lical20:ldrt   r6, [r0], #4
 196         sub     r2, r2, #4
 197         str     r6, [r1], #4
 198 .Lical16:ldrt   r7, [r0], #4
 199         sub     r2, r2, #4
 200         str     r7, [r1], #4
 201 .Lical12:ldrt   r6, [r0], #4
 202         sub     r2, r2, #4
 203         str     r6, [r1], #4
 204 .Lical8:ldrt    r7, [r0], #4
 205         sub     r2, r2, #4
 206         str     r7, [r1], #4
 207 .Lical4:ldrt    r6, [r0], #4
 208         sub     r2, r2, #4
 209         str     r6, [r1], #4
 210
 211         /*
 212          * We start with > 0x40 bytes to copy (>= 0x60 got us into this
 213          * part of the code, and we may have knocked that down by as much
 214          * as 0x1c getting aligned).
 215          *
 216          * This loop basically works out to:
 217          * do {
 218          *      prefetch-next-cacheline(s)
 219          *      bytes -= 0x20;
 220          *      copy cacheline
 221          * } while (bytes >= 0x40);
 222          * bytes -= 0x20;
 223          * copy cacheline
 224          */
 225 .Licaligned:
 226         PREFETCH(r0, 32)
 227         PREFETCH(r1, 32)
 228
 229         sub     r2, r2, #0x20
 230
 231         /* Copy a cacheline */
 232         ldrt    r10, [r0], #4
 233         ldrt    r11, [r0], #4
 234         ldrt    r6, [r0], #4
 235         ldrt    r7, [r0], #4
 236         ldrt    r8, [r0], #4
 237         ldrt    r9, [r0], #4
 238         stmia   r1!, {r10-r11}
 239         ldrt    r10, [r0], #4
 240         ldrt    r11, [r0], #4
 241         stmia   r1!, {r6-r11}
 242
 243         cmp     r2, #0x40
 244         bge     .Licaligned
 245
 246         sub     r2, r2, #0x20
 247
 248         /* Copy a cacheline */
 249         ldrt    r10, [r0], #4
 250         ldrt    r11, [r0], #4
 251         ldrt    r6, [r0], #4
 252         ldrt    r7, [r0], #4
 253         ldrt    r8, [r0], #4
 254         ldrt    r9, [r0], #4
 255         stmia   r1!, {r10-r11}
 256         ldrt    r10, [r0], #4
 257         ldrt    r11, [r0], #4
 258         stmia   r1!, {r6-r11}
 259
 260         cmp     r2, #0x08
 261         blt     .Liprecleanup
 262
 263 .Licleanup8:
 264         ldrt    r8, [r0], #4
 265         ldrt    r9, [r0], #4
 266         sub     r2, r2, #8
 267         stmia   r1!, {r8, r9}
 268         cmp     r2, #8
 269         bge     .Licleanup8
 270
 271 .Liprecleanup:
 272         /*
 273          * If we're done, bail.
 274          */
 275         cmp     r2, #0
 276         beq     .Lout
 277
 278 .Licleanup:
 279         and     r6, r2, #0x3
 280         ldr     pc, [pc, r6, lsl #2]
 281         b       .Licend
 282         .word   .Lic4
 283         .word   .Lic1
 284         .word   .Lic2
 285         .word   .Lic3
 286 .Lic4:  ldrbt   r6, [r0], #1
 287         sub     r2, r2, #1
 288         strb    r6, [r1], #1
 289 .Lic3:  ldrbt   r7, [r0], #1
 290         sub     r2, r2, #1
 291         strb    r7, [r1], #1
 292 .Lic2:  ldrbt   r6, [r0], #1
 293         sub     r2, r2, #1
 294         strb    r6, [r1], #1
 295 .Lic1:  ldrbt   r7, [r0], #1
 296         subs    r2, r2, #1
 297         strb    r7, [r1], #1
 298 .Licend:
 299         bne     .Licleanup
 300
 301 .Liout:
 302         mov     r0, #0
 303
 304         str     r5, [r4, #PCB_ONFAULT]
 305         RESTORE_REGS
 306
 307         RET
 308
 309 .Lcopyfault:
 310         ldr     r0, =EFAULT
 311         str     r5, [r4, #PCB_ONFAULT]
 312         RESTORE_REGS
 313
 314         RET
 315 END(copyin)
 316
 317 /*
 318  * r0 = kernel space address
 319  * r1 = user space address
 320  * r2 = length
 321  *
 322  * Copies bytes from kernel space to user space
 323  *
 324  * We save/restore r4-r11:
 325  * r4-r11 are scratch
 326  */
 327
 328 ENTRY(copyout)
 329         /* Quick exit if length is zero */
 330         teq     r2, #0
 331         moveq   r0, #0
 332         RETeq
 333
 334         adds    r3, r1, r2
 335         movcs   r0, #EFAULT
 336         RETc(cs)
 337
 338         ldr     r12, =(VM_MAXUSER_ADDRESS + 1)
 339         cmp     r3, r12
 340         movcs   r0, #EFAULT
 341         RETc(cs)
 342
 343         ldr     r3, .L_arm_memcpy
 344         ldr     r3, [r3]
 345         cmp     r3, #0
 346         beq     .Lnormale
 347         ldr     r3, .L_min_memcpy_size
 348         ldr     r3, [r3]
 349         cmp     r2, r3
 350         blt     .Lnormale
 351         stmfd   sp!, {r0-r2, r4, lr}
 352         _SAVE({r0-r2, r4, lr})
 353         mov     r3, r0
 354         mov     r0, r1
 355         mov     r1, r3
 356         mov     r3, #1 /* DST_IS_USER */
 357         ldr     r4, .L_arm_memcpy
 358         mov     lr, pc
 359         ldr     pc, [r4]
 360         cmp     r0, #0
 361         ldmfd   sp!, {r0-r2, r4, lr}
 362         moveq   r0, #0
 363         RETeq
 364
 365 .Lnormale:
 366         SAVE_REGS
 367         GET_PCB(r4)
 368         ldr     r4, [r4]
 369
 370         ldr     r5, [r4, #PCB_ONFAULT]
 371         adr     r3, .Lcopyfault
 372         str     r3, [r4, #PCB_ONFAULT]
 373
 374         PREFETCH(r0, 0)
 375         PREFETCH(r1, 0)
 376
 377         /*
 378          * If not too many bytes, take the slow path.
 379          */
 380         cmp     r2, #0x08
 381         blt     .Lcleanup
 382
 383         /*
 384          * Align destination to word boundary.
 385          */
 386         and     r6, r1, #0x3
 387         ldr     pc, [pc, r6, lsl #2]
 388         b       .Lalend
 389         .word   .Lalend
 390         .word   .Lal3
 391         .word   .Lal2
 392         .word   .Lal1
 393 .Lal3:  ldrb    r6, [r0], #1
 394         sub     r2, r2, #1
 395         strbt   r6, [r1], #1
 396 .Lal2:  ldrb    r7, [r0], #1
 397         sub     r2, r2, #1
 398         strbt   r7, [r1], #1
 399 .Lal1:  ldrb    r6, [r0], #1
 400         sub     r2, r2, #1
 401         strbt   r6, [r1], #1
 402 .Lalend:
 403
 404         /*
 405          * If few bytes left, finish slow.
 406          */
 407         cmp     r2, #0x08
 408         blt     .Lcleanup
 409
 410         /*
 411          * If source is not aligned, finish slow.
 412          */
 413         ands    r3, r0, #0x03
 414         bne     .Lcleanup
 415
 416         cmp     r2, #0x60       /* Must be > 0x5f for unrolled cacheline */
 417         blt     .Lcleanup8
 418
 419         /*
 420          * Align source & destination to cacheline boundary.
 421          */
 422         and     r6, r1, #0x1f
 423         ldr     pc, [pc, r6]
 424         b       .Lcaligned
 425         .word   .Lcaligned
 426         .word   .Lcal28
 427         .word   .Lcal24
 428         .word   .Lcal20
 429         .word   .Lcal16
 430         .word   .Lcal12
 431         .word   .Lcal8
 432         .word   .Lcal4
 433 .Lcal28:ldr     r6, [r0], #4
 434         sub     r2, r2, #4
 435         strt    r6, [r1], #4
 436 .Lcal24:ldr     r7, [r0], #4
 437         sub     r2, r2, #4
 438         strt    r7, [r1], #4
 439 .Lcal20:ldr     r6, [r0], #4
 440         sub     r2, r2, #4
 441         strt    r6, [r1], #4
 442 .Lcal16:ldr     r7, [r0], #4
 443         sub     r2, r2, #4
 444         strt    r7, [r1], #4
 445 .Lcal12:ldr     r6, [r0], #4
 446         sub     r2, r2, #4
 447         strt    r6, [r1], #4
 448 .Lcal8: ldr     r7, [r0], #4
 449         sub     r2, r2, #4
 450         strt    r7, [r1], #4
 451 .Lcal4: ldr     r6, [r0], #4
 452         sub     r2, r2, #4
 453         strt    r6, [r1], #4
 454
 455         /*
 456          * We start with > 0x40 bytes to copy (>= 0x60 got us into this
 457          * part of the code, and we may have knocked that down by as much
 458          * as 0x1c getting aligned).
 459          *
 460          * This loop basically works out to:
 461          * do {
 462          *      prefetch-next-cacheline(s)
 463          *      bytes -= 0x20;
 464          *      copy cacheline
 465          * } while (bytes >= 0x40);
 466          * bytes -= 0x20;
 467          * copy cacheline
 468          */
 469 .Lcaligned:
 470         PREFETCH(r0, 32)
 471         PREFETCH(r1, 32)
 472
 473         sub     r2, r2, #0x20
 474
 475         /* Copy a cacheline */
 476         ldmia   r0!, {r6-r11}
 477         strt    r6, [r1], #4
 478         strt    r7, [r1], #4
 479         ldmia   r0!, {r6-r7}
 480         strt    r8, [r1], #4
 481         strt    r9, [r1], #4
 482         strt    r10, [r1], #4
 483         strt    r11, [r1], #4
 484         strt    r6, [r1], #4
 485         strt    r7, [r1], #4
 486
 487         cmp     r2, #0x40
 488         bge     .Lcaligned
 489
 490         sub     r2, r2, #0x20
 491
 492         /* Copy a cacheline */
 493         ldmia   r0!, {r6-r11}
 494         strt    r6, [r1], #4
 495         strt    r7, [r1], #4
 496         ldmia   r0!, {r6-r7}
 497         strt    r8, [r1], #4
 498         strt    r9, [r1], #4
 499         strt    r10, [r1], #4
 500         strt    r11, [r1], #4
 501         strt    r6, [r1], #4
 502         strt    r7, [r1], #4
 503
 504         cmp     r2, #0x08
 505         blt     .Lprecleanup
 506
 507 .Lcleanup8:
 508         ldmia   r0!, {r8-r9}
 509         sub     r2, r2, #8
 510         strt    r8, [r1], #4
 511         strt    r9, [r1], #4
 512         cmp     r2, #8
 513         bge     .Lcleanup8
 514
 515 .Lprecleanup:
 516         /*
 517          * If we're done, bail.
 518          */
 519         cmp     r2, #0
 520         beq     .Lout
 521
 522 .Lcleanup:
 523         and     r6, r2, #0x3
 524         ldr     pc, [pc, r6, lsl #2]
 525         b       .Lcend
 526         .word   .Lc4
 527         .word   .Lc1
 528         .word   .Lc2
 529         .word   .Lc3
 530 .Lc4:   ldrb    r6, [r0], #1
 531         sub     r2, r2, #1
 532         strbt   r6, [r1], #1
 533 .Lc3:   ldrb    r7, [r0], #1
 534         sub     r2, r2, #1
 535         strbt   r7, [r1], #1
 536 .Lc2:   ldrb    r6, [r0], #1
 537         sub     r2, r2, #1
 538         strbt   r6, [r1], #1
 539 .Lc1:   ldrb    r7, [r0], #1
 540         subs    r2, r2, #1
 541         strbt   r7, [r1], #1
 542 .Lcend:
 543         bne     .Lcleanup
 544
 545 .Lout:
 546         mov     r0, #0
 547
 548         str     r5, [r4, #PCB_ONFAULT]
 549         RESTORE_REGS
 550
 551         RET
 552 END(copyout)
 553 #endif
 554
 555 /*
 556  * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
 557  *
 558  * Copies a single 8-bit value from src to dest, returning 0 on success,
 559  * else EFAULT if a page fault occurred.
 560  */
 561 ENTRY(badaddr_read_1)
 562         GET_PCB(r2)
 563         ldr     r2, [r2]
 564
 565         ldr     ip, [r2, #PCB_ONFAULT]
 566         adr     r3, 1f
 567         str     r3, [r2, #PCB_ONFAULT]
 568         nop
 569         nop
 570         nop
 571         ldrb    r3, [r0]
 572         nop
 573         nop
 574         nop
 575         strb    r3, [r1]
 576         mov     r0, #0          /* No fault */
 577 1:      str     ip, [r2, #PCB_ONFAULT]
 578         RET
 579 END(badaddr_read_1)
 580
 581 /*
 582  * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
 583  *
 584  * Copies a single 16-bit value from src to dest, returning 0 on success,
 585  * else EFAULT if a page fault occurred.
 586  */
 587 ENTRY(badaddr_read_2)
 588         GET_PCB(r2)
 589         ldr     r2, [r2]
 590
 591         ldr     ip, [r2, #PCB_ONFAULT]
 592         adr     r3, 1f
 593         str     r3, [r2, #PCB_ONFAULT]
 594         nop
 595         nop
 596         nop
 597         ldrh    r3, [r0]
 598         nop
 599         nop
 600         nop
 601         strh    r3, [r1]
 602         mov     r0, #0          /* No fault */
 603 1:      str     ip, [r2, #PCB_ONFAULT]
 604         RET
 605 END(badaddr_read_2)
 606
 607 /*
 608  * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
 609  *
 610  * Copies a single 32-bit value from src to dest, returning 0 on success,
 611  * else EFAULT if a page fault occurred.
 612  */
 613 ENTRY(badaddr_read_4)
 614         GET_PCB(r2)
 615         ldr     r2, [r2]
 616
 617         ldr     ip, [r2, #PCB_ONFAULT]
 618         adr     r3, 1f
 619         str     r3, [r2, #PCB_ONFAULT]
 620         nop
 621         nop
 622         nop
 623         ldr     r3, [r0]
 624         nop
 625         nop
 626         nop
 627         str     r3, [r1]
 628         mov     r0, #0          /* No fault */
 629 1:      str     ip, [r2, #PCB_ONFAULT]
 630         RET
 631 END(badaddr_read_4)
 632