sys/arm/arm/bcopyinout.S

   1 /*      $NetBSD: bcopyinout.S,v 1.11 2003/10/13 21:22:40 scw Exp $      */
   2
   3 /*-
   4  * Copyright (c) 2002 Wasabi Systems, Inc.
   5  * All rights reserved.
   6  *
   7  * Written by Allen Briggs for Wasabi Systems, Inc.
   8  *
   9  * Redistribution and use in source and binary forms, with or without
  10  * modification, are permitted provided that the following conditions
  11  * are met:
  12  * 1. Redistributions of source code must retain the above copyright
  13  *    notice, this list of conditions and the following disclaimer.
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in the
  16  *    documentation and/or other materials provided with the distribution.
  17  * 3. All advertising materials mentioning features or use of this software
  18  *    must display the following acknowledgement:
  19  *      This product includes software developed for the NetBSD Project by
  20  *      Wasabi Systems, Inc.
  21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
  22  *    or promote products derived from this software without specific prior
  23  *    written permission.
  24  *
  25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
  26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
  29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  35  * POSSIBILITY OF SUCH DAMAGE.
  36  */
  37
  38
  39 #include "assym.s"
  40
  41 #include <machine/acle-compat.h>
  42 #include <machine/asm.h>
  43 #include <sys/errno.h>
  44
   45 .L_arm_memcpy:
   46         .word   _C_LABEL(_arm_memcpy)
              /*
               * Literal: address of the _arm_memcpy variable.  copyin and
               * copyout load this word, then load the word it points at; a
               * non-zero value there is called as a copy routine.
               */
   47 .L_min_memcpy_size:
   48         .word   _C_LABEL(_min_memcpy_size)
              /*
               * Literal: address of the _min_memcpy_size variable.  copyin
               * and copyout compare the length in r2 against the value
               * stored there and skip the _arm_memcpy call for shorter
               * copies.
               */
  49
  50 __FBSDID("$FreeBSD$");
  51 #ifdef _ARM_ARCH_5E
  52 #include <arm/arm/bcopyinout_xscale.S>
  53 #else
  54
  55         .text
  56         .align  2
  57
   58 #if __ARM_ARCH >= 6
      /*
       * GET_PCB(tmp): leave in tmp the address of the word that holds the
       * pcb pointer.  Every user in this file follows the macro with
       * "ldr tmp, [tmp]" to fetch the pointer itself.
       *
       * __ARM_ARCH >= 6: read CP15 c13, c0, 4 and add TD_PCB.  TD_PCB is not
       * defined in this file; presumably it is an offset generated into
       * assym.s for the thread structure's pcb field -- confirm.
       */
   59 #define GET_PCB(tmp) \
   60         mrc p15, 0, tmp, c13, c0, 4; \
   61         add     tmp, tmp, #(TD_PCB)
   62 #else
      /*
       * Older cores: .Lcurpcb is a literal word holding the address
       * __pcpu + PC_CURPCB; GET_PCB loads that address PC-relative.
       */
   63 .Lcurpcb:
   64         .word   _C_LABEL(__pcpu) + PC_CURPCB
   65
   66 #define GET_PCB(tmp) \
   67         ldr     tmp, .Lcurpcb
   68 #endif
   69
   70
      /*
       * SAVE_REGS / RESTORE_REGS: push / pop r4-r11 (eight words) on a
       * full-descending stack.
       */
   71 #define SAVE_REGS       stmfd   sp!, {r4-r11}
   72 #define RESTORE_REGS    ldmfd   sp!, {r4-r11}
   73
      /*
       * PREFETCH(rx, o): "pld [rx, #o]" when _ARM_ARCH_5E is defined, empty
       * otherwise.  HELLOCPP supplies the '#' of the immediate, because a
       * bare '#' inside a function-like macro body is the preprocessor's
       * stringify operator.
       *
       * NOTE(review): these definitions sit in the #else arm of the
       * enclosing "#ifdef _ARM_ARCH_5E", so the pld variant does not look
       * selectable here and PREFETCH expands to nothing in this file.
       */
   74 #if defined(_ARM_ARCH_5E)
   75 #define HELLOCPP #
   76 #define PREFETCH(rx,o)  pld     [ rx , HELLOCPP (o) ]
   77 #else
   78 #define PREFETCH(rx,o)
   79 #endif
  80
  81 /*
  82  * r0 = user space address
  83  * r1 = kernel space address
  84  * r2 = length
  85  *
  86  * Copies bytes from user space to kernel space
  87  *
  88  * We save/restore r4-r11:
  89  * r4-r11 are scratch
  90  */
  91 ENTRY(copyin)
  92         /* Quick exit if length is zero */
  93         teq     r2, #0
  94         moveq   r0, #0
  95         RETeq
  96
  97         ldr     r3, .L_arm_memcpy
  98         ldr     r3, [r3]
  99         cmp     r3, #0
 100         beq     .Lnormal
 101         ldr     r3, .L_min_memcpy_size
 102         ldr     r3, [r3]
 103         cmp     r2, r3
 104         blt     .Lnormal
 105         stmfd   sp!, {r0-r2, r4, lr}
 106         mov     r3, r0
 107         mov     r0, r1
 108         mov     r1, r3
 109         mov     r3, #2 /* SRC_IS_USER */
 110         ldr     r4, .L_arm_memcpy
 111         mov     lr, pc
 112         ldr     pc, [r4]
 113         cmp     r0, #0
 114         ldmfd   sp!, {r0-r2, r4, lr}
 115         moveq   r0, #0
 116         RETeq
 117
 118 .Lnormal:
 119         SAVE_REGS
 120         GET_PCB(r4)
 121         ldr     r4, [r4]
 122
 123
 124         ldr     r5, [r4, #PCB_ONFAULT]
 125         adr     r3, .Lcopyfault
 126         str     r3, [r4, #PCB_ONFAULT]
 127
 128         PREFETCH(r0, 0)
 129         PREFETCH(r1, 0)
 130
 131         /*
 132          * If not too many bytes, take the slow path.
 133          */
 134         cmp     r2, #0x08
 135         blt     .Licleanup
 136
 137         /*
 138          * Align destination to word boundary.
 139          */
 140         and     r6, r1, #0x3
 141         ldr     pc, [pc, r6, lsl #2]
 142         b       .Lialend
 143         .word   .Lialend
 144         .word   .Lial3
 145         .word   .Lial2
 146         .word   .Lial1
 147 .Lial3: ldrbt   r6, [r0], #1
 148         sub     r2, r2, #1
 149         strb    r6, [r1], #1
 150 .Lial2: ldrbt   r7, [r0], #1
 151         sub     r2, r2, #1
 152         strb    r7, [r1], #1
 153 .Lial1: ldrbt   r6, [r0], #1
 154         sub     r2, r2, #1
 155         strb    r6, [r1], #1
 156 .Lialend:
 157
 158         /*
 159          * If few bytes left, finish slow.
 160          */
 161         cmp     r2, #0x08
 162         blt     .Licleanup
 163
 164         /*
 165          * If source is not aligned, finish slow.
 166          */
 167         ands    r3, r0, #0x03
 168         bne     .Licleanup
 169
 170         cmp     r2, #0x60       /* Must be > 0x5f for unrolled cacheline */
 171         blt     .Licleanup8
 172
 173         /*
 174          * Align destination to cacheline boundary.
 175          * If source and destination are nicely aligned, this can be a big
 176          * win.  If not, it's still cheaper to copy in groups of 32 even if
 177          * we don't get the nice cacheline alignment.
 178          */
 179         and     r6, r1, #0x1f
 180         ldr     pc, [pc, r6]
 181         b       .Licaligned
 182         .word   .Licaligned
 183         .word   .Lical28
 184         .word   .Lical24
 185         .word   .Lical20
 186         .word   .Lical16
 187         .word   .Lical12
 188         .word   .Lical8
 189         .word   .Lical4
 190 .Lical28:ldrt   r6, [r0], #4
 191         sub     r2, r2, #4
 192         str     r6, [r1], #4
 193 .Lical24:ldrt   r7, [r0], #4
 194         sub     r2, r2, #4
 195         str     r7, [r1], #4
 196 .Lical20:ldrt   r6, [r0], #4
 197         sub     r2, r2, #4
 198         str     r6, [r1], #4
 199 .Lical16:ldrt   r7, [r0], #4
 200         sub     r2, r2, #4
 201         str     r7, [r1], #4
 202 .Lical12:ldrt   r6, [r0], #4
 203         sub     r2, r2, #4
 204         str     r6, [r1], #4
 205 .Lical8:ldrt    r7, [r0], #4
 206         sub     r2, r2, #4
 207         str     r7, [r1], #4
 208 .Lical4:ldrt    r6, [r0], #4
 209         sub     r2, r2, #4
 210         str     r6, [r1], #4
 211
 212         /*
 213          * We start with > 0x40 bytes to copy (>= 0x60 got us into this
 214          * part of the code, and we may have knocked that down by as much
 215          * as 0x1c getting aligned).
 216          *
 217          * This loop basically works out to:
 218          * do {
 219          *      prefetch-next-cacheline(s)
 220          *      bytes -= 0x20;
 221          *      copy cacheline
 222          * } while (bytes >= 0x40);
 223          * bytes -= 0x20;
 224          * copy cacheline
 225          */
 226 .Licaligned:
 227         PREFETCH(r0, 32)
 228         PREFETCH(r1, 32)
 229
 230         sub     r2, r2, #0x20
 231
 232         /* Copy a cacheline */
 233         ldrt    r10, [r0], #4
 234         ldrt    r11, [r0], #4
 235         ldrt    r6, [r0], #4
 236         ldrt    r7, [r0], #4
 237         ldrt    r8, [r0], #4
 238         ldrt    r9, [r0], #4
 239         stmia   r1!, {r10-r11}
 240         ldrt    r10, [r0], #4
 241         ldrt    r11, [r0], #4
 242         stmia   r1!, {r6-r11}
 243
 244         cmp     r2, #0x40
 245         bge     .Licaligned
 246
 247         sub     r2, r2, #0x20
 248
 249         /* Copy a cacheline */
 250         ldrt    r10, [r0], #4
 251         ldrt    r11, [r0], #4
 252         ldrt    r6, [r0], #4
 253         ldrt    r7, [r0], #4
 254         ldrt    r8, [r0], #4
 255         ldrt    r9, [r0], #4
 256         stmia   r1!, {r10-r11}
 257         ldrt    r10, [r0], #4
 258         ldrt    r11, [r0], #4
 259         stmia   r1!, {r6-r11}
 260
 261         cmp     r2, #0x08
 262         blt     .Liprecleanup
 263
 264 .Licleanup8:
 265         ldrt    r8, [r0], #4
 266         ldrt    r9, [r0], #4
 267         sub     r2, r2, #8
 268         stmia   r1!, {r8, r9}
 269         cmp     r2, #8
 270         bge     .Licleanup8
 271
 272 .Liprecleanup:
 273         /*
 274          * If we're done, bail.
 275          */
 276         cmp     r2, #0
 277         beq     .Lout
 278
 279 .Licleanup:
 280         and     r6, r2, #0x3
 281         ldr     pc, [pc, r6, lsl #2]
 282         b       .Licend
 283         .word   .Lic4
 284         .word   .Lic1
 285         .word   .Lic2
 286         .word   .Lic3
 287 .Lic4:  ldrbt   r6, [r0], #1
 288         sub     r2, r2, #1
 289         strb    r6, [r1], #1
 290 .Lic3:  ldrbt   r7, [r0], #1
 291         sub     r2, r2, #1
 292         strb    r7, [r1], #1
 293 .Lic2:  ldrbt   r6, [r0], #1
 294         sub     r2, r2, #1
 295         strb    r6, [r1], #1
 296 .Lic1:  ldrbt   r7, [r0], #1
 297         subs    r2, r2, #1
 298         strb    r7, [r1], #1
 299 .Licend:
 300         bne     .Licleanup
 301
 302 .Liout:
 303         mov     r0, #0
 304
 305         str     r5, [r4, #PCB_ONFAULT]
 306         RESTORE_REGS
 307
 308         RET
 309
 310 .Lcopyfault:
 311         ldr     r0, =EFAULT
 312         str     r5, [r4, #PCB_ONFAULT]
 313         RESTORE_REGS
 314
 315         RET
 316 END(copyin)
 317
 318 /*
 319  * r0 = kernel space address
 320  * r1 = user space address
 321  * r2 = length
 322  *
 323  * Copies bytes from kernel space to user space
 324  *
 325  * We save/restore r4-r11:
 326  * r4-r11 are scratch
 327  */
 328
 329 ENTRY(copyout)
 330         /* Quick exit if length is zero */
 331         teq     r2, #0
 332         moveq   r0, #0
 333         RETeq
 334
 335         ldr     r3, .L_arm_memcpy
 336         ldr     r3, [r3]
 337         cmp     r3, #0
 338         beq     .Lnormale
 339         ldr     r3, .L_min_memcpy_size
 340         ldr     r3, [r3]
 341         cmp     r2, r3
 342         blt     .Lnormale
 343         stmfd   sp!, {r0-r2, r4, lr}
 344         mov     r3, r0
 345         mov     r0, r1
 346         mov     r1, r3
 347         mov     r3, #1 /* DST_IS_USER */
 348         ldr     r4, .L_arm_memcpy
 349         mov     lr, pc
 350         ldr     pc, [r4]
 351         cmp     r0, #0
 352         ldmfd   sp!, {r0-r2, r4, lr}
 353         moveq   r0, #0
 354         RETeq
 355
 356 .Lnormale:
 357         SAVE_REGS
 358         GET_PCB(r4)
 359         ldr     r4, [r4]
 360
 361         ldr     r5, [r4, #PCB_ONFAULT]
 362         adr     r3, .Lcopyfault
 363         str     r3, [r4, #PCB_ONFAULT]
 364
 365         PREFETCH(r0, 0)
 366         PREFETCH(r1, 0)
 367
 368         /*
 369          * If not too many bytes, take the slow path.
 370          */
 371         cmp     r2, #0x08
 372         blt     .Lcleanup
 373
 374         /*
 375          * Align destination to word boundary.
 376          */
 377         and     r6, r1, #0x3
 378         ldr     pc, [pc, r6, lsl #2]
 379         b       .Lalend
 380         .word   .Lalend
 381         .word   .Lal3
 382         .word   .Lal2
 383         .word   .Lal1
 384 .Lal3:  ldrb    r6, [r0], #1
 385         sub     r2, r2, #1
 386         strbt   r6, [r1], #1
 387 .Lal2:  ldrb    r7, [r0], #1
 388         sub     r2, r2, #1
 389         strbt   r7, [r1], #1
 390 .Lal1:  ldrb    r6, [r0], #1
 391         sub     r2, r2, #1
 392         strbt   r6, [r1], #1
 393 .Lalend:
 394
 395         /*
 396          * If few bytes left, finish slow.
 397          */
 398         cmp     r2, #0x08
 399         blt     .Lcleanup
 400
 401         /*
 402          * If source is not aligned, finish slow.
 403          */
 404         ands    r3, r0, #0x03
 405         bne     .Lcleanup
 406
 407         cmp     r2, #0x60       /* Must be > 0x5f for unrolled cacheline */
 408         blt     .Lcleanup8
 409
 410         /*
 411          * Align source & destination to cacheline boundary.
 412          */
 413         and     r6, r1, #0x1f
 414         ldr     pc, [pc, r6]
 415         b       .Lcaligned
 416         .word   .Lcaligned
 417         .word   .Lcal28
 418         .word   .Lcal24
 419         .word   .Lcal20
 420         .word   .Lcal16
 421         .word   .Lcal12
 422         .word   .Lcal8
 423         .word   .Lcal4
 424 .Lcal28:ldr     r6, [r0], #4
 425         sub     r2, r2, #4
 426         strt    r6, [r1], #4
 427 .Lcal24:ldr     r7, [r0], #4
 428         sub     r2, r2, #4
 429         strt    r7, [r1], #4
 430 .Lcal20:ldr     r6, [r0], #4
 431         sub     r2, r2, #4
 432         strt    r6, [r1], #4
 433 .Lcal16:ldr     r7, [r0], #4
 434         sub     r2, r2, #4
 435         strt    r7, [r1], #4
 436 .Lcal12:ldr     r6, [r0], #4
 437         sub     r2, r2, #4
 438         strt    r6, [r1], #4
 439 .Lcal8: ldr     r7, [r0], #4
 440         sub     r2, r2, #4
 441         strt    r7, [r1], #4
 442 .Lcal4: ldr     r6, [r0], #4
 443         sub     r2, r2, #4
 444         strt    r6, [r1], #4
 445
 446         /*
 447          * We start with > 0x40 bytes to copy (>= 0x60 got us into this
 448          * part of the code, and we may have knocked that down by as much
 449          * as 0x1c getting aligned).
 450          *
 451          * This loop basically works out to:
 452          * do {
 453          *      prefetch-next-cacheline(s)
 454          *      bytes -= 0x20;
 455          *      copy cacheline
 456          * } while (bytes >= 0x40);
 457          * bytes -= 0x20;
 458          * copy cacheline
 459          */
 460 .Lcaligned:
 461         PREFETCH(r0, 32)
 462         PREFETCH(r1, 32)
 463
 464         sub     r2, r2, #0x20
 465
 466         /* Copy a cacheline */
 467         ldmia   r0!, {r6-r11}
 468         strt    r6, [r1], #4
 469         strt    r7, [r1], #4
 470         ldmia   r0!, {r6-r7}
 471         strt    r8, [r1], #4
 472         strt    r9, [r1], #4
 473         strt    r10, [r1], #4
 474         strt    r11, [r1], #4
 475         strt    r6, [r1], #4
 476         strt    r7, [r1], #4
 477
 478         cmp     r2, #0x40
 479         bge     .Lcaligned
 480
 481         sub     r2, r2, #0x20
 482
 483         /* Copy a cacheline */
 484         ldmia   r0!, {r6-r11}
 485         strt    r6, [r1], #4
 486         strt    r7, [r1], #4
 487         ldmia   r0!, {r6-r7}
 488         strt    r8, [r1], #4
 489         strt    r9, [r1], #4
 490         strt    r10, [r1], #4
 491         strt    r11, [r1], #4
 492         strt    r6, [r1], #4
 493         strt    r7, [r1], #4
 494
 495         cmp     r2, #0x08
 496         blt     .Lprecleanup
 497
 498 .Lcleanup8:
 499         ldmia   r0!, {r8-r9}
 500         sub     r2, r2, #8
 501         strt    r8, [r1], #4
 502         strt    r9, [r1], #4
 503         cmp     r2, #8
 504         bge     .Lcleanup8
 505
 506 .Lprecleanup:
 507         /*
 508          * If we're done, bail.
 509          */
 510         cmp     r2, #0
 511         beq     .Lout
 512
 513 .Lcleanup:
 514         and     r6, r2, #0x3
 515         ldr     pc, [pc, r6, lsl #2]
 516         b       .Lcend
 517         .word   .Lc4
 518         .word   .Lc1
 519         .word   .Lc2
 520         .word   .Lc3
 521 .Lc4:   ldrb    r6, [r0], #1
 522         sub     r2, r2, #1
 523         strbt   r6, [r1], #1
 524 .Lc3:   ldrb    r7, [r0], #1
 525         sub     r2, r2, #1
 526         strbt   r7, [r1], #1
 527 .Lc2:   ldrb    r6, [r0], #1
 528         sub     r2, r2, #1
 529         strbt   r6, [r1], #1
 530 .Lc1:   ldrb    r7, [r0], #1
 531         subs    r2, r2, #1
 532         strbt   r7, [r1], #1
 533 .Lcend:
 534         bne     .Lcleanup
 535
 536 .Lout:
 537         mov     r0, #0
 538
 539         str     r5, [r4, #PCB_ONFAULT]
 540         RESTORE_REGS
 541
 542         RET
 543 END(copyout)
 544 #endif
 545
  546 /*
  547  * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
  548  *
  549  * Copies a single 8-bit value from src to dest, returning 0 on success,
  550  * else EFAULT if a page fault occurred.
       *
       * r0 = src, r1 = dest.  Scratch: r2 = pcb pointer, r3 = temporary,
       * ip = previous PCB_ONFAULT value.  No callee-saved register is used
       * and nothing is pushed on the stack.
       *
       * Local label 1 is installed in PCB_ONFAULT around the access.  The
       * code at that label does not write r0, so the EFAULT result is
       * presumably placed in r0 by the trap handler before it resumes
       * there -- NOTE(review): confirm against the abort handler.
  551  */
  552 ENTRY(badaddr_read_1)
  553         GET_PCB(r2)
  554         ldr     r2, [r2]                /* r2 = pcb pointer */
  555
  556         ldr     ip, [r2, #PCB_ONFAULT]  /* keep the previous value for restoring at 1: */
  557         adr     r3, 1f
  558         str     r3, [r2, #PCB_ONFAULT]
              /*
               * NOTE(review): the purpose of the nops on both sides of the
               * probing load is not stated in this file; presumably they keep
               * the access apart from its neighbours so that a late abort is
               * still taken while the hook is installed -- confirm.
               */
  559         nop
  560         nop
  561         nop
  562         ldrb    r3, [r0]                /* the access being probed */
  563         nop
  564         nop
  565         nop
  566         strb    r3, [r1]
  567         mov     r0, #0          /* No fault */
  568 1:      str     ip, [r2, #PCB_ONFAULT]  /* reached on both paths: put the old value back */
  569         RET
  570 END(badaddr_read_1)
 571
  572 /*
  573  * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
  574  *
  575  * Copies a single 16-bit value from src to dest, returning 0 on success,
  576  * else EFAULT if a page fault occurred.
       *
       * r0 = src, r1 = dest.  Scratch: r2 = pcb pointer, r3 = temporary,
       * ip = previous PCB_ONFAULT value.  No callee-saved register is used
       * and nothing is pushed on the stack.
       *
       * Local label 1 is installed in PCB_ONFAULT around the access.  The
       * code at that label does not write r0, so the EFAULT result is
       * presumably placed in r0 by the trap handler before it resumes
       * there -- NOTE(review): confirm against the abort handler.
  577  */
  578 ENTRY(badaddr_read_2)
  579         GET_PCB(r2)
  580         ldr     r2, [r2]                /* r2 = pcb pointer */
  581
  582         ldr     ip, [r2, #PCB_ONFAULT]  /* keep the previous value for restoring at 1: */
  583         adr     r3, 1f
  584         str     r3, [r2, #PCB_ONFAULT]
              /*
               * NOTE(review): the purpose of the nops on both sides of the
               * probing load is not stated in this file; presumably they keep
               * the access apart from its neighbours so that a late abort is
               * still taken while the hook is installed -- confirm.
               */
  585         nop
  586         nop
  587         nop
  588         ldrh    r3, [r0]                /* the access being probed (halfword) */
  589         nop
  590         nop
  591         nop
  592         strh    r3, [r1]
  593         mov     r0, #0          /* No fault */
  594 1:      str     ip, [r2, #PCB_ONFAULT]  /* reached on both paths: put the old value back */
  595         RET
  596 END(badaddr_read_2)
 597
  598 /*
  599  * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
  600  *
  601  * Copies a single 32-bit value from src to dest, returning 0 on success,
  602  * else EFAULT if a page fault occurred.
       *
       * r0 = src, r1 = dest.  Scratch: r2 = pcb pointer, r3 = temporary,
       * ip = previous PCB_ONFAULT value.  No callee-saved register is used
       * and nothing is pushed on the stack.
       *
       * Local label 1 is installed in PCB_ONFAULT around the access.  The
       * code at that label does not write r0, so the EFAULT result is
       * presumably placed in r0 by the trap handler before it resumes
       * there -- NOTE(review): confirm against the abort handler.
  603  */
  604 ENTRY(badaddr_read_4)
  605         GET_PCB(r2)
  606         ldr     r2, [r2]                /* r2 = pcb pointer */
  607
  608         ldr     ip, [r2, #PCB_ONFAULT]  /* keep the previous value for restoring at 1: */
  609         adr     r3, 1f
  610         str     r3, [r2, #PCB_ONFAULT]
              /*
               * NOTE(review): the purpose of the nops on both sides of the
               * probing load is not stated in this file; presumably they keep
               * the access apart from its neighbours so that a late abort is
               * still taken while the hook is installed -- confirm.
               */
  611         nop
  612         nop
  613         nop
  614         ldr     r3, [r0]                /* the access being probed (word) */
  615         nop
  616         nop
  617         nop
  618         str     r3, [r1]
  619         mov     r0, #0          /* No fault */
  620 1:      str     ip, [r2, #PCB_ONFAULT]  /* reached on both paths: put the old value back */
  621         RET
  622 END(badaddr_read_4)
 623