sys/arm/arm/bcopyinout.S

   1 /*      $NetBSD: bcopyinout.S,v 1.11 2003/10/13 21:22:40 scw Exp $      */
   2
   3 /*-
   4  * Copyright (c) 2002 Wasabi Systems, Inc.
   5  * All rights reserved.
   6  *
   7  * Written by Allen Briggs for Wasabi Systems, Inc.
   8  *
   9  * Redistribution and use in source and binary forms, with or without
  10  * modification, are permitted provided that the following conditions
  11  * are met:
  12  * 1. Redistributions of source code must retain the above copyright
  13  *    notice, this list of conditions and the following disclaimer.
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in the
  16  *    documentation and/or other materials provided with the distribution.
  17  * 3. All advertising materials mentioning features or use of this software
  18  *    must display the following acknowledgement:
  19  *      This product includes software developed for the NetBSD Project by
  20  *      Wasabi Systems, Inc.
  21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
  22  *    or promote products derived from this software without specific prior
  23  *    written permission.
  24  *
  25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
  26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
  29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  35  * POSSIBILITY OF SUCH DAMAGE.
  36  */
  37
  38
  39 #include "assym.s"
  40
  41 #include <machine/asm.h>
  42
  43 .L_arm_memcpy:
  44         .word   _C_LABEL(_arm_memcpy)
  45 .L_min_memcpy_size:
  46         .word   _C_LABEL(_min_memcpy_size)
  47
  48 __FBSDID("$FreeBSD$");
  49 #ifdef _ARM_ARCH_5E
  50 #include <arm/arm/bcopyinout_xscale.S>
  51 #else
  52
  53         .text
  54         .align  0
  55
  56 #ifdef MULTIPROCESSOR
  57 .Lcpu_info:
  58         .word   _C_LABEL(cpu_info)
  59 #else
  60 .Lcurpcb:
  61         .word _C_LABEL(__pcpu) + PC_CURPCB
  62 #endif
  63
  64 #define SAVE_REGS       stmfd   sp!, {r4-r11}
  65 #define RESTORE_REGS    ldmfd   sp!, {r4-r11}
  66
  67 #if defined(_ARM_ARCH_5E)
  68 #define HELLOCPP #
  69 #define PREFETCH(rx,o)  pld     [ rx , HELLOCPP (o) ]
  70 #else
  71 #define PREFETCH(rx,o)
  72 #endif
  73
  74 /*
  75  * r0 = user space address
  76  * r1 = kernel space address
  77  * r2 = length
  78  *
  79  * Copies bytes from user space to kernel space
  80  *
  81  * We save/restore r4-r11:
  82  * r4-r11 are scratch
  83  */
  84 ENTRY(copyin)
  85         /* Quick exit if length is zero */
  86         teq     r2, #0
  87         moveq   r0, #0
  88         RETeq
  89
  90         ldr     r3, .L_arm_memcpy
  91         ldr     r3, [r3]
  92         cmp     r3, #0
  93         beq     .Lnormal
  94         ldr     r3, .L_min_memcpy_size
  95         ldr     r3, [r3]
  96         cmp     r2, r3
  97         blt     .Lnormal
  98         stmfd   sp!, {r0-r2, r4, lr}
  99         mov     r3, r0
 100         mov     r0, r1
 101         mov     r1, r3
 102         mov     r3, #2 /* SRC_IS_USER */
 103         ldr     r4, .L_arm_memcpy
 104         mov     lr, pc
 105         ldr     pc, [r4]
 106         cmp     r0, #0
 107         ldmfd   sp!, {r0-r2, r4, lr}
 108         moveq   r0, #0
 109         RETeq
 110
 111 .Lnormal:
 112         SAVE_REGS
 113 #ifdef MULTIPROCESSOR
 114         /* XXX Probably not appropriate for non-Hydra SMPs */
 115         stmfd   sp!, {r0-r2, r14}
 116         bl      _C_LABEL(cpu_number)
 117         ldr     r4, .Lcpu_info
 118         ldr     r4, [r4, r0, lsl #2]
 119         ldr     r4, [r4, #CI_CURPCB]
 120         ldmfd   sp!, {r0-r2, r14}
 121 #else
 122         ldr     r4, .Lcurpcb
 123         ldr     r4, [r4]
 124 #endif
 125
 126         ldr     r5, [r4, #PCB_ONFAULT]
 127         adr     r3, .Lcopyfault
 128         str     r3, [r4, #PCB_ONFAULT]
 129
 130         PREFETCH(r0, 0)
 131         PREFETCH(r1, 0)
 132
 133         /*
 134          * If not too many bytes, take the slow path.
 135          */
 136         cmp     r2, #0x08
 137         blt     .Licleanup
 138
 139         /*
 140          * Align destination to word boundary.
 141          */
 142         and     r6, r1, #0x3
 143         ldr     pc, [pc, r6, lsl #2]
 144         b       .Lialend
 145         .word   .Lialend
 146         .word   .Lial3
 147         .word   .Lial2
 148         .word   .Lial1
 149 .Lial3: ldrbt   r6, [r0], #1
 150         sub     r2, r2, #1
 151         strb    r6, [r1], #1
 152 .Lial2: ldrbt   r7, [r0], #1
 153         sub     r2, r2, #1
 154         strb    r7, [r1], #1
 155 .Lial1: ldrbt   r6, [r0], #1
 156         sub     r2, r2, #1
 157         strb    r6, [r1], #1
 158 .Lialend:
 159
 160         /*
 161          * If few bytes left, finish slow.
 162          */
 163         cmp     r2, #0x08
 164         blt     .Licleanup
 165
 166         /*
 167          * If source is not aligned, finish slow.
 168          */
 169         ands    r3, r0, #0x03
 170         bne     .Licleanup
 171
 172         cmp     r2, #0x60       /* Must be > 0x5f for unrolled cacheline */
 173         blt     .Licleanup8
 174
 175         /*
 176          * Align destination to cacheline boundary.
 177          * If source and destination are nicely aligned, this can be a big
 178          * win.  If not, it's still cheaper to copy in groups of 32 even if
 179          * we don't get the nice cacheline alignment.
 180          */
 181         and     r6, r1, #0x1f
 182         ldr     pc, [pc, r6]
 183         b       .Licaligned
 184         .word   .Licaligned
 185         .word   .Lical28
 186         .word   .Lical24
 187         .word   .Lical20
 188         .word   .Lical16
 189         .word   .Lical12
 190         .word   .Lical8
 191         .word   .Lical4
 192 .Lical28:ldrt   r6, [r0], #4
 193         sub     r2, r2, #4
 194         str     r6, [r1], #4
 195 .Lical24:ldrt   r7, [r0], #4
 196         sub     r2, r2, #4
 197         str     r7, [r1], #4
 198 .Lical20:ldrt   r6, [r0], #4
 199         sub     r2, r2, #4
 200         str     r6, [r1], #4
 201 .Lical16:ldrt   r7, [r0], #4
 202         sub     r2, r2, #4
 203         str     r7, [r1], #4
 204 .Lical12:ldrt   r6, [r0], #4
 205         sub     r2, r2, #4
 206         str     r6, [r1], #4
 207 .Lical8:ldrt    r7, [r0], #4
 208         sub     r2, r2, #4
 209         str     r7, [r1], #4
 210 .Lical4:ldrt    r6, [r0], #4
 211         sub     r2, r2, #4
 212         str     r6, [r1], #4
 213
 214         /*
 215          * We start with > 0x40 bytes to copy (>= 0x60 got us into this
 216          * part of the code, and we may have knocked that down by as much
 217          * as 0x1c getting aligned).
 218          *
 219          * This loop basically works out to:
 220          * do {
 221          *      prefetch-next-cacheline(s)
 222          *      bytes -= 0x20;
 223          *      copy cacheline
 224          * } while (bytes >= 0x40);
 225          * bytes -= 0x20;
 226          * copy cacheline
 227          */
 228 .Licaligned:
 229         PREFETCH(r0, 32)
 230         PREFETCH(r1, 32)
 231
 232         sub     r2, r2, #0x20
 233
 234         /* Copy a cacheline */
 235         ldrt    r10, [r0], #4
 236         ldrt    r11, [r0], #4
 237         ldrt    r6, [r0], #4
 238         ldrt    r7, [r0], #4
 239         ldrt    r8, [r0], #4
 240         ldrt    r9, [r0], #4
 241         stmia   r1!, {r10-r11}
 242         ldrt    r10, [r0], #4
 243         ldrt    r11, [r0], #4
 244         stmia   r1!, {r6-r11}
 245
 246         cmp     r2, #0x40
 247         bge     .Licaligned
 248
 249         sub     r2, r2, #0x20
 250
 251         /* Copy a cacheline */
 252         ldrt    r10, [r0], #4
 253         ldrt    r11, [r0], #4
 254         ldrt    r6, [r0], #4
 255         ldrt    r7, [r0], #4
 256         ldrt    r8, [r0], #4
 257         ldrt    r9, [r0], #4
 258         stmia   r1!, {r10-r11}
 259         ldrt    r10, [r0], #4
 260         ldrt    r11, [r0], #4
 261         stmia   r1!, {r6-r11}
 262
 263         cmp     r2, #0x08
 264         blt     .Liprecleanup
 265
 266 .Licleanup8:
 267         ldrt    r8, [r0], #4
 268         ldrt    r9, [r0], #4
 269         sub     r2, r2, #8
 270         stmia   r1!, {r8, r9}
 271         cmp     r2, #8
 272         bge     .Licleanup8
 273
 274 .Liprecleanup:
 275         /*
 276          * If we're done, bail.
 277          */
 278         cmp     r2, #0
 279         beq     .Lout
 280
 281 .Licleanup:
 282         and     r6, r2, #0x3
 283         ldr     pc, [pc, r6, lsl #2]
 284         b       .Licend
 285         .word   .Lic4
 286         .word   .Lic1
 287         .word   .Lic2
 288         .word   .Lic3
 289 .Lic4:  ldrbt   r6, [r0], #1
 290         sub     r2, r2, #1
 291         strb    r6, [r1], #1
 292 .Lic3:  ldrbt   r7, [r0], #1
 293         sub     r2, r2, #1
 294         strb    r7, [r1], #1
 295 .Lic2:  ldrbt   r6, [r0], #1
 296         sub     r2, r2, #1
 297         strb    r6, [r1], #1
 298 .Lic1:  ldrbt   r7, [r0], #1
 299         subs    r2, r2, #1
 300         strb    r7, [r1], #1
 301 .Licend:
 302         bne     .Licleanup
 303
 304 .Liout:
 305         mov     r0, #0
 306
 307         str     r5, [r4, #PCB_ONFAULT]
 308         RESTORE_REGS
 309
 310         RET
 311
 312 .Lcopyfault:
 313         mov     r0, #14 /* EFAULT */
 314         str     r5, [r4, #PCB_ONFAULT]
 315         RESTORE_REGS
 316
 317         RET
 318
 319 /*
 320  * r0 = kernel space address
 321  * r1 = user space address
 322  * r2 = length
 323  *
 324  * Copies bytes from kernel space to user space
 325  *
 326  * We save/restore r4-r11:
 327  * r4-r11 are scratch
 328  */
 329
 330 ENTRY(copyout)
 331         /* Quick exit if length is zero */
 332         teq     r2, #0
 333         moveq   r0, #0
 334         RETeq
 335
 336         ldr     r3, .L_arm_memcpy
 337         ldr     r3, [r3]
 338         cmp     r3, #0
 339         beq     .Lnormale
 340         ldr     r3, .L_min_memcpy_size
 341         ldr     r3, [r3]
 342         cmp     r2, r3
 343         blt     .Lnormale
 344         stmfd   sp!, {r0-r2, r4, lr}
 345         mov     r3, r0
 346         mov     r0, r1
 347         mov     r1, r3
 348         mov     r3, #1 /* DST_IS_USER */
 349         ldr     r4, .L_arm_memcpy
 350         mov     lr, pc
 351         ldr     pc, [r4]
 352         cmp     r0, #0
 353         ldmfd   sp!, {r0-r2, r4, lr}
 354         moveq   r0, #0
 355         RETeq
 356
 357 .Lnormale:
 358         SAVE_REGS
 359 #ifdef MULTIPROCESSOR
 360         /* XXX Probably not appropriate for non-Hydra SMPs */
 361         stmfd   sp!, {r0-r2, r14}
 362         bl      _C_LABEL(cpu_number)
 363         ldr     r4, .Lcpu_info
 364         ldr     r4, [r4, r0, lsl #2]
 365         ldr     r4, [r4, #CI_CURPCB]
 366         ldmfd   sp!, {r0-r2, r14}
 367 #else
 368         ldr     r4, .Lcurpcb
 369         ldr     r4, [r4]
 370 #endif
 371
 372         ldr     r5, [r4, #PCB_ONFAULT]
 373         adr     r3, .Lcopyfault
 374         str     r3, [r4, #PCB_ONFAULT]
 375
 376         PREFETCH(r0, 0)
 377         PREFETCH(r1, 0)
 378
 379         /*
 380          * If not too many bytes, take the slow path.
 381          */
 382         cmp     r2, #0x08
 383         blt     .Lcleanup
 384
 385         /*
 386          * Align destination to word boundary.
 387          */
 388         and     r6, r1, #0x3
 389         ldr     pc, [pc, r6, lsl #2]
 390         b       .Lalend
 391         .word   .Lalend
 392         .word   .Lal3
 393         .word   .Lal2
 394         .word   .Lal1
 395 .Lal3:  ldrb    r6, [r0], #1
 396         sub     r2, r2, #1
 397         strbt   r6, [r1], #1
 398 .Lal2:  ldrb    r7, [r0], #1
 399         sub     r2, r2, #1
 400         strbt   r7, [r1], #1
 401 .Lal1:  ldrb    r6, [r0], #1
 402         sub     r2, r2, #1
 403         strbt   r6, [r1], #1
 404 .Lalend:
 405
 406         /*
 407          * If few bytes left, finish slow.
 408          */
 409         cmp     r2, #0x08
 410         blt     .Lcleanup
 411
 412         /*
 413          * If source is not aligned, finish slow.
 414          */
 415         ands    r3, r0, #0x03
 416         bne     .Lcleanup
 417
 418         cmp     r2, #0x60       /* Must be > 0x5f for unrolled cacheline */
 419         blt     .Lcleanup8
 420
 421         /*
 422          * Align source & destination to cacheline boundary.
 423          */
 424         and     r6, r1, #0x1f
 425         ldr     pc, [pc, r6]
 426         b       .Lcaligned
 427         .word   .Lcaligned
 428         .word   .Lcal28
 429         .word   .Lcal24
 430         .word   .Lcal20
 431         .word   .Lcal16
 432         .word   .Lcal12
 433         .word   .Lcal8
 434         .word   .Lcal4
 435 .Lcal28:ldr     r6, [r0], #4
 436         sub     r2, r2, #4
 437         strt    r6, [r1], #4
 438 .Lcal24:ldr     r7, [r0], #4
 439         sub     r2, r2, #4
 440         strt    r7, [r1], #4
 441 .Lcal20:ldr     r6, [r0], #4
 442         sub     r2, r2, #4
 443         strt    r6, [r1], #4
 444 .Lcal16:ldr     r7, [r0], #4
 445         sub     r2, r2, #4
 446         strt    r7, [r1], #4
 447 .Lcal12:ldr     r6, [r0], #4
 448         sub     r2, r2, #4
 449         strt    r6, [r1], #4
 450 .Lcal8: ldr     r7, [r0], #4
 451         sub     r2, r2, #4
 452         strt    r7, [r1], #4
 453 .Lcal4: ldr     r6, [r0], #4
 454         sub     r2, r2, #4
 455         strt    r6, [r1], #4
 456
 457         /*
 458          * We start with > 0x40 bytes to copy (>= 0x60 got us into this
 459          * part of the code, and we may have knocked that down by as much
 460          * as 0x1c getting aligned).
 461          *
 462          * This loop basically works out to:
 463          * do {
 464          *      prefetch-next-cacheline(s)
 465          *      bytes -= 0x20;
 466          *      copy cacheline
 467          * } while (bytes >= 0x40);
 468          * bytes -= 0x20;
 469          * copy cacheline
 470          */
 471 .Lcaligned:
 472         PREFETCH(r0, 32)
 473         PREFETCH(r1, 32)
 474
 475         sub     r2, r2, #0x20
 476
 477         /* Copy a cacheline */
 478         ldmia   r0!, {r6-r11}
 479         strt    r6, [r1], #4
 480         strt    r7, [r1], #4
 481         ldmia   r0!, {r6-r7}
 482         strt    r8, [r1], #4
 483         strt    r9, [r1], #4
 484         strt    r10, [r1], #4
 485         strt    r11, [r1], #4
 486         strt    r6, [r1], #4
 487         strt    r7, [r1], #4
 488
 489         cmp     r2, #0x40
 490         bge     .Lcaligned
 491
 492         sub     r2, r2, #0x20
 493
 494         /* Copy a cacheline */
 495         ldmia   r0!, {r6-r11}
 496         strt    r6, [r1], #4
 497         strt    r7, [r1], #4
 498         ldmia   r0!, {r6-r7}
 499         strt    r8, [r1], #4
 500         strt    r9, [r1], #4
 501         strt    r10, [r1], #4
 502         strt    r11, [r1], #4
 503         strt    r6, [r1], #4
 504         strt    r7, [r1], #4
 505
 506         cmp     r2, #0x08
 507         blt     .Lprecleanup
 508
 509 .Lcleanup8:
 510         ldmia   r0!, {r8-r9}
 511         sub     r2, r2, #8
 512         strt    r8, [r1], #4
 513         strt    r9, [r1], #4
 514         cmp     r2, #8
 515         bge     .Lcleanup8
 516
 517 .Lprecleanup:
 518         /*
 519          * If we're done, bail.
 520          */
 521         cmp     r2, #0
 522         beq     .Lout
 523
 524 .Lcleanup:
 525         and     r6, r2, #0x3
 526         ldr     pc, [pc, r6, lsl #2]
 527         b       .Lcend
 528         .word   .Lc4
 529         .word   .Lc1
 530         .word   .Lc2
 531         .word   .Lc3
 532 .Lc4:   ldrb    r6, [r0], #1
 533         sub     r2, r2, #1
 534         strbt   r6, [r1], #1
 535 .Lc3:   ldrb    r7, [r0], #1
 536         sub     r2, r2, #1
 537         strbt   r7, [r1], #1
 538 .Lc2:   ldrb    r6, [r0], #1
 539         sub     r2, r2, #1
 540         strbt   r6, [r1], #1
 541 .Lc1:   ldrb    r7, [r0], #1
 542         subs    r2, r2, #1
 543         strbt   r7, [r1], #1
 544 .Lcend:
 545         bne     .Lcleanup
 546
 547 .Lout:
 548         mov     r0, #0
 549
 550         str     r5, [r4, #PCB_ONFAULT]
 551         RESTORE_REGS
 552
 553         RET
 554 #endif
 555
 556 /*
 557  * int badaddr_read_1(const uint8_t *src, uint8_t *dest)
 558  *
 559  * Copies a single 8-bit value from src to dest, returning 0 on success,
 560  * else EFAULT if a page fault occurred.
 561  */
 562 ENTRY(badaddr_read_1)
 563 #ifdef MULTIPROCESSOR
 564         /* XXX Probably not appropriate for non-Hydra SMPs */
 565         stmfd   sp!, {r0-r1, r14}
 566         bl      _C_LABEL(cpu_number)
 567         ldr     r2, .Lcpu_info
 568         ldr     r2, [r2, r0, lsl #2]
 569         ldr     r2, [r2, #CI_CURPCB]
 570         ldmfd   sp!, {r0-r1, r14}
 571 #else
 572         ldr     r2, .Lcurpcb
 573         ldr     r2, [r2]
 574 #endif
 575         ldr     ip, [r2, #PCB_ONFAULT]
 576         adr     r3, 1f
 577         str     r3, [r2, #PCB_ONFAULT]
 578         nop
 579         nop
 580         nop
 581         ldrb    r3, [r0]
 582         nop
 583         nop
 584         nop
 585         strb    r3, [r1]
 586         mov     r0, #0          /* No fault */
 587 1:      str     ip, [r2, #PCB_ONFAULT]
 588         RET
 589
 590 /*
 591  * int badaddr_read_2(const uint16_t *src, uint16_t *dest)
 592  *
 593  * Copies a single 16-bit value from src to dest, returning 0 on success,
 594  * else EFAULT if a page fault occurred.
 595  */
 596 ENTRY(badaddr_read_2)
 597 #ifdef MULTIPROCESSOR
 598         /* XXX Probably not appropriate for non-Hydra SMPs */
 599         stmfd   sp!, {r0-r1, r14}
 600         bl      _C_LABEL(cpu_number)
 601         ldr     r2, .Lcpu_info
 602         ldr     r2, [r2, r0, lsl #2]
 603         ldr     r2, [r2, #CI_CURPCB]
 604         ldmfd   sp!, {r0-r1, r14}
 605 #else
 606         ldr     r2, .Lcurpcb
 607         ldr     r2, [r2]
 608 #endif
 609         ldr     ip, [r2, #PCB_ONFAULT]
 610         adr     r3, 1f
 611         str     r3, [r2, #PCB_ONFAULT]
 612         nop
 613         nop
 614         nop
 615         ldrh    r3, [r0]
 616         nop
 617         nop
 618         nop
 619         strh    r3, [r1]
 620         mov     r0, #0          /* No fault */
 621 1:      str     ip, [r2, #PCB_ONFAULT]
 622         RET
 623
 624 /*
 625  * int badaddr_read_4(const uint32_t *src, uint32_t *dest)
 626  *
 627  * Copies a single 32-bit value from src to dest, returning 0 on success,
 628  * else EFAULT if a page fault occurred.
 629  */
 630 ENTRY(badaddr_read_4)
 631 #ifdef MULTIPROCESSOR
 632         /* XXX Probably not appropriate for non-Hydra SMPs */
 633         stmfd   sp!, {r0-r1, r14}
 634         bl      _C_LABEL(cpu_number)
 635         ldr     r2, .Lcpu_info
 636         ldr     r2, [r2, r0, lsl #2]
 637         ldr     r2, [r2, #CI_CURPCB]
 638         ldmfd   sp!, {r0-r1, r14}
 639 #else
 640         ldr     r2, .Lcurpcb
 641         ldr     r2, [r2]
 642 #endif
 643         ldr     ip, [r2, #PCB_ONFAULT]
 644         adr     r3, 1f
 645         str     r3, [r2, #PCB_ONFAULT]
 646         nop
 647         nop
 648         nop
 649         ldr     r3, [r0]
 650         nop
 651         nop
 652         nop
 653         str     r3, [r1]
 654         mov     r0, #0          /* No fault */
 655 1:      str     ip, [r2, #PCB_ONFAULT]
 656         RET
 657