sys/arm/arm/blockio.S

   1 /*      $NetBSD: blockio.S,v 1.5 2002/08/15 01:38:16 briggs Exp $       */
   2
   3 /*-
   4  * Copyright (c) 2001 Ben Harris.
   5  * Copyright (c) 1994 Mark Brinicombe.
   6  * Copyright (c) 1994 Brini.
   7  * All rights reserved.
   8  *
   9  * This code is derived from software written for Brini by Mark Brinicombe
  10  *
  11  * Redistribution and use in source and binary forms, with or without
  12  * modification, are permitted provided that the following conditions
  13  * are met:
  14  * 1. Redistributions of source code must retain the above copyright
  15  *    notice, this list of conditions and the following disclaimer.
  16  * 2. Redistributions in binary form must reproduce the above copyright
  17  *    notice, this list of conditions and the following disclaimer in the
  18  *    documentation and/or other materials provided with the distribution.
  19  * 3. All advertising materials mentioning features or use of this software
  20  *    must display the following acknowledgement:
  21  *      This product includes software developed by Brini.
  22  * 4. The name of the company nor the name of the author may be used to
  23  *    endorse or promote products derived from this software without specific
  24  *    prior written permission.
  25  *
  26  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
  27  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  28  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  29  * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  30  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  31  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  32  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  36  * SUCH DAMAGE.
  37  *
  38  * RiscBSD kernel project
  39  *
  40  * blockio.S
  41  *
  42  * optimised block read/write from/to IO routines.
  43  *
  44  * Created      : 08/10/94
  45  * Modified     : 22/01/99  -- R.Earnshaw
  46  *                             Faster, and small tweaks for StrongARM
  47  */
  48
  49 #include <machine/asm.h>
  50
  51 __FBSDID("$FreeBSD$");
  52
  53         .syntax unified
  54
  55 /*
  56  * Read bytes from an I/O address into a block of memory
  57  *
  58  * r0 = address to read from (IO)
  59  * r1 = address to write to (memory)
  60  * r2 = length
  61  */
  62
  63 /* This code will look very familiar if you've read _memcpy(). */
  64 ENTRY(read_multi_1)
  65         mov     ip, sp
  66         stmfd   sp!, {fp, ip, lr, pc}
  67         sub     fp, ip, #4
  68         subs    r2, r2, #4              /* r2 = length - 4 */
  69         blt     .Lrm1_l4                        /* less than 4 bytes */
  70         ands    r12, r1, #3
  71         beq     .Lrm1_main              /* aligned destination */
  72         rsb     r12, r12, #4
  73         cmp     r12, #2
  74         ldrb    r3, [r0]
  75         strb    r3, [r1], #1
  76         ldrbge  r3, [r0]
  77         strbge  r3, [r1], #1
  78         ldrbgt  r3, [r0]
  79         strbgt  r3, [r1], #1
  80         subs    r2, r2, r12
  81         blt     .Lrm1_l4
  82 .Lrm1_main:
  83 .Lrm1loop:
  84         ldrb    r3, [r0]
  85         ldrb    r12, [r0]
  86         orr     r3, r3, r12, lsl #8
  87         ldrb    r12, [r0]
  88         orr     r3, r3, r12, lsl #16
  89         ldrb    r12, [r0]
  90         orr     r3, r3, r12, lsl #24
  91         str     r3, [r1], #4
  92         subs    r2, r2, #4
  93         bge     .Lrm1loop
  94 .Lrm1_l4:
  95         adds    r2, r2, #4                      /* r2 = length again */
  96         ldmdbeq fp, {fp, sp, pc}
  97         RETeq
  98         cmp     r2, #2
  99         ldrb    r3, [r0]
 100         strb    r3, [r1], #1
 101         ldrbge  r3, [r0]
 102         strbge  r3, [r1], #1
 103         ldrbgt  r3, [r0]
 104         strbgt  r3, [r1], #1
 105         ldmdb   fp, {fp, sp, pc}
 106 END(read_multi_1)
 107
 108 /*
 109  * Write bytes to an I/O address from a block of memory
 110  *
 111  * r0 = address to write to (IO)
 112  * r1 = address to read from (memory)
 113  * r2 = length
 114  */
 115
 116 /* This code will look very familiar if you've read _memcpy(). */
 117 ENTRY(write_multi_1)
 118         mov     ip, sp
 119         stmfd   sp!, {fp, ip, lr, pc}
 120         sub     fp, ip, #4
 121         subs    r2, r2, #4              /* r2 = length - 4 */
 122         blt     .Lwm1_l4                /* less than 4 bytes */
 123         ands    r12, r1, #3
 124         beq     .Lwm1_main              /* aligned source */
 125         rsb     r12, r12, #4
 126         cmp     r12, #2
 127         ldrb    r3, [r1], #1
 128         strb    r3, [r0]
 129         ldrbge  r3, [r1], #1
 130         strbge  r3, [r0]
 131         ldrbgt  r3, [r1], #1
 132         strbgt  r3, [r0]
 133         subs    r2, r2, r12
 134         blt     .Lwm1_l4
 135 .Lwm1_main:
 136 .Lwm1loop:
 137         ldr     r3, [r1], #4
 138         strb    r3, [r0]
 139         mov     r3, r3, lsr #8
 140         strb    r3, [r0]
 141         mov     r3, r3, lsr #8
 142         strb    r3, [r0]
 143         mov     r3, r3, lsr #8
 144         strb    r3, [r0]
 145         subs    r2, r2, #4
 146         bge     .Lwm1loop
 147 .Lwm1_l4:
 148         adds    r2, r2, #4                      /* r2 = length again */
 149         ldmdbeq fp, {fp, sp, pc}
 150         cmp     r2, #2
 151         ldrb    r3, [r1], #1
 152         strb    r3, [r0]
 153         ldrbge  r3, [r1], #1
 154         strbge  r3, [r0]
 155         ldrbgt  r3, [r1], #1
 156         strbgt  r3, [r0]
 157         ldmdb   fp, {fp, sp, pc}
 158 END(write_multi_1)
 159
 160 /*
 161  * Reads short ints (16 bits) from an I/O address into a block of memory
 162  *
 163  * r0 = address to read from (IO)
 164  * r1 = address to write to (memory)
 165  * r2 = length
 166  */
 167
 168 ENTRY(insw)
 169 /* Make sure that we have a positive length */
 170         cmp     r2, #0x00000000
 171         movle   pc, lr
 172
 173 /* If the destination address and the size is word aligned, do it fast */
 174
 175         tst     r2, #0x00000001
 176         tsteq   r1, #0x00000003
 177         beq     .Lfastinsw
 178
 179 /* Non aligned insw */
 180
 181 .Linswloop:
 182         ldr     r3, [r0]
 183         subs    r2, r2, #0x00000001     /* Loop test in load delay slot */
 184         strb    r3, [r1], #0x0001
 185         mov     r3, r3, lsr #8
 186         strb    r3, [r1], #0x0001
 187         bgt     .Linswloop
 188
 189         RET
 190
 191 /* Word aligned insw */
 192
 193 .Lfastinsw:
 194
 195 .Lfastinswloop:
 196         ldr     r3, [r0, #0x0002]       /* take advantage of nonaligned
 197                                          * word accesses */
 198         ldr     ip, [r0]
 199         mov     r3, r3, lsr #16         /* Put the two shorts together */
 200         orr     r3, r3, ip, lsl #16
 201         str     r3, [r1], #0x0004       /* Store */
 202         subs    r2, r2, #0x00000002     /* Next */
 203         bgt     .Lfastinswloop
 204
 205         RET
 206 END(insw)
 207
 208 /*
 209  * Writes short ints (16 bits) from a block of memory to an I/O address
 210  *
 211  * r0 = address to write to (IO)
 212  * r1 = address to read from (memory)
 213  * r2 = length
 214  */
 215
 216 ENTRY(outsw)
 217 /* Make sure that we have a positive length */
 218         cmp     r2, #0x00000000
 219         movle   pc, lr
 220
 221 /* If the destination address and the size is word aligned, do it fast */
 222
 223         tst     r2, #0x00000001
 224         tsteq   r1, #0x00000003
 225         beq     .Lfastoutsw
 226
 227 /* Non aligned outsw */
 228
 229 .Loutswloop:
 230         ldrb    r3, [r1], #0x0001
 231         ldrb    ip, [r1], #0x0001
 232         subs    r2, r2, #0x00000001     /* Loop test in load delay slot */
 233         orr     r3, r3, ip, lsl #8
 234         orr     r3, r3, r3, lsl #16
 235         str     r3, [r0]
 236         bgt     .Loutswloop
 237
 238         RET
 239
 240 /* Word aligned outsw */
 241
 242 .Lfastoutsw:
 243
 244 .Lfastoutswloop:
 245         ldr     r3, [r1], #0x0004       /* r3 = (H)(L) */
 246         subs    r2, r2, #0x00000002     /* Loop test in load delay slot */
 247
 248         eor     ip, r3, r3, lsr #16     /* ip = (H)(H^L) */
 249         eor     r3, r3, ip, lsl #16     /* r3 = (H^H^L)(L) = (L)(L) */
 250         eor     ip, ip, r3, lsr #16     /* ip = (H)(H^L^L) = (H)(H) */
 251
 252         str     r3, [r0]
 253         str     ip, [r0]
 254
 255 /*      mov     ip, r3, lsl #16
 256  *      orr     ip, ip, ip, lsr #16
 257  *      str     ip, [r0]
 258  *
 259  *      mov     ip, r3, lsr #16
 260  *      orr     ip, ip, ip, lsl #16
 261  *      str     ip, [r0]
 262  */
 263
 264         bgt     .Lfastoutswloop
 265
 266         RET
 267 END(outsw)
 268
 269 /*
 270  * reads short ints (16 bits) from an I/O address into a block of memory
 271  * with a length garenteed to be a multiple of 16 bytes
 272  * with a word aligned destination address
 273  *
 274  * r0 = address to read from (IO)
 275  * r1 = address to write to (memory)
 276  * r2 = length
 277  */
 278
 279 ENTRY(insw16)
 280 /* Make sure that we have a positive length */
 281         cmp     r2, #0x00000000
 282         movle   pc, lr
 283
 284 /* If the destination address is word aligned and the size suitably
 285    aligned, do it fast */
 286
 287         tst     r2, #0x00000007
 288         tsteq   r1, #0x00000003
 289
 290         bne     _C_LABEL(insw)
 291
 292 /* Word aligned insw */
 293
 294         stmfd   sp!, {r4,r5,lr}
 295
 296 .Linsw16loop:
 297         ldr     r3, [r0, #0x0002]       /* take advantage of nonaligned
 298                                          * word accesses */
 299         ldr     lr, [r0]
 300         mov     r3, r3, lsr #16         /* Put the two shorts together */
 301         orr     r3, r3, lr, lsl #16
 302
 303         ldr     r4, [r0, #0x0002]       /* take advantage of nonaligned
 304                                          * word accesses */
 305         ldr     lr, [r0]
 306         mov     r4, r4, lsr #16         /* Put the two shorts together */
 307         orr     r4, r4, lr, lsl #16
 308
 309         ldr     r5, [r0, #0x0002]       /* take advantage of nonaligned
 310                                          * word accesses */
 311         ldr     lr, [r0]
 312         mov     r5, r5, lsr #16         /* Put the two shorts together */
 313         orr     r5, r5, lr, lsl #16
 314
 315         ldr     ip, [r0, #0x0002]       /* take advantage of nonaligned
 316                                          * word accesses */
 317         ldr     lr, [r0]
 318         mov     ip, ip, lsr #16         /* Put the two shorts together */
 319         orr     ip, ip, lr, lsl #16
 320
 321         stmia   r1!, {r3-r5,ip}
 322         subs    r2, r2, #0x00000008     /* Next */
 323         bgt     .Linsw16loop
 324
 325         ldmfd   sp!, {r4,r5,pc}         /* Restore regs and go home */
 326 END(insw16)
 327
 328 /*
 329  * Writes short ints (16 bits) from a block of memory to an I/O address
 330  *
 331  * r0 = address to write to (IO)
 332  * r1 = address to read from (memory)
 333  * r2 = length
 334  */
 335
 336 ENTRY(outsw16)
 337 /* Make sure that we have a positive length */
 338         cmp     r2, #0x00000000
 339         movle   pc, lr
 340
 341 /* If the destination address is word aligned and the size suitably
 342    aligned, do it fast */
 343
 344         tst     r2, #0x00000007
 345         tsteq   r1, #0x00000003
 346
 347         bne     _C_LABEL(outsw)
 348
 349 /* Word aligned outsw */
 350
 351         stmfd   sp!, {r4,r5,lr}
 352
 353 .Loutsw16loop:
 354         ldmia   r1!, {r4,r5,ip,lr}
 355
 356         eor     r3, r4, r4, lsl #16     /* r3 = (A^B)(B) */
 357         eor     r4, r4, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
 358         eor     r3, r3, r4, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
 359         str     r3, [r0]
 360         str     r4, [r0]
 361
 362 /*      mov     r3, r4, lsl #16
 363  *      orr     r3, r3, r3, lsr #16
 364  *      str     r3, [r0]
 365  *
 366  *      mov     r3, r4, lsr #16
 367  *      orr     r3, r3, r3, lsl #16
 368  *      str     r3, [r0]
 369  */
 370
 371         eor     r3, r5, r5, lsl #16     /* r3 = (A^B)(B) */
 372         eor     r5, r5, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
 373         eor     r3, r3, r5, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
 374         str     r3, [r0]
 375         str     r5, [r0]
 376
 377         eor     r3, ip, ip, lsl #16     /* r3 = (A^B)(B) */
 378         eor     ip, ip, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
 379         eor     r3, r3, ip, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
 380         str     r3, [r0]
 381         str     ip, [r0]
 382
 383         eor     r3, lr, lr, lsl #16     /* r3 = (A^B)(B) */
 384         eor     lr, lr, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
 385         eor     r3, r3, lr, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
 386         str     r3, [r0]
 387         str     lr, [r0]
 388
 389         subs    r2, r2, #0x00000008
 390         bgt     .Loutsw16loop
 391
 392         ldmfd   sp!, {r4,r5,pc}         /* and go home */
 393 END(outsw16)
 394
 395 /*
 396  * reads short ints (16 bits) from an I/O address into a block of memory
 397  * The I/O address is assumed to be mapped multiple times in a block of
 398  * 8 words.
 399  * The destination address should be word aligned.
 400  *
 401  * r0 = address to read from (IO)
 402  * r1 = address to write to (memory)
 403  * r2 = length
 404  */
 405
 406 ENTRY(inswm8)
 407 /* Make sure that we have a positive length */
 408         cmp     r2, #0x00000000
 409         movle   pc, lr
 410
 411 /* If the destination address is word aligned and the size suitably
 412    aligned, do it fast */
 413
 414         tst     r1, #0x00000003
 415
 416         bne     _C_LABEL(insw)
 417
 418 /* Word aligned insw */
 419
 420         stmfd   sp!, {r4-r9,lr}
 421
 422         mov     lr, #0xff000000
 423         orr     lr, lr, #0x00ff0000
 424
 425 .Linswm8_loop8:
 426         cmp     r2, #8
 427         bcc     .Linswm8_l8
 428
 429         ldmia   r0, {r3-r9,ip}
 430
 431         bic     r3, r3, lr
 432         orr     r3, r3, r4, lsl #16
 433         bic     r5, r5, lr
 434         orr     r4, r5, r6, lsl #16
 435         bic     r7, r7, lr
 436         orr     r5, r7, r8, lsl #16
 437         bic     r9, r9, lr
 438         orr     r6, r9, ip, lsl #16
 439
 440         stmia   r1!, {r3-r6}
 441
 442         subs    r2, r2, #0x00000008     /* Next */
 443         bne     .Linswm8_loop8
 444         beq     .Linswm8_l1
 445
 446 .Linswm8_l8:
 447         cmp     r2, #4
 448         bcc     .Linswm8_l4
 449
 450         ldmia   r0, {r3-r6}
 451
 452         bic     r3, r3, lr
 453         orr     r3, r3, r4, lsl #16
 454         bic     r5, r5, lr
 455         orr     r4, r5, r6, lsl #16
 456
 457         stmia   r1!, {r3-r4}
 458
 459         subs    r2, r2, #0x00000004
 460         beq     .Linswm8_l1
 461
 462 .Linswm8_l4:
 463         cmp     r2, #2
 464         bcc     .Linswm8_l2
 465
 466         ldmia   r0, {r3-r4}
 467
 468         bic     r3, r3, lr
 469         orr     r3, r3, r4, lsl #16
 470         str     r3, [r1], #0x0004
 471
 472         subs    r2, r2, #0x00000002
 473         beq     .Linswm8_l1
 474
 475 .Linswm8_l2:
 476         cmp     r2, #1
 477         bcc     .Linswm8_l1
 478
 479         ldr     r3, [r0]
 480         subs    r2, r2, #0x00000001     /* Test in load delay slot */
 481                                         /* XXX, why don't we use result?  */
 482
 483         strb    r3, [r1], #0x0001
 484         mov     r3, r3, lsr #8
 485         strb    r3, [r1], #0x0001
 486
 487
 488 .Linswm8_l1:
 489         ldmfd   sp!, {r4-r9,pc}         /* And go home */
 490 END(inswm8)
 491
 492 /*
 493  * write short ints (16 bits) to an I/O address from a block of memory
 494  * The I/O address is assumed to be mapped multiple times in a block of
 495  * 8 words.
 496  * The source address should be word aligned.
 497  *
 498  * r0 = address to read to (IO)
 499  * r1 = address to write from (memory)
 500  * r2 = length
 501  */
 502
 503 ENTRY(outswm8)
 504 /* Make sure that we have a positive length */
 505         cmp     r2, #0x00000000
 506         movle   pc, lr
 507
 508 /* If the destination address is word aligned and the size suitably
 509    aligned, do it fast */
 510
 511         tst     r1, #0x00000003
 512
 513         bne     _C_LABEL(outsw)
 514
 515 /* Word aligned outsw */
 516
 517         stmfd   sp!, {r4-r8,lr}
 518
 519 .Loutswm8_loop8:
 520         cmp     r2, #8
 521         bcc     .Loutswm8_l8
 522
 523         ldmia   r1!, {r3,r5,r7,ip}
 524
 525         eor     r4, r3, r3, lsr #16     /* r4 = (A)(A^B) */
 526         eor     r3, r3, r4, lsl #16     /* r3 = (A^A^B)(B) = (B)(B) */
 527         eor     r4, r4, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
 528
 529         eor     r6, r5, r5, lsr #16     /* r6 = (A)(A^B) */
 530         eor     r5, r5, r6, lsl #16     /* r5 = (A^A^B)(B) = (B)(B) */
 531         eor     r6, r6, r5, lsr #16     /* r6 = (A)(B^A^B) = (A)(A) */
 532
 533         eor     r8, r7, r7, lsr #16     /* r8 = (A)(A^B) */
 534         eor     r7, r7, r8, lsl #16     /* r7 = (A^A^B)(B) = (B)(B) */
 535         eor     r8, r8, r7, lsr #16     /* r8 = (A)(B^A^B) = (A)(A) */
 536
 537         eor     lr, ip, ip, lsr #16     /* lr = (A)(A^B) */
 538         eor     ip, ip, lr, lsl #16     /* ip = (A^A^B)(B) = (B)(B) */
 539         eor     lr, lr, ip, lsr #16     /* lr = (A)(B^A^B) = (A)(A) */
 540
 541         stmia   r0, {r3-r8,ip,lr}
 542
 543         subs    r2, r2, #0x00000008     /* Next */
 544         bne     .Loutswm8_loop8
 545         beq     .Loutswm8_l1
 546
 547 .Loutswm8_l8:
 548         cmp     r2, #4
 549         bcc     .Loutswm8_l4
 550
 551         ldmia   r1!, {r3-r4}
 552
 553         eor     r6, r3, r3, lsr #16     /* r6 = (A)(A^B) */
 554         eor     r5, r3, r6, lsl #16     /* r5 = (A^A^B)(B) = (B)(B) */
 555         eor     r6, r6, r5, lsr #16     /* r6 = (A)(B^A^B) = (A)(A) */
 556
 557         eor     r8, r4, r4, lsr #16     /* r8 = (A)(A^B) */
 558         eor     r7, r4, r8, lsl #16     /* r7 = (A^A^B)(B) = (B)(B) */
 559         eor     r8, r8, r7, lsr #16     /* r8 = (A)(B^A^B) = (A)(A) */
 560
 561         stmia   r0, {r5-r8}
 562
 563         subs    r2, r2, #0x00000004
 564         beq     .Loutswm8_l1
 565
 566 .Loutswm8_l4:
 567         cmp     r2, #2
 568         bcc     .Loutswm8_l2
 569
 570         ldr     r3, [r1], #0x0004       /* r3 = (A)(B) */
 571         subs    r2, r2, #0x00000002     /* Done test in Load delay slot */
 572
 573         eor     r5, r3, r3, lsr #16     /* r5 = (A)(A^B)*/
 574         eor     r4, r3, r5, lsl #16     /* r4 = (A^A^B)(B) = (B)(B) */
 575         eor     r5, r5, r4, lsr #16     /* r5 = (A)(B^A^B) = (A)(A) */
 576
 577         stmia   r0, {r4, r5}
 578
 579         beq     .Loutswm8_l1
 580
 581 .Loutswm8_l2:
 582         cmp     r2, #1
 583         bcc     .Loutswm8_l1
 584
 585         ldrb    r3, [r1], #0x0001
 586         ldrb    r4, [r1], #0x0001
 587         subs    r2, r2, #0x00000001     /* Done test in load delay slot */
 588                                         /* XXX This test isn't used?  */
 589         orr     r3, r3, r4, lsl #8
 590         orr     r3, r3, r3, lsl #16
 591         str     r3, [r0]
 592
 593 .Loutswm8_l1:
 594         ldmfd   sp!, {r4-r8,pc}         /* And go home */
 595 END(outswm8)
 596