sys/arm/arm/blockio.S

   1 /*      $NetBSD: blockio.S,v 1.5 2002/08/15 01:38:16 briggs Exp $       */
   2
   3 /*-
   4  * Copyright (c) 2001 Ben Harris.
   5  * Copyright (c) 1994 Mark Brinicombe.
   6  * Copyright (c) 1994 Brini.
   7  * All rights reserved.
   8  *
   9  * This code is derived from software written for Brini by Mark Brinicombe
  10  *
  11  * Redistribution and use in source and binary forms, with or without
  12  * modification, are permitted provided that the following conditions
  13  * are met:
  14  * 1. Redistributions of source code must retain the above copyright
  15  *    notice, this list of conditions and the following disclaimer.
  16  * 2. Redistributions in binary form must reproduce the above copyright
  17  *    notice, this list of conditions and the following disclaimer in the
  18  *    documentation and/or other materials provided with the distribution.
  19  * 3. All advertising materials mentioning features or use of this software
  20  *    must display the following acknowledgement:
  21  *      This product includes software developed by Brini.
  22  * 4. The name of the company nor the name of the author may be used to
  23  *    endorse or promote products derived from this software without specific
  24  *    prior written permission.
  25  *
  26  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
  27  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  28  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  29  * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  30  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  31  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  32  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  36  * SUCH DAMAGE.
  37  *
  38  * RiscBSD kernel project
  39  *
  40  * blockio.S
  41  *
  42  * optimised block read/write from/to IO routines.
  43  *
  44  * Created      : 08/10/94
  45  * Modified     : 22/01/99  -- R.Earnshaw
  46  *                             Faster, and small tweaks for StrongARM
  47  */
  48
  49 #include <machine/asm.h>
  50         .syntax unified
  51
  52 /*
  53  * Read bytes from an I/O address into a block of memory
  54  *
  55  * r0 = address to read from (IO)
  56  * r1 = address to write to (memory)
  57  * r2 = length
  58  */
  59
  60 /* This code will look very familiar if you've read _memcpy(). */
  61 ENTRY(read_multi_1)
  62         mov     ip, sp
  63         stmfd   sp!, {fp, ip, lr, pc}
  64         sub     fp, ip, #4
  65         subs    r2, r2, #4              /* r2 = length - 4 */
  66         blt     .Lrm1_l4                        /* less than 4 bytes */
  67         ands    r12, r1, #3
  68         beq     .Lrm1_main              /* aligned destination */
  69         rsb     r12, r12, #4
  70         cmp     r12, #2
  71         ldrb    r3, [r0]
  72         strb    r3, [r1], #1
  73         ldrbge  r3, [r0]
  74         strbge  r3, [r1], #1
  75         ldrbgt  r3, [r0]
  76         strbgt  r3, [r1], #1
  77         subs    r2, r2, r12
  78         blt     .Lrm1_l4
  79 .Lrm1_main:
  80 .Lrm1loop:
  81         ldrb    r3, [r0]
  82         ldrb    r12, [r0]
  83         orr     r3, r3, r12, lsl #8
  84         ldrb    r12, [r0]
  85         orr     r3, r3, r12, lsl #16
  86         ldrb    r12, [r0]
  87         orr     r3, r3, r12, lsl #24
  88         str     r3, [r1], #4
  89         subs    r2, r2, #4
  90         bge     .Lrm1loop
  91 .Lrm1_l4:
  92         adds    r2, r2, #4                      /* r2 = length again */
  93         ldmdbeq fp, {fp, sp, pc}
  94         RETeq
  95         cmp     r2, #2
  96         ldrb    r3, [r0]
  97         strb    r3, [r1], #1
  98         ldrbge  r3, [r0]
  99         strbge  r3, [r1], #1
 100         ldrbgt  r3, [r0]
 101         strbgt  r3, [r1], #1
 102         ldmdb   fp, {fp, sp, pc}
 103 END(read_multi_1)
 104
 105 /*
 106  * Write bytes to an I/O address from a block of memory
 107  *
 108  * r0 = address to write to (IO)
 109  * r1 = address to read from (memory)
 110  * r2 = length
 111  */
 112
 113 /* This code will look very familiar if you've read _memcpy(). */
 114 ENTRY(write_multi_1)
 115         mov     ip, sp
 116         stmfd   sp!, {fp, ip, lr, pc}
 117         sub     fp, ip, #4
 118         subs    r2, r2, #4              /* r2 = length - 4 */
 119         blt     .Lwm1_l4                /* less than 4 bytes */
 120         ands    r12, r1, #3
 121         beq     .Lwm1_main              /* aligned source */
 122         rsb     r12, r12, #4
 123         cmp     r12, #2
 124         ldrb    r3, [r1], #1
 125         strb    r3, [r0]
 126         ldrbge  r3, [r1], #1
 127         strbge  r3, [r0]
 128         ldrbgt  r3, [r1], #1
 129         strbgt  r3, [r0]
 130         subs    r2, r2, r12
 131         blt     .Lwm1_l4
 132 .Lwm1_main:
 133 .Lwm1loop:
 134         ldr     r3, [r1], #4
 135         strb    r3, [r0]
 136         mov     r3, r3, lsr #8
 137         strb    r3, [r0]
 138         mov     r3, r3, lsr #8
 139         strb    r3, [r0]
 140         mov     r3, r3, lsr #8
 141         strb    r3, [r0]
 142         subs    r2, r2, #4
 143         bge     .Lwm1loop
 144 .Lwm1_l4:
 145         adds    r2, r2, #4                      /* r2 = length again */
 146         ldmdbeq fp, {fp, sp, pc}
 147         cmp     r2, #2
 148         ldrb    r3, [r1], #1
 149         strb    r3, [r0]
 150         ldrbge  r3, [r1], #1
 151         strbge  r3, [r0]
 152         ldrbgt  r3, [r1], #1
 153         strbgt  r3, [r0]
 154         ldmdb   fp, {fp, sp, pc}
 155 END(write_multi_1)
 156
 157 /*
 158  * Reads short ints (16 bits) from an I/O address into a block of memory
 159  *
 160  * r0 = address to read from (IO)
 161  * r1 = address to write to (memory)
 162  * r2 = length
 163  */
 164
 165 ENTRY(insw)
 166 /* Make sure that we have a positive length */
 167         cmp     r2, #0x00000000
 168         movle   pc, lr
 169
 170 /* If the destination address and the size is word aligned, do it fast */
 171
 172         tst     r2, #0x00000001
 173         tsteq   r1, #0x00000003
 174         beq     .Lfastinsw
 175
 176 /* Non aligned insw */
 177
 178 .Linswloop:
 179         ldr     r3, [r0]
 180         subs    r2, r2, #0x00000001     /* Loop test in load delay slot */
 181         strb    r3, [r1], #0x0001
 182         mov     r3, r3, lsr #8
 183         strb    r3, [r1], #0x0001
 184         bgt     .Linswloop
 185
 186         RET
 187
 188 /* Word aligned insw */
 189
 190 .Lfastinsw:
 191
 192 .Lfastinswloop:
 193         ldr     r3, [r0, #0x0002]       /* take advantage of nonaligned
 194                                          * word accesses */
 195         ldr     ip, [r0]
 196         mov     r3, r3, lsr #16         /* Put the two shorts together */
 197         orr     r3, r3, ip, lsl #16
 198         str     r3, [r1], #0x0004       /* Store */
 199         subs    r2, r2, #0x00000002     /* Next */
 200         bgt     .Lfastinswloop
 201
 202         RET
 203 END(insw)
 204
 205 /*
 206  * Writes short ints (16 bits) from a block of memory to an I/O address
 207  *
 208  * r0 = address to write to (IO)
 209  * r1 = address to read from (memory)
 210  * r2 = length
 211  */
 212
 213 ENTRY(outsw)
 214 /* Make sure that we have a positive length */
 215         cmp     r2, #0x00000000
 216         movle   pc, lr
 217
 218 /* If the destination address and the size is word aligned, do it fast */
 219
 220         tst     r2, #0x00000001
 221         tsteq   r1, #0x00000003
 222         beq     .Lfastoutsw
 223
 224 /* Non aligned outsw */
 225
 226 .Loutswloop:
 227         ldrb    r3, [r1], #0x0001
 228         ldrb    ip, [r1], #0x0001
 229         subs    r2, r2, #0x00000001     /* Loop test in load delay slot */
 230         orr     r3, r3, ip, lsl #8
 231         orr     r3, r3, r3, lsl #16
 232         str     r3, [r0]
 233         bgt     .Loutswloop
 234
 235         RET
 236
 237 /* Word aligned outsw */
 238
 239 .Lfastoutsw:
 240
 241 .Lfastoutswloop:
 242         ldr     r3, [r1], #0x0004       /* r3 = (H)(L) */
 243         subs    r2, r2, #0x00000002     /* Loop test in load delay slot */
 244
 245         eor     ip, r3, r3, lsr #16     /* ip = (H)(H^L) */
 246         eor     r3, r3, ip, lsl #16     /* r3 = (H^H^L)(L) = (L)(L) */
 247         eor     ip, ip, r3, lsr #16     /* ip = (H)(H^L^L) = (H)(H) */
 248
 249         str     r3, [r0]
 250         str     ip, [r0]
 251
 252 /*      mov     ip, r3, lsl #16
 253  *      orr     ip, ip, ip, lsr #16
 254  *      str     ip, [r0]
 255  *
 256  *      mov     ip, r3, lsr #16
 257  *      orr     ip, ip, ip, lsl #16
 258  *      str     ip, [r0]
 259  */
 260
 261         bgt     .Lfastoutswloop
 262
 263         RET
 264 END(outsw)
 265
 266 /*
 267  * reads short ints (16 bits) from an I/O address into a block of memory
 268  * with a length garenteed to be a multiple of 16 bytes
 269  * with a word aligned destination address
 270  *
 271  * r0 = address to read from (IO)
 272  * r1 = address to write to (memory)
 273  * r2 = length
 274  */
 275
 276 ENTRY(insw16)
 277 /* Make sure that we have a positive length */
 278         cmp     r2, #0x00000000
 279         movle   pc, lr
 280
 281 /* If the destination address is word aligned and the size suitably
 282    aligned, do it fast */
 283
 284         tst     r2, #0x00000007
 285         tsteq   r1, #0x00000003
 286
 287         bne     _C_LABEL(insw)
 288
 289 /* Word aligned insw */
 290
 291         stmfd   sp!, {r4,r5,lr}
 292
 293 .Linsw16loop:
 294         ldr     r3, [r0, #0x0002]       /* take advantage of nonaligned
 295                                          * word accesses */
 296         ldr     lr, [r0]
 297         mov     r3, r3, lsr #16         /* Put the two shorts together */
 298         orr     r3, r3, lr, lsl #16
 299
 300         ldr     r4, [r0, #0x0002]       /* take advantage of nonaligned
 301                                          * word accesses */
 302         ldr     lr, [r0]
 303         mov     r4, r4, lsr #16         /* Put the two shorts together */
 304         orr     r4, r4, lr, lsl #16
 305
 306         ldr     r5, [r0, #0x0002]       /* take advantage of nonaligned
 307                                          * word accesses */
 308         ldr     lr, [r0]
 309         mov     r5, r5, lsr #16         /* Put the two shorts together */
 310         orr     r5, r5, lr, lsl #16
 311
 312         ldr     ip, [r0, #0x0002]       /* take advantage of nonaligned
 313                                          * word accesses */
 314         ldr     lr, [r0]
 315         mov     ip, ip, lsr #16         /* Put the two shorts together */
 316         orr     ip, ip, lr, lsl #16
 317
 318         stmia   r1!, {r3-r5,ip}
 319         subs    r2, r2, #0x00000008     /* Next */
 320         bgt     .Linsw16loop
 321
 322         ldmfd   sp!, {r4,r5,pc}         /* Restore regs and go home */
 323 END(insw16)
 324
 325 /*
 326  * Writes short ints (16 bits) from a block of memory to an I/O address
 327  *
 328  * r0 = address to write to (IO)
 329  * r1 = address to read from (memory)
 330  * r2 = length
 331  */
 332
 333 ENTRY(outsw16)
 334 /* Make sure that we have a positive length */
 335         cmp     r2, #0x00000000
 336         movle   pc, lr
 337
 338 /* If the destination address is word aligned and the size suitably
 339    aligned, do it fast */
 340
 341         tst     r2, #0x00000007
 342         tsteq   r1, #0x00000003
 343
 344         bne     _C_LABEL(outsw)
 345
 346 /* Word aligned outsw */
 347
 348         stmfd   sp!, {r4,r5,lr}
 349
 350 .Loutsw16loop:
 351         ldmia   r1!, {r4,r5,ip,lr}
 352
 353         eor     r3, r4, r4, lsl #16     /* r3 = (A^B)(B) */
 354         eor     r4, r4, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
 355         eor     r3, r3, r4, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
 356         str     r3, [r0]
 357         str     r4, [r0]
 358
 359 /*      mov     r3, r4, lsl #16
 360  *      orr     r3, r3, r3, lsr #16
 361  *      str     r3, [r0]
 362  *
 363  *      mov     r3, r4, lsr #16
 364  *      orr     r3, r3, r3, lsl #16
 365  *      str     r3, [r0]
 366  */
 367
 368         eor     r3, r5, r5, lsl #16     /* r3 = (A^B)(B) */
 369         eor     r5, r5, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
 370         eor     r3, r3, r5, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
 371         str     r3, [r0]
 372         str     r5, [r0]
 373
 374         eor     r3, ip, ip, lsl #16     /* r3 = (A^B)(B) */
 375         eor     ip, ip, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
 376         eor     r3, r3, ip, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
 377         str     r3, [r0]
 378         str     ip, [r0]
 379
 380         eor     r3, lr, lr, lsl #16     /* r3 = (A^B)(B) */
 381         eor     lr, lr, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
 382         eor     r3, r3, lr, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
 383         str     r3, [r0]
 384         str     lr, [r0]
 385
 386         subs    r2, r2, #0x00000008
 387         bgt     .Loutsw16loop
 388
 389         ldmfd   sp!, {r4,r5,pc}         /* and go home */
 390 END(outsw16)
 391
  392 /*
  393  * Reads short ints (16 bits) from an I/O address into a block of memory.
  394  * The I/O address is assumed to be mapped multiple times in a block of
  395  * 8 words: up to eight consecutive words are read starting at r0.
  396  * The destination address should be word aligned; if not, insw is used.
  397  *
  398  * r0 = address to read from (IO)
  399  * r1 = address to write to (memory)
  400  * r2 = length, counted in 16-bit values; returns at once when <= 0
  401  */
  402
  403 ENTRY(inswm8)
  404 /* Make sure that we have a positive length */
  405         cmp     r2, #0x00000000
  406         movle   pc, lr
  407
  408 /* If the destination address is not word aligned, let insw do the
  409    whole transfer (only the address is tested here, not the length) */
  410
  411         tst     r1, #0x00000003
  412
  413         bne     _C_LABEL(insw)
  414
  415 /* Word aligned insw */
  416
  417         stmfd   sp!, {r4-r9,lr}
  418
  419         mov     lr, #0xff000000
  420         orr     lr, lr, #0x00ff0000     /* lr = 0xffff0000, upper-half mask */
  421
  422 .Linswm8_loop8:
  423         cmp     r2, #8
  424         bcc     .Linswm8_l8             /* fewer than 8 values left */
  425
  426         ldmia   r0, {r3-r9,ip}          /* eight word reads, r0 not updated */
  427
  428         bic     r3, r3, lr              /* keep bits 15:0 of the 1st read */
  429         orr     r3, r3, r4, lsl #16     /* 2nd read becomes bits 31:16 */
  430         bic     r5, r5, lr
  431         orr     r4, r5, r6, lsl #16     /* r4 = reads 3 and 4 */
  432         bic     r7, r7, lr
  433         orr     r5, r7, r8, lsl #16     /* r5 = reads 5 and 6 */
  434         bic     r9, r9, lr
  435         orr     r6, r9, ip, lsl #16     /* r6 = reads 7 and 8 */
  436
  437         stmia   r1!, {r3-r6}            /* store the four packed words */
  438
  439         subs    r2, r2, #0x00000008     /* Next */
  440         bne     .Linswm8_loop8
  441         beq     .Linswm8_l1             /* count reached 0: finished */
  442
  443 .Linswm8_l8:
  444         cmp     r2, #4
  445         bcc     .Linswm8_l4             /* fewer than 4 values left */
  446
  447         ldmia   r0, {r3-r6}             /* four word reads */
  448
  449         bic     r3, r3, lr
  450         orr     r3, r3, r4, lsl #16     /* r3 = reads 1 and 2 */
  451         bic     r5, r5, lr
  452         orr     r4, r5, r6, lsl #16     /* r4 = reads 3 and 4 */
  453
  454         stmia   r1!, {r3-r4}
  455
  456         subs    r2, r2, #0x00000004
  457         beq     .Linswm8_l1
  458
  459 .Linswm8_l4:
  460         cmp     r2, #2
  461         bcc     .Linswm8_l2             /* fewer than 2 values left */
  462
  463         ldmia   r0, {r3-r4}             /* two word reads */
  464
  465         bic     r3, r3, lr
  466         orr     r3, r3, r4, lsl #16
  467         str     r3, [r1], #0x0004
  468
  469         subs    r2, r2, #0x00000002
  470         beq     .Linswm8_l1
  471
  472 .Linswm8_l2:
  473         cmp     r2, #1
  474         bcc     .Linswm8_l1             /* nothing left */
  475
  476         ldr     r3, [r0]                /* last value: one word read */
  477         subs    r2, r2, #0x00000001     /* Test in load delay slot */
  478                                         /* XXX, why don't we use result?  */
  479
  480         strb    r3, [r1], #0x0001       /* store bits 7:0 */
  481         mov     r3, r3, lsr #8
  482         strb    r3, [r1], #0x0001       /* store bits 15:8 */
  483
  484
  485 .Linswm8_l1:
  486         ldmfd   sp!, {r4-r9,pc}         /* And go home */
  487 END(inswm8)
 488
  489 /*
  490  * Write short ints (16 bits) to an I/O address from a block of memory.
  491  * The I/O address is assumed to be mapped multiple times in a block of
  492  * 8 words: up to eight consecutive words are written starting at r0.
  493  * The source address should be word aligned; if not, outsw is used.
  494  *
  495  * r0 = address to write to (IO)
  496  * r1 = address to read from (memory)
  497  * r2 = length, counted in 16-bit values; returns at once when <= 0
  498  */
  499
  500 ENTRY(outswm8)
  501 /* Make sure that we have a positive length */
  502         cmp     r2, #0x00000000
  503         movle   pc, lr
  504
  505 /* If the source address is not word aligned, let outsw do the
  506    whole transfer (only the address is tested here, not the length) */
  507
  508         tst     r1, #0x00000003
  509
  510         bne     _C_LABEL(outsw)
  511
  512 /* Word aligned outsw */
  513
  514         stmfd   sp!, {r4-r8,lr}
  515
  516 .Loutswm8_loop8:
  517         cmp     r2, #8
  518         bcc     .Loutswm8_l8            /* fewer than 8 values left */
  519
  520         ldmia   r1!, {r3,r5,r7,ip}      /* four words, each (A)(B) */
  521
  522         eor     r4, r3, r3, lsr #16     /* r4 = (A)(A^B) */
  523         eor     r3, r3, r4, lsl #16     /* r3 = (A^A^B)(B) = (B)(B) */
  524         eor     r4, r4, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
  525
  526         eor     r6, r5, r5, lsr #16     /* r6 = (A)(A^B) */
  527         eor     r5, r5, r6, lsl #16     /* r5 = (A^A^B)(B) = (B)(B) */
  528         eor     r6, r6, r5, lsr #16     /* r6 = (A)(B^A^B) = (A)(A) */
  529
  530         eor     r8, r7, r7, lsr #16     /* r8 = (A)(A^B) */
  531         eor     r7, r7, r8, lsl #16     /* r7 = (A^A^B)(B) = (B)(B) */
  532         eor     r8, r8, r7, lsr #16     /* r8 = (A)(B^A^B) = (A)(A) */
  533
  534         eor     lr, ip, ip, lsr #16     /* lr = (A)(A^B) */
  535         eor     ip, ip, lr, lsl #16     /* ip = (A^A^B)(B) = (B)(B) */
  536         eor     lr, lr, ip, lsr #16     /* lr = (A)(B^A^B) = (A)(A) */
  537
  538         stmia   r0, {r3-r8,ip,lr}       /* eight word writes, r0 not updated */
  539
  540         subs    r2, r2, #0x00000008     /* Next */
  541         bne     .Loutswm8_loop8
  542         beq     .Loutswm8_l1            /* count reached 0: finished */
  543
  544 .Loutswm8_l8:
  545         cmp     r2, #4
  546         bcc     .Loutswm8_l4            /* fewer than 4 values left */
  547
  548         ldmia   r1!, {r3-r4}            /* two words, each (A)(B) */
  549
  550         eor     r6, r3, r3, lsr #16     /* r6 = (A)(A^B) */
  551         eor     r5, r3, r6, lsl #16     /* r5 = (A^A^B)(B) = (B)(B) */
  552         eor     r6, r6, r5, lsr #16     /* r6 = (A)(B^A^B) = (A)(A) */
  553
  554         eor     r8, r4, r4, lsr #16     /* r8 = (A)(A^B) */
  555         eor     r7, r4, r8, lsl #16     /* r7 = (A^A^B)(B) = (B)(B) */
  556         eor     r8, r8, r7, lsr #16     /* r8 = (A)(B^A^B) = (A)(A) */
  557
  558         stmia   r0, {r5-r8}             /* four word writes */
  559
  560         subs    r2, r2, #0x00000004
  561         beq     .Loutswm8_l1
  562
  563 .Loutswm8_l4:
  564         cmp     r2, #2
  565         bcc     .Loutswm8_l2            /* fewer than 2 values left */
  566
  567         ldr     r3, [r1], #0x0004       /* r3 = (A)(B) */
  568         subs    r2, r2, #0x00000002     /* Done test in Load delay slot */
  569
  570         eor     r5, r3, r3, lsr #16     /* r5 = (A)(A^B)*/
  571         eor     r4, r3, r5, lsl #16     /* r4 = (A^A^B)(B) = (B)(B) */
  572         eor     r5, r5, r4, lsr #16     /* r5 = (A)(B^A^B) = (A)(A) */
  573
  574         stmia   r0, {r4, r5}            /* two word writes */
  575
  576         beq     .Loutswm8_l1            /* uses the flags from the subs above */
  577
  578 .Loutswm8_l2:
  579         cmp     r2, #1
  580         bcc     .Loutswm8_l1            /* nothing left */
  581
  582         ldrb    r3, [r1], #0x0001       /* last value, bits 7:0 */
  583         ldrb    r4, [r1], #0x0001       /* last value, bits 15:8 */
  584         subs    r2, r2, #0x00000001     /* Done test in load delay slot */
  585                                         /* XXX This test isn't used?  */
  586         orr     r3, r3, r4, lsl #8
  587         orr     r3, r3, r3, lsl #16     /* same value in both halves */
  588         str     r3, [r0]
  589
  590 .Loutswm8_l1:
  591         ldmfd   sp!, {r4-r8,pc}         /* And go home */
  592 END(outswm8)
 593