sys/arm/arm/blockio.S

   1 /*      $NetBSD: blockio.S,v 1.5 2002/08/15 01:38:16 briggs Exp $       */
   2
   3 /*-
   4  * Copyright (c) 2001 Ben Harris.
   5  * Copyright (c) 1994 Mark Brinicombe.
   6  * Copyright (c) 1994 Brini.
   7  * All rights reserved.
   8  *
   9  * This code is derived from software written for Brini by Mark Brinicombe
  10  *
  11  * Redistribution and use in source and binary forms, with or without
  12  * modification, are permitted provided that the following conditions
  13  * are met:
  14  * 1. Redistributions of source code must retain the above copyright
  15  *    notice, this list of conditions and the following disclaimer.
  16  * 2. Redistributions in binary form must reproduce the above copyright
  17  *    notice, this list of conditions and the following disclaimer in the
  18  *    documentation and/or other materials provided with the distribution.
  19  * 3. All advertising materials mentioning features or use of this software
  20  *    must display the following acknowledgement:
  21  *      This product includes software developed by Brini.
  22  * 4. The name of the company nor the name of the author may be used to
  23  *    endorse or promote products derived from this software without specific
  24  *    prior written permission.
  25  *
  26  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
  27  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  28  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  29  * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  30  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  31  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  32  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  36  * SUCH DAMAGE.
  37  *
  38  * RiscBSD kernel project
  39  *
  40  * blockio.S
  41  *
  42  * optimised block read/write from/to IO routines.
  43  *
  44  * Created      : 08/10/94
  45  * Modified     : 22/01/99  -- R.Earnshaw
  46  *                             Faster, and small tweaks for StrongARM
  47  */
  48
  49 #include <machine/asm.h>
  50
  51 __FBSDID("$FreeBSD$");
  52
  53 /*
  54  * Read bytes from an I/O address into a block of memory
  55  *
  56  * r0 = address to read from (IO)
  57  * r1 = address to write to (memory)
  58  * r2 = length
  59  */
  60
  61 /* This code will look very familiar if you've read _memcpy(). */
  62 ENTRY(read_multi_1)
     /*
      * Copy r2 bytes from the single I/O address in r0 to the memory
      * buffer at r1, using one byte load from the device per byte copied.
      * r0 is never advanced.  r1 is post-incremented and r2 is counted
      * down as the copy proceeds; r3 and r12 (ip) are scratch.
      *
      * Layout: (1) byte copies until r1 is word aligned, (2) a main loop
      * that packs four device bytes into one word store, (3) a tail of
      * up to three byte copies.
      */
     /* Build a stack frame; the ldm instructions below unwind it. */
  63         mov     ip, sp
  64         stmfd   sp!, {fp, ip, lr, pc}
  65         sub     fp, ip, #4
  66         subs    r2, r2, #4              /* r2 = length - 4 */
  67         blt     .Lrm1_l4                        /* less than 4 bytes */
  68         ands    r12, r1, #3
  69         beq     .Lrm1_main              /* aligned destination */
  70         rsb     r12, r12, #4            /* r12 = bytes needed to align r1 */
  71         cmp     r12, #2                 /* flags select 1, 2 (ge) or 3 (gt) */
  72         ldrb    r3, [r0]
  73         strb    r3, [r1], #1
  74         ldrgeb  r3, [r0]
  75         strgeb  r3, [r1], #1
  76         ldrgtb  r3, [r0]
  77         strgtb  r3, [r1], #1
  78         subs    r2, r2, r12             /* account for the alignment bytes */
  79         blt     .Lrm1_l4
  80 .Lrm1_main:
  81 .Lrm1loop:
     /*
      * Four device reads per pass.  The first byte read lands in bits 7:0
      * and the fourth in bits 31:24 of the word that is stored.
      */
  82         ldrb    r3, [r0]
  83         ldrb    r12, [r0]
  84         orr     r3, r3, r12, lsl #8
  85         ldrb    r12, [r0]
  86         orr     r3, r3, r12, lsl #16
  87         ldrb    r12, [r0]
  88         orr     r3, r3, r12, lsl #24
  89         str     r3, [r1], #4
  90         subs    r2, r2, #4
  91         bge     .Lrm1loop
  92 .Lrm1_l4:
  93         adds    r2, r2, #4              /* r2 = bytes still to copy */
  94         ldmeqdb fp, {fp, sp, pc}        /* none left: unwind and return */
  96         cmp     r2, #2                  /* flags select 1, 2 (ge) or 3 (gt) */
  97         ldrb    r3, [r0]
  98         strb    r3, [r1], #1
  99         ldrgeb  r3, [r0]
 100         strgeb  r3, [r1], #1
 101         ldrgtb  r3, [r0]
 102         strgtb  r3, [r1], #1
 103         ldmdb   fp, {fp, sp, pc}        /* restore fp and sp, return via saved lr */
 104
 105 /*
 106  * Write bytes to an I/O address from a block of memory
 107  *
 108  * r0 = address to write to (IO)
 109  * r1 = address to read from (memory)
 110  * r2 = length
 111  */
 112
 113 /* This code will look very familiar if you've read _memcpy(). */
 114 ENTRY(write_multi_1)
     /*
      * Copy r2 bytes from the memory buffer at r1 to the single I/O
      * address in r0, using one byte store to the device per byte copied.
      * r0 is never advanced.  r1 is post-incremented and r2 is counted
      * down as the copy proceeds; r3 and r12 (ip) are scratch.
      *
      * Layout: (1) byte copies until r1 is word aligned, (2) a main loop
      * that loads one word and emits its four bytes, (3) a tail of up to
      * three byte copies.
      */
     /* Build a stack frame; the ldm instructions below unwind it. */
 115         mov     ip, sp
 116         stmfd   sp!, {fp, ip, lr, pc}
 117         sub     fp, ip, #4
 118         subs    r2, r2, #4              /* r2 = length - 4 */
 119         blt     .Lwm1_l4                /* less than 4 bytes */
 120         ands    r12, r1, #3
 121         beq     .Lwm1_main              /* aligned source */
 122         rsb     r12, r12, #4            /* r12 = bytes needed to align r1 */
 123         cmp     r12, #2                 /* flags select 1, 2 (ge) or 3 (gt) */
 124         ldrb    r3, [r1], #1
 125         strb    r3, [r0]
 126         ldrgeb  r3, [r1], #1
 127         strgeb  r3, [r0]
 128         ldrgtb  r3, [r1], #1
 129         strgtb  r3, [r0]
 130         subs    r2, r2, r12             /* account for the alignment bytes */
 131         blt     .Lwm1_l4
 132 .Lwm1_main:
 133 .Lwm1loop:
     /*
      * One word load, four device writes: bits 7:0 go out first, then
      * the word is shifted right by 8 for each following byte.
      */
 134         ldr     r3, [r1], #4
 135         strb    r3, [r0]
 136         mov     r3, r3, lsr #8
 137         strb    r3, [r0]
 138         mov     r3, r3, lsr #8
 139         strb    r3, [r0]
 140         mov     r3, r3, lsr #8
 141         strb    r3, [r0]
 142         subs    r2, r2, #4
 143         bge     .Lwm1loop
 144 .Lwm1_l4:
 145         adds    r2, r2, #4                      /* r2 = bytes still to copy */
 146         ldmeqdb fp, {fp, sp, pc}        /* none left: unwind and return */
 147         cmp     r2, #2                  /* flags select 1, 2 (ge) or 3 (gt) */
 148         ldrb    r3, [r1], #1
 149         strb    r3, [r0]
 150         ldrgeb  r3, [r1], #1
 151         strgeb  r3, [r0]
 152         ldrgtb  r3, [r1], #1
 153         strgtb  r3, [r0]
 154         ldmdb   fp, {fp, sp, pc}        /* restore fp and sp, return via saved lr */
 155
 156 /*
 157  * Reads short ints (16 bits) from an I/O address into a block of memory
 158  *
 159  * r0 = address to read from (IO)
 160  * r1 = address to write to (memory)
 161  * r2 = length
 162  */
 163
 164 ENTRY(insw)
 165 /* r2 counts 16-bit items (signed); nothing to do unless it is > 0 */
 166         cmp     r2, #0
 167         movle   pc, lr
 168
 169 /* Even count and word-aligned buffer (r1): use the paired path */
 170
 171         tst     r2, #1
 172         tsteq   r1, #3
 173         beq     .Linsw_paired
 174
 175 /* General path: one port read per item, stored to memory as two bytes */
 176
 177 .Linsw_single:
 178         ldr     r3, [r0]
 179         strb    r3, [r1], #1            /* bits 7:0 */
 180         mov     r3, r3, lsr #8
 181         strb    r3, [r1], #1            /* bits 15:8 */
 182         subs    r2, r2, #1              /* one item done */
 183         bgt     .Linsw_single
 184
 185         RET
 186
 187 /* Paired path: two port reads are merged into a single word store */
 188
 189 .Linsw_paired:
 190
 191 .Linsw_paired_loop:
 192         ldr     r3, [r0, #2]            /* first read: word load at a
 193                                          * non-word-aligned address */
 194         ldr     ip, [r0]                /* second read */
 195         mov     r3, r3, lsr #16         /* upper half of first read -> bits 15:0 */
 196         orr     r3, r3, ip, lsl #16     /* lower half of second read -> bits 31:16 */
 197         str     r3, [r1], #4
 198         subs    r2, r2, #2              /* two items done */
 199         bgt     .Linsw_paired_loop
 200
 201         RET
 202
 203
 204 /*
 205  * Writes short ints (16 bits) from a block of memory to an I/O address
 206  *
 207  * r0 = address to write to (IO)
 208  * r1 = address to read from (memory)
 209  * r2 = length
 210  */
 211
 212 ENTRY(outsw)
 213 /* r2 counts 16-bit items (signed); nothing to do unless it is > 0 */
 214         cmp     r2, #0
 215         movle   pc, lr
 216
 217 /* Even count and word-aligned buffer (r1): use the paired path */
 218
 219         tst     r2, #1
 220         tsteq   r1, #3
 221         beq     .Loutsw_paired
 222
 223 /* General path: build each item from two byte loads */
 224
 225 .Loutsw_single:
 226         ldrb    r3, [r1], #1            /* bits 7:0 */
 227         ldrb    ip, [r1], #1            /* bits 15:8 */
 228         orr     r3, r3, ip, lsl #8
 229         orr     r3, r3, r3, lsl #16     /* same item in both halves */
 230         str     r3, [r0]
 231         subs    r2, r2, #1              /* one item done */
 232         bgt     .Loutsw_single
 233
 234         RET
 235
 236 /* Paired path: one word load feeds two port writes */
 237
 238 .Loutsw_paired:
 239
 240 .Loutsw_paired_loop:
 241         ldr     r3, [r1], #4            /* r3 = (H)(L) */
 242
 243 /*
 244  * Three-eor shuffle duplicating each half across a full word.  It has
 245  * the same effect as "mov x, r3, lsl #16; orr x, x, x, lsr #16" for
 246  * the low half and "mov x, r3, lsr #16; orr x, x, x, lsl #16" for the
 247  * high half.
 248  */
 249         eor     ip, r3, r3, lsr #16     /* ip = (H)(H^L) */
 250         eor     r3, r3, ip, lsl #16     /* r3 = (L)(L) */
 251         eor     ip, ip, r3, lsr #16     /* ip = (H)(H) */
 252
 253         str     r3, [r0]                /* low half goes out first */
 254         str     ip, [r0]                /* then the high half */
 255
 256         subs    r2, r2, #2              /* two items done */
 260         bgt     .Loutsw_paired_loop
 261
 262         RET
 263
 264 /*
 265  * reads short ints (16 bits) from an I/O address into a block of memory
 266  * with a length guaranteed to be a multiple of 16 bytes
 267  * with a word aligned destination address
 268  *
 269  * r0 = address to read from (IO)
 270  * r1 = address to write to (memory)
 271  * r2 = length
 272  */
 273
 274 ENTRY(insw16)
     /*
      * Unrolled variant of insw: each pass performs eight port reads at
      * r0 and stores four words (eight 16-bit items) at r1.  r2 is the
      * item count and is decremented by 8 per pass.  If the count is not
      * a multiple of 8 or r1 is not word aligned, control is transferred
      * to insw instead.  r4 and r5 are saved and restored; lr is saved
      * and reused as a scratch register.
      */
 275 /* Make sure that we have a positive length */
 276         cmp     r2, #0x00000000
 277         movle   pc, lr
 278
 279 /* If the destination address is word aligned and the size suitably
 280    aligned, do it fast */
 281
 282         tst     r2, #0x00000007         /* Z set when count is a multiple of 8 */
 283         tsteq   r1, #0x00000003         /* only then test r1 alignment */
 284
 285         bne     _C_LABEL(insw)          /* plain branch: insw returns to our caller */
 286
 287 /* Word aligned insw */
 288
 289         stmfd   sp!, {r4,r5,lr}
 290
 291 .Linsw16loop:
     /* Four identical groups; each merges two port reads into one word. */
 292         ldr     r3, [r0, #0x0002]       /* take advantage of nonaligned
 293                                          * word accesses */
 294         ldr     lr, [r0]
 295         mov     r3, r3, lsr #16         /* Put the two shorts together */
 296         orr     r3, r3, lr, lsl #16
 297
 298         ldr     r4, [r0, #0x0002]       /* take advantage of nonaligned
 299                                          * word accesses */
 300         ldr     lr, [r0]
 301         mov     r4, r4, lsr #16         /* Put the two shorts together */
 302         orr     r4, r4, lr, lsl #16
 303
 304         ldr     r5, [r0, #0x0002]       /* take advantage of nonaligned
 305                                          * word accesses */
 306         ldr     lr, [r0]
 307         mov     r5, r5, lsr #16         /* Put the two shorts together */
 308         orr     r5, r5, lr, lsl #16
 309
 310         ldr     ip, [r0, #0x0002]       /* take advantage of nonaligned
 311                                          * word accesses */
 312         ldr     lr, [r0]
 313         mov     ip, ip, lsr #16         /* Put the two shorts together */
 314         orr     ip, ip, lr, lsl #16
 315
 316         stmia   r1!, {r3-r5,ip}         /* store the four merged words */
 317         subs    r2, r2, #0x00000008     /* Next */
 318         bgt     .Linsw16loop
 319
 320         ldmfd   sp!, {r4,r5,pc}         /* Restore regs and go home */
 321
 322
 323 /*
 324  * Writes short ints (16 bits) from a block of memory to an I/O address
 325  *
 326  * r0 = address to write to (IO)
 327  * r1 = address to read from (memory)
 328  * r2 = length
 329  */
 330
 331 ENTRY(outsw16)
     /*
      * Unrolled variant of outsw: each pass loads four words (eight
      * 16-bit items) from r1 and performs eight word stores to r0.  r2 is
      * the item count and is decremented by 8 per pass.  If the count is
      * not a multiple of 8 or r1 is not word aligned, control is
      * transferred to outsw instead.  r4 and r5 are saved and restored;
      * lr is saved and reused as a data register.
      *
      * In the comments below a loaded word is written (A)(B): A is its
      * upper 16 bits and B its lower 16 bits.  For each word, (B)(B) is
      * written to the port first, then (A)(A).
      */
 332 /* Make sure that we have a positive length */
 333         cmp     r2, #0x00000000
 334         movle   pc, lr
 335
 336 /* If the source address is word aligned and the size suitably
 337    aligned, do it fast */
 338
 339         tst     r2, #0x00000007         /* Z set when count is a multiple of 8 */
 340         tsteq   r1, #0x00000003         /* only then test r1 alignment */
 341
 342         bne     _C_LABEL(outsw)         /* plain branch: outsw returns to our caller */
 343
 344 /* Word aligned outsw */
 345
 346         stmfd   sp!, {r4,r5,lr}
 347
 348 .Loutsw16loop:
 349         ldmia   r1!, {r4,r5,ip,lr}      /* four words = eight items */
 350
 351         eor     r3, r4, r4, lsl #16     /* r3 = (A^B)(B) */
 352         eor     r4, r4, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
 353         eor     r3, r3, r4, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
 354         str     r3, [r0]
 355         str     r4, [r0]
 356
 357 /*      mov     r3, r4, lsl #16
 358  *      orr     r3, r3, r3, lsr #16
 359  *      str     r3, [r0]
 360  *
 361  *      mov     r3, r4, lsr #16
 362  *      orr     r3, r3, r3, lsl #16
 363  *      str     r3, [r0]
 364  */
 365
 366         eor     r3, r5, r5, lsl #16     /* r3 = (A^B)(B) */
 367         eor     r5, r5, r3, lsr #16     /* r5 = (A)(B^A^B) = (A)(A) */
 368         eor     r3, r3, r5, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
 369         str     r3, [r0]
 370         str     r5, [r0]
 371
 372         eor     r3, ip, ip, lsl #16     /* r3 = (A^B)(B) */
 373         eor     ip, ip, r3, lsr #16     /* ip = (A)(B^A^B) = (A)(A) */
 374         eor     r3, r3, ip, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
 375         str     r3, [r0]
 376         str     ip, [r0]
 377
 378         eor     r3, lr, lr, lsl #16     /* r3 = (A^B)(B) */
 379         eor     lr, lr, r3, lsr #16     /* lr = (A)(B^A^B) = (A)(A) */
 380         eor     r3, r3, lr, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
 381         str     r3, [r0]
 382         str     lr, [r0]
 383
 384         subs    r2, r2, #0x00000008     /* eight items done */
 385         bgt     .Loutsw16loop
 386
 387         ldmfd   sp!, {r4,r5,pc}         /* and go home */
 388
 389 /*
 390  * reads short ints (16 bits) from an I/O address into a block of memory
 391  * The I/O address is assumed to be mapped multiple times in a block of
 392  * 8 words.
 393  * The destination address should be word aligned.
 394  *
 395  * r0 = address to read from (IO)
 396  * r1 = address to write to (memory)
 397  * r2 = length
 398  */
 399
 400 ENTRY(inswm8)
     /*
      * Read r2 16-bit items into the buffer at r1 using multi-word loads
      * from r0.  The loads cover up to eight consecutive words starting
      * at r0, and r0 itself is never written back.  The low 16 bits of
      * each word read form one item; pairs of items are packed into one
      * word and stored at r1.  Counts that are not a multiple of 8 are
      * finished by the 4-, 2- and 1-item stages below the main loop.
      * r4-r9 are saved and restored; lr is saved and used to hold the
      * mask 0xffff0000.
      */
 401 /* Make sure that we have a positive length */
 402         cmp     r2, #0x00000000
 403         movle   pc, lr
 404
 405 /* Only the destination address is tested here: if it is not word
 406    aligned, hand the whole transfer to insw */
 407
 408         tst     r1, #0x00000003
 409
 410         bne     _C_LABEL(insw)          /* plain branch: insw returns to our caller */
 411
 412 /* Word aligned insw */
 413
 414         stmfd   sp!, {r4-r9,lr}
 415
 416         mov     lr, #0xff000000
 417         orr     lr, lr, #0x00ff0000     /* lr = 0xffff0000 */
 418
 419 .Linswm8_loop8:
 420         cmp     r2, #8
 421         bcc     .Linswm8_l8             /* fewer than 8 items left */
 422
 423         ldmia   r0, {r3-r9,ip}          /* eight word reads, no writeback */
 424
     /* Keep the low half of each even read, put the next read above it. */
 425         bic     r3, r3, lr
 426         orr     r3, r3, r4, lsl #16
 427         bic     r5, r5, lr
 428         orr     r4, r5, r6, lsl #16
 429         bic     r7, r7, lr
 430         orr     r5, r7, r8, lsl #16
 431         bic     r9, r9, lr
 432         orr     r6, r9, ip, lsl #16
 433
 434         stmia   r1!, {r3-r6}            /* four packed words = eight items */
 435
 436         subs    r2, r2, #0x00000008     /* Next */
 437         bne     .Linswm8_loop8
 438         beq     .Linswm8_l1             /* always taken here: count reached zero */
 439
 440 .Linswm8_l8:
 441         cmp     r2, #4
 442         bcc     .Linswm8_l4             /* fewer than 4 items left */
 443
 444         ldmia   r0, {r3-r6}             /* four word reads */
 445
 446         bic     r3, r3, lr
 447         orr     r3, r3, r4, lsl #16
 448         bic     r5, r5, lr
 449         orr     r4, r5, r6, lsl #16
 450
 451         stmia   r1!, {r3-r4}            /* two packed words = four items */
 452
 453         subs    r2, r2, #0x00000004
 454         beq     .Linswm8_l1
 455
 456 .Linswm8_l4:
 457         cmp     r2, #2
 458         bcc     .Linswm8_l2             /* fewer than 2 items left */
 459
 460         ldmia   r0, {r3-r4}             /* two word reads */
 461
 462         bic     r3, r3, lr
 463         orr     r3, r3, r4, lsl #16
 464         str     r3, [r1], #0x0004       /* one packed word = two items */
 465
 466         subs    r2, r2, #0x00000002
 467         beq     .Linswm8_l1
 468
 469 .Linswm8_l2:
 470         cmp     r2, #1
 471         bcc     .Linswm8_l1
 472
     /* Last single item: stored as two bytes, low byte first. */
 473         ldr     r3, [r0]
 474         subs    r2, r2, #0x00000001     /* Test in load delay slot */
 475                                         /* XXX, why don't we use result?  */
 476
 477         strb    r3, [r1], #0x0001
 478         mov     r3, r3, lsr #8
 479         strb    r3, [r1], #0x0001
 480
 481
 482 .Linswm8_l1:
 483         ldmfd   sp!, {r4-r9,pc}         /* And go home */
 484
 485 /*
 486  * write short ints (16 bits) to an I/O address from a block of memory
 487  * The I/O address is assumed to be mapped multiple times in a block of
 488  * 8 words.
 489  * The source address should be word aligned.
 490  *
 491  * r0 = address to write to (IO)
 492  * r1 = address to read from (memory)
 493  * r2 = length
 494  */
 495
 496 ENTRY(outswm8)
     /*
      * Write r2 16-bit items from the buffer at r1 using multi-word
      * stores to r0.  The stores cover up to eight consecutive words
      * starting at r0, and r0 itself is never written back.  Each item
      * is written as a word carrying the item in both halves.  Counts
      * that are not a multiple of 8 are finished by the 4-, 2- and 1-item
      * stages below the main loop.  r4-r8 are saved and restored; lr is
      * saved and reused as a data register.
      *
      * In the comments below a loaded word is written (A)(B): A is its
      * upper 16 bits and B its lower 16 bits.  (B)(B) is stored at the
      * lower address of each pair, (A)(A) at the next one.
      */
 497 /* Make sure that we have a positive length */
 498         cmp     r2, #0x00000000
 499         movle   pc, lr
 500
 501 /* Only the source address is tested here: if it is not word
 502    aligned, hand the whole transfer to outsw */
 503
 504         tst     r1, #0x00000003
 505
 506         bne     _C_LABEL(outsw)         /* plain branch: outsw returns to our caller */
 507
 508 /* Word aligned outsw */
 509
 510         stmfd   sp!, {r4-r8,lr}
 511
 512 .Loutswm8_loop8:
 513         cmp     r2, #8
 514         bcc     .Loutswm8_l8            /* fewer than 8 items left */
 515
 516         ldmia   r1!, {r3,r5,r7,ip}      /* four words = eight items */
 517
 518         eor     r4, r3, r3, lsr #16     /* r4 = (A)(A^B) */
 519         eor     r3, r3, r4, lsl #16     /* r3 = (A^A^B)(B) = (B)(B) */
 520         eor     r4, r4, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
 521
 522         eor     r6, r5, r5, lsr #16     /* r6 = (A)(A^B) */
 523         eor     r5, r5, r6, lsl #16     /* r5 = (A^A^B)(B) = (B)(B) */
 524         eor     r6, r6, r5, lsr #16     /* r6 = (A)(B^A^B) = (A)(A) */
 525
 526         eor     r8, r7, r7, lsr #16     /* r8 = (A)(A^B) */
 527         eor     r7, r7, r8, lsl #16     /* r7 = (A^A^B)(B) = (B)(B) */
 528         eor     r8, r8, r7, lsr #16     /* r8 = (A)(B^A^B) = (A)(A) */
 529
 530         eor     lr, ip, ip, lsr #16     /* lr = (A)(A^B) */
 531         eor     ip, ip, lr, lsl #16     /* ip = (A^A^B)(B) = (B)(B) */
 532         eor     lr, lr, ip, lsr #16     /* lr = (A)(B^A^B) = (A)(A) */
 533
 534         stmia   r0, {r3-r8,ip,lr}       /* eight word writes, no writeback */
 535
 536         subs    r2, r2, #0x00000008     /* Next */
 537         bne     .Loutswm8_loop8
 538         beq     .Loutswm8_l1            /* always taken here: count reached zero */
 539
 540 .Loutswm8_l8:
 541         cmp     r2, #4
 542         bcc     .Loutswm8_l4            /* fewer than 4 items left */
 543
 544         ldmia   r1!, {r3-r4}            /* two words = four items */
 545
 546         eor     r6, r3, r3, lsr #16     /* r6 = (A)(A^B) */
 547         eor     r5, r3, r6, lsl #16     /* r5 = (A^A^B)(B) = (B)(B) */
 548         eor     r6, r6, r5, lsr #16     /* r6 = (A)(B^A^B) = (A)(A) */
 549
 550         eor     r8, r4, r4, lsr #16     /* r8 = (A)(A^B) */
 551         eor     r7, r4, r8, lsl #16     /* r7 = (A^A^B)(B) = (B)(B) */
 552         eor     r8, r8, r7, lsr #16     /* r8 = (A)(B^A^B) = (A)(A) */
 553
 554         stmia   r0, {r5-r8}             /* four word writes */
 555
 556         subs    r2, r2, #0x00000004
 557         beq     .Loutswm8_l1
 558
 559 .Loutswm8_l4:
 560         cmp     r2, #2
 561         bcc     .Loutswm8_l2            /* fewer than 2 items left */
 562
 563         ldr     r3, [r1], #0x0004       /* r3 = (A)(B) */
 564         subs    r2, r2, #0x00000002     /* Done test in Load delay slot */
 565
 566         eor     r5, r3, r3, lsr #16     /* r5 = (A)(A^B)*/
 567         eor     r4, r3, r5, lsl #16     /* r4 = (A^A^B)(B) = (B)(B) */
 568         eor     r5, r5, r4, lsr #16     /* r5 = (A)(B^A^B) = (A)(A) */
 569
 570         stmia   r0, {r4, r5}            /* two word writes */
 571
 572         beq     .Loutswm8_l1            /* uses the flags from the subs above */
 573
 574 .Loutswm8_l2:
 575         cmp     r2, #1
 576         bcc     .Loutswm8_l1
 577
     /* Last single item: built from two byte loads, low byte first. */
 578         ldrb    r3, [r1], #0x0001
 579         ldrb    r4, [r1], #0x0001
 580         subs    r2, r2, #0x00000001     /* Done test in load delay slot */
 581                                         /* XXX This test isn't used?  */
 582         orr     r3, r3, r4, lsl #8
 583         orr     r3, r3, r3, lsl #16     /* same item in both halves */
 584         str     r3, [r0]
 585
 586 .Loutswm8_l1:
 587         ldmfd   sp!, {r4-r8,pc}         /* And go home */