sys/arm/arm/blockio.S

   1 /*      $NetBSD: blockio.S,v 1.5 2002/08/15 01:38:16 briggs Exp $       */
   2
   3 /*-
   4  * Copyright (c) 2001 Ben Harris.
   5  * Copyright (c) 1994 Mark Brinicombe.
   6  * Copyright (c) 1994 Brini.
   7  * All rights reserved.
   8  *
   9  * This code is derived from software written for Brini by Mark Brinicombe
  10  *
  11  * Redistribution and use in source and binary forms, with or without
  12  * modification, are permitted provided that the following conditions
  13  * are met:
  14  * 1. Redistributions of source code must retain the above copyright
  15  *    notice, this list of conditions and the following disclaimer.
  16  * 2. Redistributions in binary form must reproduce the above copyright
  17  *    notice, this list of conditions and the following disclaimer in the
  18  *    documentation and/or other materials provided with the distribution.
  19  * 3. All advertising materials mentioning features or use of this software
  20  *    must display the following acknowledgement:
  21  *      This product includes software developed by Brini.
  22  * 4. The name of the company nor the name of the author may be used to
  23  *    endorse or promote products derived from this software without specific
  24  *    prior written permission.
  25  *
  26  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR IMPLIED
  27  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  28  * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  29  * IN NO EVENT SHALL BRINI OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
  30  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
  31  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  32  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  36  * SUCH DAMAGE.
  37  *
  38  * RiscBSD kernel project
  39  *
  40  * blockio.S
  41  *
  42  * optimised block read/write from/to IO routines.
  43  *
  44  * Created      : 08/10/94
  45  * Modified     : 22/01/99  -- R.Earnshaw
  46  *                             Faster, and small tweaks for StrongARM
  47  */
  48
  49 #include <machine/asm.h>
  50
  51 __FBSDID("$FreeBSD$");
  52
  53 /*
  54  * Read bytes from an I/O address into a block of memory
  55  *
  56  * r0 = address to read from (IO)
  57  * r1 = address to write to (memory)
  58  * r2 = length
  59  */
  60
  61 /* This code will look very familiar if you've read _memcpy(). */
  62 ENTRY(read_multi_1)
  63         mov     ip, sp
  64         stmfd   sp!, {fp, ip, lr, pc}
  65         sub     fp, ip, #4
  66         subs    r2, r2, #4              /* r2 = length - 4 */
  67         blt     .Lrm1_l4                        /* less than 4 bytes */
  68         ands    r12, r1, #3
  69         beq     .Lrm1_main              /* aligned destination */
  70         rsb     r12, r12, #4
  71         cmp     r12, #2
  72         ldrb    r3, [r0]
  73         strb    r3, [r1], #1
  74         ldrgeb  r3, [r0]
  75         strgeb  r3, [r1], #1
  76         ldrgtb  r3, [r0]
  77         strgtb  r3, [r1], #1
  78         subs    r2, r2, r12
  79         blt     .Lrm1_l4
  80 .Lrm1_main:
  81 .Lrm1loop:
  82         ldrb    r3, [r0]
  83         ldrb    r12, [r0]
  84         orr     r3, r3, r12, lsl #8
  85         ldrb    r12, [r0]
  86         orr     r3, r3, r12, lsl #16
  87         ldrb    r12, [r0]
  88         orr     r3, r3, r12, lsl #24
  89         str     r3, [r1], #4
  90         subs    r2, r2, #4
  91         bge     .Lrm1loop
  92 .Lrm1_l4:
  93         adds    r2, r2, #4                      /* r2 = length again */
  94         ldmeqdb fp, {fp, sp, pc}
  95         RETeq
  96         cmp     r2, #2
  97         ldrb    r3, [r0]
  98         strb    r3, [r1], #1
  99         ldrgeb  r3, [r0]
 100         strgeb  r3, [r1], #1
 101         ldrgtb  r3, [r0]
 102         strgtb  r3, [r1], #1
 103         ldmdb   fp, {fp, sp, pc}
 104 END(read_multi_1)
 105
 106 /*
 107  * Write bytes to an I/O address from a block of memory
 108  *
 109  * r0 = address to write to (IO)
 110  * r1 = address to read from (memory)
 111  * r2 = length
 112  */
 113
 114 /* This code will look very familiar if you've read _memcpy(). */
 115 ENTRY(write_multi_1)
 116         mov     ip, sp
 117         stmfd   sp!, {fp, ip, lr, pc}
 118         sub     fp, ip, #4
 119         subs    r2, r2, #4              /* r2 = length - 4 */
 120         blt     .Lwm1_l4                /* less than 4 bytes */
 121         ands    r12, r1, #3
 122         beq     .Lwm1_main              /* aligned source */
 123         rsb     r12, r12, #4
 124         cmp     r12, #2
 125         ldrb    r3, [r1], #1
 126         strb    r3, [r0]
 127         ldrgeb  r3, [r1], #1
 128         strgeb  r3, [r0]
 129         ldrgtb  r3, [r1], #1
 130         strgtb  r3, [r0]
 131         subs    r2, r2, r12
 132         blt     .Lwm1_l4
 133 .Lwm1_main:
 134 .Lwm1loop:
 135         ldr     r3, [r1], #4
 136         strb    r3, [r0]
 137         mov     r3, r3, lsr #8
 138         strb    r3, [r0]
 139         mov     r3, r3, lsr #8
 140         strb    r3, [r0]
 141         mov     r3, r3, lsr #8
 142         strb    r3, [r0]
 143         subs    r2, r2, #4
 144         bge     .Lwm1loop
 145 .Lwm1_l4:
 146         adds    r2, r2, #4                      /* r2 = length again */
 147         ldmeqdb fp, {fp, sp, pc}
 148         cmp     r2, #2
 149         ldrb    r3, [r1], #1
 150         strb    r3, [r0]
 151         ldrgeb  r3, [r1], #1
 152         strgeb  r3, [r0]
 153         ldrgtb  r3, [r1], #1
 154         strgtb  r3, [r0]
 155         ldmdb   fp, {fp, sp, pc}
 156 END(write_multi_1)
 157
 158 /*
 159  * Reads short ints (16 bits) from an I/O address into a block of memory
 160  *
 161  * r0 = address to read from (IO)
 162  * r1 = address to write to (memory)
 163  * r2 = length
 164  */
 165
 166 ENTRY(insw)
 167 /* Make sure that we have a positive length */
 168         cmp     r2, #0x00000000
 169         movle   pc, lr
 170
 171 /* If the destination address and the size is word aligned, do it fast */
 172
 173         tst     r2, #0x00000001
 174         tsteq   r1, #0x00000003
 175         beq     .Lfastinsw
 176
 177 /* Non aligned insw */
 178
 179 .Linswloop:
 180         ldr     r3, [r0]
 181         subs    r2, r2, #0x00000001     /* Loop test in load delay slot */
 182         strb    r3, [r1], #0x0001
 183         mov     r3, r3, lsr #8
 184         strb    r3, [r1], #0x0001
 185         bgt     .Linswloop
 186
 187         RET
 188
 189 /* Word aligned insw */
 190
 191 .Lfastinsw:
 192
 193 .Lfastinswloop:
 194         ldr     r3, [r0, #0x0002]       /* take advantage of nonaligned
 195                                          * word accesses */
 196         ldr     ip, [r0]
 197         mov     r3, r3, lsr #16         /* Put the two shorts together */
 198         orr     r3, r3, ip, lsl #16
 199         str     r3, [r1], #0x0004       /* Store */
 200         subs    r2, r2, #0x00000002     /* Next */
 201         bgt     .Lfastinswloop
 202
 203         RET
 204 END(insw)
 205
 206 /*
 207  * Writes short ints (16 bits) from a block of memory to an I/O address
 208  *
 209  * r0 = address to write to (IO)
 210  * r1 = address to read from (memory)
 211  * r2 = length
 212  */
 213
 214 ENTRY(outsw)
 215 /* Make sure that we have a positive length */
 216         cmp     r2, #0x00000000
 217         movle   pc, lr
 218
 219 /* If the destination address and the size is word aligned, do it fast */
 220
 221         tst     r2, #0x00000001
 222         tsteq   r1, #0x00000003
 223         beq     .Lfastoutsw
 224
 225 /* Non aligned outsw */
 226
 227 .Loutswloop:
 228         ldrb    r3, [r1], #0x0001
 229         ldrb    ip, [r1], #0x0001
 230         subs    r2, r2, #0x00000001     /* Loop test in load delay slot */
 231         orr     r3, r3, ip, lsl #8
 232         orr     r3, r3, r3, lsl #16
 233         str     r3, [r0]
 234         bgt     .Loutswloop
 235
 236         RET
 237
 238 /* Word aligned outsw */
 239
 240 .Lfastoutsw:
 241
 242 .Lfastoutswloop:
 243         ldr     r3, [r1], #0x0004       /* r3 = (H)(L) */
 244         subs    r2, r2, #0x00000002     /* Loop test in load delay slot */
 245
 246         eor     ip, r3, r3, lsr #16     /* ip = (H)(H^L) */
 247         eor     r3, r3, ip, lsl #16     /* r3 = (H^H^L)(L) = (L)(L) */
 248         eor     ip, ip, r3, lsr #16     /* ip = (H)(H^L^L) = (H)(H) */
 249
 250         str     r3, [r0]
 251         str     ip, [r0]
 252
 253 /*      mov     ip, r3, lsl #16
 254  *      orr     ip, ip, ip, lsr #16
 255  *      str     ip, [r0]
 256  *
 257  *      mov     ip, r3, lsr #16
 258  *      orr     ip, ip, ip, lsl #16
 259  *      str     ip, [r0]
 260  */
 261
 262         bgt     .Lfastoutswloop
 263
 264         RET
 265 END(outsw)
 266
 267 /*
 268  * reads short ints (16 bits) from an I/O address into a block of memory
 269  * with a length garenteed to be a multiple of 16 bytes
 270  * with a word aligned destination address
 271  *
 272  * r0 = address to read from (IO)
 273  * r1 = address to write to (memory)
 274  * r2 = length
 275  */
 276
 277 ENTRY(insw16)
 278 /* Make sure that we have a positive length */
 279         cmp     r2, #0x00000000
 280         movle   pc, lr
 281
 282 /* If the destination address is word aligned and the size suitably
 283    aligned, do it fast */
 284
 285         tst     r2, #0x00000007
 286         tsteq   r1, #0x00000003
 287
 288         bne     _C_LABEL(insw)
 289
 290 /* Word aligned insw */
 291
 292         stmfd   sp!, {r4,r5,lr}
 293
 294 .Linsw16loop:
 295         ldr     r3, [r0, #0x0002]       /* take advantage of nonaligned
 296                                          * word accesses */
 297         ldr     lr, [r0]
 298         mov     r3, r3, lsr #16         /* Put the two shorts together */
 299         orr     r3, r3, lr, lsl #16
 300
 301         ldr     r4, [r0, #0x0002]       /* take advantage of nonaligned
 302                                          * word accesses */
 303         ldr     lr, [r0]
 304         mov     r4, r4, lsr #16         /* Put the two shorts together */
 305         orr     r4, r4, lr, lsl #16
 306
 307         ldr     r5, [r0, #0x0002]       /* take advantage of nonaligned
 308                                          * word accesses */
 309         ldr     lr, [r0]
 310         mov     r5, r5, lsr #16         /* Put the two shorts together */
 311         orr     r5, r5, lr, lsl #16
 312
 313         ldr     ip, [r0, #0x0002]       /* take advantage of nonaligned
 314                                          * word accesses */
 315         ldr     lr, [r0]
 316         mov     ip, ip, lsr #16         /* Put the two shorts together */
 317         orr     ip, ip, lr, lsl #16
 318
 319         stmia   r1!, {r3-r5,ip}
 320         subs    r2, r2, #0x00000008     /* Next */
 321         bgt     .Linsw16loop
 322
 323         ldmfd   sp!, {r4,r5,pc}         /* Restore regs and go home */
 324 END(insw16)
 325
 326 /*
 327  * Writes short ints (16 bits) from a block of memory to an I/O address
 328  *
 329  * r0 = address to write to (IO)
 330  * r1 = address to read from (memory)
 331  * r2 = length
 332  */
 333
 334 ENTRY(outsw16)
 335 /* Make sure that we have a positive length */
 336         cmp     r2, #0x00000000
 337         movle   pc, lr
 338
 339 /* If the destination address is word aligned and the size suitably
 340    aligned, do it fast */
 341
 342         tst     r2, #0x00000007
 343         tsteq   r1, #0x00000003
 344
 345         bne     _C_LABEL(outsw)
 346
 347 /* Word aligned outsw */
 348
 349         stmfd   sp!, {r4,r5,lr}
 350
 351 .Loutsw16loop:
 352         ldmia   r1!, {r4,r5,ip,lr}
 353
 354         eor     r3, r4, r4, lsl #16     /* r3 = (A^B)(B) */
 355         eor     r4, r4, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
 356         eor     r3, r3, r4, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
 357         str     r3, [r0]
 358         str     r4, [r0]
 359
 360 /*      mov     r3, r4, lsl #16
 361  *      orr     r3, r3, r3, lsr #16
 362  *      str     r3, [r0]
 363  *
 364  *      mov     r3, r4, lsr #16
 365  *      orr     r3, r3, r3, lsl #16
 366  *      str     r3, [r0]
 367  */
 368
 369         eor     r3, r5, r5, lsl #16     /* r3 = (A^B)(B) */
 370         eor     r5, r5, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
 371         eor     r3, r3, r5, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
 372         str     r3, [r0]
 373         str     r5, [r0]
 374
 375         eor     r3, ip, ip, lsl #16     /* r3 = (A^B)(B) */
 376         eor     ip, ip, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
 377         eor     r3, r3, ip, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
 378         str     r3, [r0]
 379         str     ip, [r0]
 380
 381         eor     r3, lr, lr, lsl #16     /* r3 = (A^B)(B) */
 382         eor     lr, lr, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
 383         eor     r3, r3, lr, lsl #16     /* r3 = (A^B^A)(B) = (B)(B) */
 384         str     r3, [r0]
 385         str     lr, [r0]
 386
 387         subs    r2, r2, #0x00000008
 388         bgt     .Loutsw16loop
 389
 390         ldmfd   sp!, {r4,r5,pc}         /* and go home */
 391 END(outsw16)
 392
 393 /*
 394  * reads short ints (16 bits) from an I/O address into a block of memory
 395  * The I/O address is assumed to be mapped multiple times in a block of
 396  * 8 words.
 397  * The destination address should be word aligned.
 398  *
 399  * r0 = address to read from (IO)
 400  * r1 = address to write to (memory)
 401  * r2 = length
 402  */
 403
 404 ENTRY(inswm8)
 405 /* Make sure that we have a positive length */
 406         cmp     r2, #0x00000000
 407         movle   pc, lr
 408
 409 /* If the destination address is word aligned and the size suitably
 410    aligned, do it fast */
 411
 412         tst     r1, #0x00000003
 413
 414         bne     _C_LABEL(insw)
 415
 416 /* Word aligned insw */
 417
 418         stmfd   sp!, {r4-r9,lr}
 419
 420         mov     lr, #0xff000000
 421         orr     lr, lr, #0x00ff0000
 422
 423 .Linswm8_loop8:
 424         cmp     r2, #8
 425         bcc     .Linswm8_l8
 426
 427         ldmia   r0, {r3-r9,ip}
 428
 429         bic     r3, r3, lr
 430         orr     r3, r3, r4, lsl #16
 431         bic     r5, r5, lr
 432         orr     r4, r5, r6, lsl #16
 433         bic     r7, r7, lr
 434         orr     r5, r7, r8, lsl #16
 435         bic     r9, r9, lr
 436         orr     r6, r9, ip, lsl #16
 437
 438         stmia   r1!, {r3-r6}
 439
 440         subs    r2, r2, #0x00000008     /* Next */
 441         bne     .Linswm8_loop8
 442         beq     .Linswm8_l1
 443
 444 .Linswm8_l8:
 445         cmp     r2, #4
 446         bcc     .Linswm8_l4
 447
 448         ldmia   r0, {r3-r6}
 449
 450         bic     r3, r3, lr
 451         orr     r3, r3, r4, lsl #16
 452         bic     r5, r5, lr
 453         orr     r4, r5, r6, lsl #16
 454
 455         stmia   r1!, {r3-r4}
 456
 457         subs    r2, r2, #0x00000004
 458         beq     .Linswm8_l1
 459
 460 .Linswm8_l4:
 461         cmp     r2, #2
 462         bcc     .Linswm8_l2
 463
 464         ldmia   r0, {r3-r4}
 465
 466         bic     r3, r3, lr
 467         orr     r3, r3, r4, lsl #16
 468         str     r3, [r1], #0x0004
 469
 470         subs    r2, r2, #0x00000002
 471         beq     .Linswm8_l1
 472
 473 .Linswm8_l2:
 474         cmp     r2, #1
 475         bcc     .Linswm8_l1
 476
 477         ldr     r3, [r0]
 478         subs    r2, r2, #0x00000001     /* Test in load delay slot */
 479                                         /* XXX, why don't we use result?  */
 480
 481         strb    r3, [r1], #0x0001
 482         mov     r3, r3, lsr #8
 483         strb    r3, [r1], #0x0001
 484
 485
 486 .Linswm8_l1:
 487         ldmfd   sp!, {r4-r9,pc}         /* And go home */
 488 END(inswm8)
 489
 490 /*
 491  * write short ints (16 bits) to an I/O address from a block of memory
 492  * The I/O address is assumed to be mapped multiple times in a block of
 493  * 8 words.
 494  * The source address should be word aligned.
 495  *
 496  * r0 = address to read to (IO)
 497  * r1 = address to write from (memory)
 498  * r2 = length
 499  */
 500
 501 ENTRY(outswm8)
 502 /* Make sure that we have a positive length */
 503         cmp     r2, #0x00000000
 504         movle   pc, lr
 505
 506 /* If the destination address is word aligned and the size suitably
 507    aligned, do it fast */
 508
 509         tst     r1, #0x00000003
 510
 511         bne     _C_LABEL(outsw)
 512
 513 /* Word aligned outsw */
 514
 515         stmfd   sp!, {r4-r8,lr}
 516
 517 .Loutswm8_loop8:
 518         cmp     r2, #8
 519         bcc     .Loutswm8_l8
 520
 521         ldmia   r1!, {r3,r5,r7,ip}
 522
 523         eor     r4, r3, r3, lsr #16     /* r4 = (A)(A^B) */
 524         eor     r3, r3, r4, lsl #16     /* r3 = (A^A^B)(B) = (B)(B) */
 525         eor     r4, r4, r3, lsr #16     /* r4 = (A)(B^A^B) = (A)(A) */
 526
 527         eor     r6, r5, r5, lsr #16     /* r6 = (A)(A^B) */
 528         eor     r5, r5, r6, lsl #16     /* r5 = (A^A^B)(B) = (B)(B) */
 529         eor     r6, r6, r5, lsr #16     /* r6 = (A)(B^A^B) = (A)(A) */
 530
 531         eor     r8, r7, r7, lsr #16     /* r8 = (A)(A^B) */
 532         eor     r7, r7, r8, lsl #16     /* r7 = (A^A^B)(B) = (B)(B) */
 533         eor     r8, r8, r7, lsr #16     /* r8 = (A)(B^A^B) = (A)(A) */
 534
 535         eor     lr, ip, ip, lsr #16     /* lr = (A)(A^B) */
 536         eor     ip, ip, lr, lsl #16     /* ip = (A^A^B)(B) = (B)(B) */
 537         eor     lr, lr, ip, lsr #16     /* lr = (A)(B^A^B) = (A)(A) */
 538
 539         stmia   r0, {r3-r8,ip,lr}
 540
 541         subs    r2, r2, #0x00000008     /* Next */
 542         bne     .Loutswm8_loop8
 543         beq     .Loutswm8_l1
 544
 545 .Loutswm8_l8:
 546         cmp     r2, #4
 547         bcc     .Loutswm8_l4
 548
 549         ldmia   r1!, {r3-r4}
 550
 551         eor     r6, r3, r3, lsr #16     /* r6 = (A)(A^B) */
 552         eor     r5, r3, r6, lsl #16     /* r5 = (A^A^B)(B) = (B)(B) */
 553         eor     r6, r6, r5, lsr #16     /* r6 = (A)(B^A^B) = (A)(A) */
 554
 555         eor     r8, r4, r4, lsr #16     /* r8 = (A)(A^B) */
 556         eor     r7, r4, r8, lsl #16     /* r7 = (A^A^B)(B) = (B)(B) */
 557         eor     r8, r8, r7, lsr #16     /* r8 = (A)(B^A^B) = (A)(A) */
 558
 559         stmia   r0, {r5-r8}
 560
 561         subs    r2, r2, #0x00000004
 562         beq     .Loutswm8_l1
 563
 564 .Loutswm8_l4:
 565         cmp     r2, #2
 566         bcc     .Loutswm8_l2
 567
 568         ldr     r3, [r1], #0x0004       /* r3 = (A)(B) */
 569         subs    r2, r2, #0x00000002     /* Done test in Load delay slot */
 570
 571         eor     r5, r3, r3, lsr #16     /* r5 = (A)(A^B)*/
 572         eor     r4, r3, r5, lsl #16     /* r4 = (A^A^B)(B) = (B)(B) */
 573         eor     r5, r5, r4, lsr #16     /* r5 = (A)(B^A^B) = (A)(A) */
 574
 575         stmia   r0, {r4, r5}
 576
 577         beq     .Loutswm8_l1
 578
 579 .Loutswm8_l2:
 580         cmp     r2, #1
 581         bcc     .Loutswm8_l1
 582
 583         ldrb    r3, [r1], #0x0001
 584         ldrb    r4, [r1], #0x0001
 585         subs    r2, r2, #0x00000001     /* Done test in load delay slot */
 586                                         /* XXX This test isn't used?  */
 587         orr     r3, r3, r4, lsl #8
 588         orr     r3, r3, r3, lsl #16
 589         str     r3, [r0]
 590
 591 .Loutswm8_l1:
 592         ldmfd   sp!, {r4-r8,pc}         /* And go home */
 593 END(outswm8)
 594