/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */

/*-
 * Copyright (C) 2003-2004 Olivier Houchard
 * Copyright (C) 1994-1997 Mark Brinicombe
 * Copyright (C) 1994 Brini
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Brini.
 * 4. The name of Brini may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _MACHINE_ATOMIC_V6_H_
#define _MACHINE_ATOMIC_V6_H_

#ifndef _MACHINE_ATOMIC_H_
#error Do not include this file directly, use <machine/atomic.h>
#endif

#if __ARM_ARCH >= 7
#define isb()  __asm __volatile("isb" : : : "memory")
#define dsb()  __asm __volatile("dsb" : : : "memory")
#define dmb()  __asm __volatile("dmb" : : : "memory")
#elif __ARM_ARCH >= 6
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
#else
#error Only use this file with ARMv6 and later
#endif

#define mb()   dmb()
#define wmb()  dmb()
#define rmb()  dmb()

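/*
 * mb(), wmb() and rmb() all expand to a full DMB on ARMv6/v7, so each of
 * them orders both loads and stores.  A minimal producer-side sketch, using
 * hypothetical shared variables 'data' and 'ready' for illustration only:
 *
 *	data = compute();	// publish the payload
 *	wmb();			// DMB: payload is visible before the flag
 *	ready = 1;		// a consumer pairs this with rmb() after
 *				// observing ready != 0
 */
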
#define ARM_HAVE_ATOMIC64

#define ATOMIC_ACQ_REL_LONG(NAME)                                       \
static __inline void                                                    \
atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)                \
{                                                                       \
        atomic_##NAME##_long(p, v);                                     \
        dmb();                                                          \
}                                                                       \
                                                                        \
static __inline  void                                                   \
atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)                \
{                                                                       \
        dmb();                                                          \
        atomic_##NAME##_long(p, v);                                     \
}

#define ATOMIC_ACQ_REL(NAME, WIDTH)                                     \
static __inline  void                                                   \
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{                                                                       \
        atomic_##NAME##_##WIDTH(p, v);                                  \
        dmb();                                                          \
}                                                                       \
                                                                        \
static __inline  void                                                   \
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{                                                                       \
        dmb();                                                          \
        atomic_##NAME##_##WIDTH(p, v);                                  \
}

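/*
 * The two macros above generate the _acq_/_rel_ variants of an operation by
 * wrapping the plain version with a DMB on the appropriate side.  As an
 * illustrative expansion only, ATOMIC_ACQ_REL(add, 32) produces roughly:
 *
 *	static __inline void
 *	atomic_add_acq_32(__volatile uint32_t *p, uint32_t v)
 *	{
 *		atomic_add_32(p, v);
 *		dmb();			// later accesses stay after the add
 *	}
 *
 *	static __inline void
 *	atomic_add_rel_32(__volatile uint32_t *p, uint32_t v)
 *	{
 *		dmb();			// earlier accesses complete first
 *		atomic_add_32(p, v);
 *	}
 */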

static __inline void
atomic_add_32(volatile uint32_t *p, uint32_t val)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   add     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2)
            ,"+r" (p), "+r" (val) : : "cc", "memory");
}

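/*
 * atomic_add_32() above is the canonical LDREX/STREX retry loop: LDREX loads
 * the word and arms the exclusive monitor, STREX writes back only while the
 * monitor is still exclusive and yields 0 on success, and the loop retries
 * otherwise.  A rough C-style equivalent, for illustration only (the
 * exclusive-monitor primitives cannot really be spelled this way in C):
 *
 *	do {
 *		old = load_exclusive(p);		// LDREX
 *		new = old + val;			// add
 *	} while (store_exclusive(p, new) != 0);		// STREX, 0 == success
 */
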
static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   adds    %Q[tmp], %Q[val]                        \n"
            "   adc     %R[tmp], %R[tmp], %R[val]               \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_add_long(volatile u_long *p, u_long val)
{

        atomic_add_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL_LONG(add)

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   bic     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
            : : "cc", "memory");
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   bic     %Q[tmp], %Q[val]                        \n"
            "   bic     %R[tmp], %R[val]                        \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_clear_long(volatile u_long *address, u_long setmask)
{

        atomic_clear_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL_LONG(clear)

#define ATOMIC_FCMPSET_CODE(RET, TYPE, SUF)                   \
    {                                                         \
        TYPE tmp;                                             \
                                                              \
        __asm __volatile(                                     \
            "1: ldrex" SUF "   %[tmp], [%[ptr]]          \n"  \
            "   ldr" SUF "     %[ret], [%[oldv]]         \n"  \
            "   teq            %[tmp], %[ret]            \n"  \
            "   ittee          ne                        \n"  \
            "   str" SUF "ne   %[tmp], [%[oldv]]         \n"  \
            "   movne          %[ret], #0                \n"  \
            "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n"  \
            "   eorseq         %[ret], #1                \n"  \
            "   beq            1b                        \n"  \
            : [ret] "=&r" (RET),                              \
              [tmp] "=&r" (tmp)                               \
            : [ptr] "r"   (_ptr),                             \
              [oldv] "r"  (_old),                             \
              [newv] "r"  (_new)                              \
            : "cc", "memory");                                \
    }

#define ATOMIC_FCMPSET_CODE64(RET)                                 \
    {                                                              \
        uint64_t cmp, tmp;                                         \
                                                                   \
        __asm __volatile(                                          \
            "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n"  \
            "   ldrd     %Q[cmp], %R[cmp], [%[oldv]]          \n"  \
            "   teq      %Q[tmp], %Q[cmp]                     \n"  \
            "   it       eq                                   \n"  \
            "   teqeq    %R[tmp], %R[cmp]                     \n"  \
            "   ittee    ne                                   \n"  \
            "   movne    %[ret], #0                           \n"  \
            "   strdne   %[cmp], [%[oldv]]                    \n"  \
            "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n"  \
            "   eorseq   %[ret], #1                           \n"  \
            "   beq      1b                                   \n"  \
            : [ret] "=&r" (RET),                                   \
              [cmp] "=&r" (cmp),                                   \
              [tmp] "=&r" (tmp)                                    \
            : [ptr] "r"   (_ptr),                                  \
              [oldv] "r"  (_old),                                  \
              [newv] "r"  (_new)                                   \
            : "cc", "memory");                                     \
    }

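/*
 * The fcmpset functions built from the macros above implement the
 * compare-and-swap variant that, on failure, writes the value actually
 * found into *_old and returns 0, so a retry loop does not have to reload
 * the target itself.  An illustrative caller, assuming a hypothetical
 * shared counter 'v':
 *
 *	uint32_t old, newv;
 *
 *	old = atomic_load_acq_32(&v);
 *	do {
 *		newv = old + 1;		// 'old' is refreshed by a failed fcmpset
 *	} while (atomic_fcmpset_32(&v, &old, newv) == 0);
 */
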
static __inline int
atomic_fcmpset_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
        return (ret);
}
#define atomic_fcmpset_8        atomic_fcmpset_8

static __inline int
atomic_fcmpset_acq_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
        return (ret);
}

static __inline int
atomic_fcmpset_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
        return (ret);
}
#define atomic_fcmpset_16       atomic_fcmpset_16

static __inline int
atomic_fcmpset_acq_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
        return (ret);
}

static __inline int
atomic_fcmpset_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
        return (ret);
}

static __inline int
atomic_fcmpset_acq_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
        return (ret);
}

static __inline int
atomic_fcmpset_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, u_long, "");
        return (ret);
}

static __inline int
atomic_fcmpset_acq_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, u_long, "");
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE(ret, u_long, "");
        return (ret);
}

static __inline int
atomic_fcmpset_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE64(ret);
        return (ret);
}

static __inline int
atomic_fcmpset_acq_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE64(ret);
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE64(ret);
        return (ret);
}

#define ATOMIC_CMPSET_CODE(RET, SUF)                         \
    {                                                        \
        __asm __volatile(                                    \
            "1: ldrex" SUF "   %[ret], [%[ptr]]          \n" \
            "   teq            %[ret], %[oldv]           \n" \
            "   itee           ne                        \n" \
            "   movne          %[ret], #0                \n" \
            "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n" \
            "   eorseq         %[ret], #1                \n" \
            "   beq            1b                        \n" \
            : [ret] "=&r" (RET)                              \
            : [ptr] "r"   (_ptr),                            \
              [oldv] "r"  (_old),                            \
              [newv] "r"  (_new)                             \
            : "cc", "memory");                               \
    }

#define ATOMIC_CMPSET_CODE64(RET)                                 \
    {                                                             \
        uint64_t tmp;                                             \
                                                                  \
        __asm __volatile(                                         \
            "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n" \
            "   teq      %Q[tmp], %Q[oldv]                    \n" \
            "   it       eq                                   \n" \
            "   teqeq    %R[tmp], %R[oldv]                    \n" \
            "   itee     ne                                   \n" \
            "   movne    %[ret], #0                           \n" \
            "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n" \
            "   eorseq   %[ret], #1                           \n" \
            "   beq      1b                                   \n" \
            : [ret] "=&r" (RET),                                  \
              [tmp] "=&r" (tmp)                                   \
            : [ptr] "r"   (_ptr),                                 \
              [oldv] "r"  (_old),                                 \
              [newv] "r"  (_new)                                  \
            : "cc", "memory");                                    \
    }

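/*
 * The cmpset functions built from the macros above are the classic
 * compare-and-swap: they return nonzero only if *_ptr still held _old and
 * was replaced by _new.  Unlike fcmpset, a failed attempt does not report
 * the value found, so a retry loop reloads it explicitly.  Illustrative
 * sketch only, with a hypothetical flags word 'f' and placeholder mask
 * SOME_FLAG:
 *
 *	uint32_t old;
 *
 *	do {
 *		old = f;			// re-read on every attempt
 *	} while (atomic_cmpset_32(&f, old, old | SOME_FLAG) == 0);
 */
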
static __inline int
atomic_cmpset_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "b");
        return (ret);
}
#define atomic_cmpset_8         atomic_cmpset_8

static __inline int
atomic_cmpset_acq_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "b");
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE(ret, "b");
        return (ret);
}

static __inline int
atomic_cmpset_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "h");
        return (ret);
}
#define atomic_cmpset_16        atomic_cmpset_16

static __inline int
atomic_cmpset_acq_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "h");
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE(ret, "h");
        return (ret);
}

static __inline int
atomic_cmpset_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "");
        return (ret);
}

static __inline int
atomic_cmpset_acq_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "");
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE(ret, "");
        return (ret);
}

static __inline int
atomic_cmpset_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "");
        return (ret);
}

static __inline int
atomic_cmpset_acq_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "");
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE(ret, "");
        return (ret);
}

static __inline int
atomic_cmpset_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE64(ret);
        return (ret);
}

static __inline int
atomic_cmpset_acq_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE64(ret);
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE64(ret);
        return (ret);
}

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
        uint32_t tmp = 0, tmp2 = 0, ret = 0;

        __asm __volatile(
            "1: ldrex   %0, [%3]        \n"
            "   add     %1, %0, %4      \n"
            "   strex   %2, %1, [%3]    \n"
            "   cmp     %2, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "+r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
            : : "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t ret, tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[ret], %R[ret], [%[ptr]]              \n"
            "   adds    %Q[tmp], %Q[ret], %Q[val]               \n"
            "   adc     %R[tmp], %R[ret], %R[val]               \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [ret] "=&r" (ret),
              [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
        return (ret);
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long val)
{

        return (atomic_fetchadd_32((volatile uint32_t *)p, val));
}

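/*
 * atomic_fetchadd_*() returns the value the target held before the
 * addition, which makes it a natural building block for handing out unique
 * indices.  Illustrative sketch only, with a hypothetical shared counter
 * 'next_idx':
 *
 *	uint32_t idx;
 *
 *	idx = atomic_fetchadd_32(&next_idx, 1);	// idx is unique per caller
 */
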
static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
        uint32_t v;

        v = *p;
        dmb();
        return (v);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
        uint64_t ret;

        /*
         * The only way to atomically load 64 bits is with LDREXD which puts the
         * exclusive monitor into the exclusive state, so reset it to open state
         * with CLREX because we don't actually need to store anything.
         */
        __asm __volatile(
            "ldrexd     %Q[ret], %R[ret], [%[ptr]]      \n"
            "clrex                                      \n"
            : [ret] "=&r" (ret)
            : [ptr] "r"   (p)
            : "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
        uint64_t ret;

        ret = atomic_load_64(p);
        dmb();
        return (ret);
}

static __inline u_long
atomic_load_acq_long(volatile u_long *p)
{
        u_long v;

        v = *p;
        dmb();
        return (v);
}

static __inline uint32_t
atomic_readandclear_32(volatile uint32_t *p)
{
        uint32_t ret, tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%3]        \n"
            "   mov     %1, #0          \n"
            "   strex   %2, %1, [%3]    \n"
            "   cmp     %2, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p)
            : : "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
        uint64_t ret, tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[ret], %R[ret], [%[ptr]]              \n"
            "   mov     %Q[tmp], #0                             \n"
            "   mov     %R[tmp], #0                             \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [ret] "=&r" (ret),
              [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p)
            : "cc", "memory");
        return (ret);
}

static __inline u_long
atomic_readandclear_long(volatile u_long *p)
{

        return (atomic_readandclear_32((volatile uint32_t *)p));
}

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   orr     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
            : : "cc", "memory");
}

static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   orr     %Q[tmp], %Q[val]                        \n"
            "   orr     %R[tmp], %R[val]                        \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_set_long(volatile u_long *address, u_long setmask)
{

        atomic_set_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL_LONG(set)

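/*
 * atomic_set_*() and atomic_clear_*() above OR in and BIC out a mask without
 * disturbing the other bits, so concurrent updates of different bits in the
 * same word cannot lose each other.  Illustrative sketch, with a
 * hypothetical shared word 'flags':
 *
 *	atomic_set_32(&flags, 0x01);		// turn a bit on
 *	atomic_clear_32(&flags, 0x01);		// turn the same bit off
 */
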
static __inline void
atomic_subtract_32(volatile uint32_t *p, uint32_t val)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   sub     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
            : : "cc", "memory");
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   subs    %Q[tmp], %Q[val]                        \n"
            "   sbc     %R[tmp], %R[tmp], %R[val]               \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long val)
{

        atomic_subtract_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(subtract, 32)
ATOMIC_ACQ_REL(subtract, 64)
ATOMIC_ACQ_REL_LONG(subtract)

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        /*
         * The only way to atomically store 64 bits is with STREXD, which will
         * succeed only if paired up with a preceding LDREXD using the same
         * address, so we read and discard the existing value before storing.
         */
        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   strexd  %[exf], %Q[val], %R[val], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [tmp] "=&r" (tmp),
              [exf] "=&r" (exflag)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
{

        dmb();
        *p = v;
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

        dmb();
        atomic_store_64(p, val);
}

static __inline void
atomic_store_rel_long(volatile u_long *p, u_long v)
{

        dmb();
        *p = v;
}

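/*
 * The store_rel/load_acq pairs above are meant to be used together: the
 * release store keeps earlier writes visible before the flag, and the
 * acquire load keeps later reads after it.  A minimal hand-off sketch,
 * using hypothetical variables 'payload' and 'flag' for illustration only:
 *
 *	// producer
 *	payload = value;
 *	atomic_store_rel_32(&flag, 1);
 *
 *	// consumer
 *	while (atomic_load_acq_32(&flag) == 0)
 *		;
 *	// the producer's payload is now guaranteed to be visible
 */
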
static __inline int
atomic_testandclear_32(volatile uint32_t *ptr, u_int bit)
{
        int newv, oldv, result;

        __asm __volatile(
            "   mov     ip, #1                                  \n"
            "   lsl     ip, ip, %[bit]                          \n"
            /*  Done with %[bit] as input, reuse below as output. */
            "1:                                                 \n"
            "   ldrex   %[oldv], [%[ptr]]                       \n"
            "   bic     %[newv], %[oldv], ip                    \n"
            "   strex   %[bit], %[newv], [%[ptr]]               \n"
            "   teq     %[bit], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            "   ands    %[bit], %[oldv], ip                     \n"
            "   it      ne                                      \n"
            "   movne   %[bit], #1                              \n"
            : [bit]  "=&r"   (result),
              [oldv] "=&r"   (oldv),
              [newv] "=&r"   (newv)
            : [ptr]  "r"     (ptr),
                     "[bit]" (bit)
            : "cc", "ip", "memory");

        return (result);
}

static __inline int
atomic_testandclear_int(volatile u_int *p, u_int v)
{

        return (atomic_testandclear_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandclear_long(volatile u_long *p, u_int v)
{

        return (atomic_testandclear_32((volatile uint32_t *)p, v));
}
#define atomic_testandclear_long        atomic_testandclear_long

static __inline int
atomic_testandset_32(volatile uint32_t *ptr, u_int bit)
{
        int newv, oldv, result;

        __asm __volatile(
            "   mov     ip, #1                                  \n"
            "   lsl     ip, ip, %[bit]                          \n"
            /*  Done with %[bit] as input, reuse below as output. */
            "1:                                                 \n"
            "   ldrex   %[oldv], [%[ptr]]                       \n"
            "   orr     %[newv], %[oldv], ip                    \n"
            "   strex   %[bit], %[newv], [%[ptr]]               \n"
            "   teq     %[bit], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            "   ands    %[bit], %[oldv], ip                     \n"
            "   it      ne                                      \n"
            "   movne   %[bit], #1                              \n"
            : [bit]  "=&r"   (result),
              [oldv] "=&r"   (oldv),
              [newv] "=&r"   (newv)
            : [ptr]  "r"     (ptr),
                     "[bit]" (bit)
            : "cc", "ip", "memory");

        return (result);
}

static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{

        return (atomic_testandset_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandset_long(volatile u_long *p, u_int v)
{

        return (atomic_testandset_32((volatile uint32_t *)p, v));
}
#define atomic_testandset_long  atomic_testandset_long

static __inline int
atomic_testandset_64(volatile uint64_t *p, u_int v)
{
        volatile uint32_t *p32;

        p32 = (volatile uint32_t *)p;
        /* Assume little-endian */
        if (v >= 32) {
                v &= 0x1f;
                p32++;
        }
        return (atomic_testandset_32(p32, v));
}

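/*
 * atomic_testandset_*() and atomic_testandclear_*() atomically flip one bit
 * (given as a bit index, not a mask) and report its previous state, nonzero
 * if it was set.  That makes testandset usable as a tiny try-lock over a
 * bit, sketched here with a hypothetical bitmap 'busy' for illustration
 * only; a real lock would also need acquire/release barriers around the
 * critical section:
 *
 *	if (atomic_testandset_32(&busy, slot) == 0) {
 *		// the bit was clear and is now ours
 *		atomic_clear_32(&busy, 1u << slot);	// release the slot
 *	}
 */
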
static __inline uint32_t
atomic_swap_32(volatile uint32_t *p, uint32_t v)
{
        uint32_t ret, exflag;

        __asm __volatile(
            "1: ldrex   %[ret], [%[ptr]]                \n"
            "   strex   %[exf], %[val], [%[ptr]]        \n"
            "   teq     %[exf], #0                      \n"
            "   it      ne                              \n"
            "   bne     1b                              \n"
            : [ret] "=&r"  (ret),
              [exf] "=&r" (exflag)
            : [val] "r"  (v),
              [ptr] "r"  (p)
            : "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_swap_64(volatile uint64_t *p, uint64_t v)
{
        uint64_t ret;
        uint32_t exflag;

        __asm __volatile(
            "1: ldrexd  %Q[ret], %R[ret], [%[ptr]]              \n"
            "   strexd  %[exf], %Q[val], %R[val], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [ret] "=&r" (ret),
              [exf] "=&r" (exflag)
            : [val] "r"   (v),
              [ptr] "r"   (p)
            : "cc", "memory");
        return (ret);
}

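/*
 * atomic_swap_*() unconditionally stores the new value and returns the old
 * one.  Illustrative sketch only, with a hypothetical shared slot 'head'
 * being drained by a single consumer:
 *
 *	uint32_t old;
 *
 *	old = atomic_swap_32(&head, 0);	// take whatever was there, leave 0
 */
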
#undef ATOMIC_ACQ_REL
#undef ATOMIC_ACQ_REL_LONG

static __inline void
atomic_thread_fence_acq(void)
{

        dmb();
}

static __inline void
atomic_thread_fence_rel(void)
{

        dmb();
}

static __inline void
atomic_thread_fence_acq_rel(void)
{

        dmb();
}

static __inline void
atomic_thread_fence_seq_cst(void)
{

        dmb();
}
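
/*
 * All four thread fences above collapse to a full DMB on ARMv6/v7; the
 * distinct names exist so callers can state the ordering they actually rely
 * on.  Illustrative sketch, with hypothetical shared variables 'a' and 'b':
 *
 *	a = 1;
 *	atomic_thread_fence_rel();	// 'a' becomes visible before 'b'
 *	b = 1;
 */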

#endif /* _MACHINE_ATOMIC_V6_H_ */