/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */

/*-
 * Copyright (C) 2003-2004 Olivier Houchard
 * Copyright (C) 1994-1997 Mark Brinicombe
 * Copyright (C) 1994 Brini
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *    This product includes software developed by Brini.
 * 4. The name of Brini may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef _MACHINE_ATOMIC_V6_H_
#define _MACHINE_ATOMIC_V6_H_

#ifndef _MACHINE_ATOMIC_H_
#error Do not include this file directly, use <machine/atomic.h>
#endif

#if __ARM_ARCH >= 7
#define isb()  __asm __volatile("isb" : : : "memory")
#define dsb()  __asm __volatile("dsb" : : : "memory")
#define dmb()  __asm __volatile("dmb" : : : "memory")
#elif __ARM_ARCH >= 6
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
#else
#error Only use this file with ARMv6 and later
#endif

#define mb()   dmb()
#define wmb()  dmb()
#define rmb()  dmb()

#define ARM_HAVE_ATOMIC64

#define ATOMIC_ACQ_REL_LONG(NAME)                                  \
static __inline void                                               \
atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)           \
{                                                                  \
	atomic_##NAME##_long(p, v);                                \
	dmb();                                                     \
}                                                                  \
                                                                   \
static __inline void                                               \
atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)           \
{                                                                  \
	dmb();                                                     \
	atomic_##NAME##_long(p, v);                                \
}

#define ATOMIC_ACQ_REL(NAME, WIDTH)                                \
static __inline void                                               \
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{                                                                  \
	atomic_##NAME##_##WIDTH(p, v);                             \
	dmb();                                                     \
}                                                                  \
                                                                   \
static __inline void                                               \
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{                                                                  \
	dmb();                                                     \
	atomic_##NAME##_##WIDTH(p, v);                             \
}

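/*
 * Summary (added for clarity, not original text): an "acq" variant performs
 * the operation and then issues dmb(), so no later access can be reordered
 * before it; a "rel" variant issues dmb() first, so no earlier access can be
 * reordered after the operation.  Expanded for one instantiation:
 *
 *	atomic_add_acq_32(p, v)  =>  atomic_add_32(p, v); dmb();
 *	atomic_add_rel_32(p, v)  =>  dmb(); atomic_add_32(p, v);
 */
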
static __inline void
atomic_add_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   add	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
	    : : "cc", "memory");
}

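/*
 * All of the read-modify-write operations in this file use the same
 * load-exclusive/store-exclusive retry loop shown in atomic_add_32() above.
 * A C-level sketch of the pattern (illustrative only;
 * load_exclusive()/store_exclusive() are hypothetical stand-ins for the
 * LDREX/STREX instruction pair):
 *
 *	do {
 *		tmp = load_exclusive(p);	// ldrex: claim the monitor
 *		tmp = tmp + val;		// the operation
 *	} while (!store_exclusive(p, tmp));	// strex: fails if disturbed
 */
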
static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   adds	%Q[tmp], %Q[val]			\n"
	    "   adc	%R[tmp], %R[tmp], %R[val]		\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r" (p),
	      [val] "r" (val)
	    : "cc", "memory");
}

static __inline void
atomic_add_long(volatile u_long *p, u_long val)
{

	atomic_add_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL_LONG(add)

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   bic	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
	    : : "cc", "memory");
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   bic	%Q[tmp], %Q[val]			\n"
	    "   bic	%R[tmp], %R[val]			\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r" (p),
	      [val] "r" (val)
	    : "cc", "memory");
}

static __inline void
atomic_clear_long(volatile u_long *address, u_long setmask)
{

	atomic_clear_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL_LONG(clear)

static __inline uint32_t
atomic_fcmpset_32(volatile uint32_t *p, uint32_t *cmpval, uint32_t newval)
{
	uint32_t ret, tmp;
	uint32_t _cmpval = *cmpval;

	__asm __volatile(
	    "   mov	%0, #1		\n"
	    "   ldrex	%1, [%2]	\n"
	    "   cmp	%1, %3		\n"
	    "   it	eq		\n"
	    "   strexeq %0, %4, [%2]	\n"
	    : "=&r" (ret), "=&r" (tmp), "+r" (p), "+r" (_cmpval), "+r" (newval)
	    : : "cc", "memory");
	*cmpval = tmp;
	return (!ret);
}

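/*
 * Usage sketch (illustrative, not part of the original header): on failure,
 * including a spurious STREX failure, atomic_fcmpset_32() writes the value
 * it observed back to *cmpval and returns zero, so a lock-free update loop
 * needs no explicit re-read.  "counter" is an assumed volatile uint32_t *:
 *
 *	uint32_t old, new;
 *
 *	old = *counter;
 *	do {
 *		new = old + 1;
 *	} while (atomic_fcmpset_32(counter, &old, new) == 0);
 */
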
static __inline uint64_t
atomic_fcmpset_64(volatile uint64_t *p, uint64_t *cmpval, uint64_t newval)
{
	uint64_t tmp;
	uint64_t _cmpval = *cmpval;
	uint32_t ret;

	__asm __volatile(
	    "1: mov	%[ret], #1				\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   teq	%Q[tmp], %Q[_cmpval]			\n"
	    "   it	eq					\n"
	    "   teqeq	%R[tmp], %R[_cmpval]			\n"
	    "   bne	2f					\n"
	    "   strexd	%[ret], %Q[newval], %R[newval], [%[ptr]]\n"
	    "2:							\n"
	    : [ret] "=&r" (ret),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r" (p),
	      [_cmpval] "r" (_cmpval),
	      [newval] "r" (newval)
	    : "cc", "memory");
	*cmpval = tmp;
	return (!ret);
}

static __inline u_long
atomic_fcmpset_long(volatile u_long *p, u_long *cmpval, u_long newval)
{

	return (atomic_fcmpset_32((volatile uint32_t *)p,
	    (uint32_t *)cmpval, newval));
}

static __inline uint64_t
atomic_fcmpset_acq_64(volatile uint64_t *p, uint64_t *cmpval, uint64_t newval)
{
	uint64_t ret;

	ret = atomic_fcmpset_64(p, cmpval, newval);
	dmb();
	return (ret);
}

static __inline u_long
atomic_fcmpset_acq_long(volatile u_long *p, u_long *cmpval, u_long newval)
{
	u_long ret;

	ret = atomic_fcmpset_long(p, cmpval, newval);
	dmb();
	return (ret);
}

static __inline uint32_t
atomic_fcmpset_acq_32(volatile uint32_t *p, uint32_t *cmpval, uint32_t newval)
{
	uint32_t ret;

	ret = atomic_fcmpset_32(p, cmpval, newval);
	dmb();
	return (ret);
}

static __inline uint32_t
atomic_fcmpset_rel_32(volatile uint32_t *p, uint32_t *cmpval, uint32_t newval)
{

	dmb();
	return (atomic_fcmpset_32(p, cmpval, newval));
}

static __inline uint64_t
atomic_fcmpset_rel_64(volatile uint64_t *p, uint64_t *cmpval, uint64_t newval)
{

	dmb();
	return (atomic_fcmpset_64(p, cmpval, newval));
}

static __inline u_long
atomic_fcmpset_rel_long(volatile u_long *p, u_long *cmpval, u_long newval)
{

	dmb();
	return (atomic_fcmpset_long(p, cmpval, newval));
}

static __inline uint32_t
atomic_cmpset_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
{
	uint32_t ret;

	__asm __volatile(
	    "1: ldrex	%0, [%1]	\n"
	    "   cmp	%0, %2		\n"
	    "   itt	ne		\n"
	    "   movne	%0, #0		\n"
	    "   bne	2f		\n"
	    "   strex	%0, %3, [%1]	\n"
	    "   cmp	%0, #0		\n"
	    "   ite	eq		\n"
	    "   moveq	%0, #1		\n"
	    "   bne	1b		\n"
	    "2:"
	    : "=&r" (ret), "+r" (p), "+r" (cmpval), "+r" (newval)
	    : : "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
	uint64_t tmp;
	uint32_t ret;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   teq	%Q[tmp], %Q[cmpval]			\n"
	    "   itee	eq					\n"
	    "   teqeq	%R[tmp], %R[cmpval]			\n"
	    "   movne	%[ret], #0				\n"
	    "   bne	2f					\n"
	    "   strexd	%[ret], %Q[newval], %R[newval], [%[ptr]]\n"
	    "   teq	%[ret], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    "   mov	%[ret], #1				\n"
	    "2:							\n"
	    : [ret] "=&r" (ret),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r" (p),
	      [cmpval] "r" (cmpval),
	      [newval] "r" (newval)
	    : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_cmpset_long(volatile u_long *p, u_long cmpval, u_long newval)
{

	return (atomic_cmpset_32((volatile uint32_t *)p, cmpval, newval));
}

static __inline uint32_t
atomic_cmpset_acq_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
{
	uint32_t ret;

	ret = atomic_cmpset_32(p, cmpval, newval);
	dmb();
	return (ret);
}

static __inline uint64_t
atomic_cmpset_acq_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
	uint64_t ret;

	ret = atomic_cmpset_64(p, cmpval, newval);
	dmb();
	return (ret);
}

static __inline u_long
atomic_cmpset_acq_long(volatile u_long *p, u_long cmpval, u_long newval)
{
	u_long ret;

	ret = atomic_cmpset_long(p, cmpval, newval);
	dmb();
	return (ret);
}

static __inline uint32_t
atomic_cmpset_rel_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
{

	dmb();
	return (atomic_cmpset_32(p, cmpval, newval));
}

static __inline uint64_t
atomic_cmpset_rel_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{

	dmb();
	return (atomic_cmpset_64(p, cmpval, newval));
}

static __inline u_long
atomic_cmpset_rel_long(volatile u_long *p, u_long cmpval, u_long newval)
{

	dmb();
	return (atomic_cmpset_long(p, cmpval, newval));
}

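/*
 * Usage sketch (illustrative only): atomic_cmpset_acq_32() and
 * atomic_store_rel_32() pair up as a minimal spinlock, the classic use of
 * the acquire/release variants.  "lock" is an assumed volatile uint32_t *
 * initialized to 0:
 *
 *	while (atomic_cmpset_acq_32(lock, 0, 1) == 0)
 *		;				// spin until 0 -> 1 succeeds
 *	... critical section ...
 *	atomic_store_rel_32(lock, 0);		// release: dmb, then store
 */
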
static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0, ret = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%3]	\n"
	    "   add	%1, %0, %4	\n"
	    "   strex	%2, %1, [%3]	\n"
	    "   cmp	%2, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "+r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
	    : : "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
	    "   adds	%Q[tmp], %Q[ret], %Q[val]		\n"
	    "   adc	%R[tmp], %R[ret], %R[val]		\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [ret] "=&r" (ret),
	      [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r" (p),
	      [val] "r" (val)
	    : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long val)
{

	return (atomic_fetchadd_32((volatile uint32_t *)p, val));
}

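/*
 * Usage sketch (illustrative only): unlike atomic_add_32(),
 * atomic_fetchadd_32() returns the value the location held before the
 * addition, which is exactly what a ticket or ID allocator needs.
 * "next_id" is an assumed volatile uint32_t counter:
 *
 *	uint32_t id;
 *
 *	id = atomic_fetchadd_32(&next_id, 1);	// id is unique per caller
 */
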
static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
	uint32_t v;

	v = *p;
	dmb();
	return (v);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
	uint64_t ret;

	/*
	 * The only way to atomically load 64 bits is with LDREXD, which puts
	 * the exclusive monitor into the exclusive state, so reset it to open
	 * state with CLREX because we don't actually need to store anything.
	 */
	__asm __volatile(
	    "ldrexd	%Q[ret], %R[ret], [%[ptr]]	\n"
	    "clrex					\n"
	    : [ret] "=&r" (ret)
	    : [ptr] "r" (p)
	    : "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
	uint64_t ret;

	ret = atomic_load_64(p);
	dmb();
	return (ret);
}

static __inline u_long
atomic_load_acq_long(volatile u_long *p)
{
	u_long v;

	v = *p;
	dmb();
	return (v);
}

static __inline uint32_t
atomic_readandclear_32(volatile uint32_t *p)
{
	uint32_t ret, tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%3]	\n"
	    "   mov	%1, #0		\n"
	    "   strex	%2, %1, [%3]	\n"
	    "   cmp	%2, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p)
	    : : "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
	uint64_t ret, tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
	    "   mov	%Q[tmp], #0				\n"
	    "   mov	%R[tmp], #0				\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [ret] "=&r" (ret),
	      [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r" (p)
	    : "cc", "memory");
	return (ret);
}

static __inline u_long
atomic_readandclear_long(volatile u_long *p)
{

	return (atomic_readandclear_32((volatile uint32_t *)p));
}

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   orr	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
	    : : "cc", "memory");
}

static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   orr	%Q[tmp], %Q[val]			\n"
	    "   orr	%R[tmp], %R[val]			\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r" (p),
	      [val] "r" (val)
	    : "cc", "memory");
}

static __inline void
atomic_set_long(volatile u_long *address, u_long setmask)
{

	atomic_set_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL_LONG(set)

static __inline void
atomic_subtract_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp = 0, tmp2 = 0;

	__asm __volatile(
	    "1: ldrex	%0, [%2]	\n"
	    "   sub	%0, %0, %3	\n"
	    "   strex	%1, %0, [%2]	\n"
	    "   cmp	%1, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
	    : : "cc", "memory");
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   subs	%Q[tmp], %Q[val]			\n"
	    "   sbc	%R[tmp], %R[tmp], %R[val]		\n"
	    "   strexd	%[exf], %Q[tmp], %R[tmp], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [exf] "=&r" (exflag),
	      [tmp] "=&r" (tmp)
	    : [ptr] "r" (p),
	      [val] "r" (val)
	    : "cc", "memory");
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long val)
{

	atomic_subtract_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(subtract, 32)
ATOMIC_ACQ_REL(subtract, 64)
ATOMIC_ACQ_REL_LONG(subtract)

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	uint32_t exflag;

	/*
	 * The only way to atomically store 64 bits is with STREXD, which will
	 * succeed only if paired up with a preceding LDREXD using the same
	 * address, so we read and discard the existing value before storing.
	 */
	__asm __volatile(
	    "1:							\n"
	    "   ldrexd	%Q[tmp], %R[tmp], [%[ptr]]		\n"
	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [tmp] "=&r" (tmp),
	      [exf] "=&r" (exflag)
	    : [ptr] "r" (p),
	      [val] "r" (val)
	    : "cc", "memory");
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
{

	dmb();
	*p = v;
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

	dmb();
	atomic_store_64(p, val);
}

static __inline void
atomic_store_rel_long(volatile u_long *p, u_long v)
{

	dmb();
	*p = v;
}

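/*
 * Usage sketch (illustrative only): a release store pairs with an acquire
 * load for the usual publish/consume handoff.  "data" and "ready" are
 * assumed shared variables, with ready initially 0:
 *
 *	// producer
 *	data = 42;
 *	atomic_store_rel_32(&ready, 1);
 *
 *	// consumer
 *	while (atomic_load_acq_32(&ready) == 0)
 *		;
 *	// data is guaranteed to read as 42 here
 */
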
static __inline int
atomic_testandset_32(volatile uint32_t *p, u_int v)
{
	uint32_t tmp, tmp2, res, mask;

	mask = 1u << (v & 0x1f);

	__asm __volatile(
	    "1: ldrex	%0, [%4]	\n"
	    "   orr	%1, %0, %3	\n"
	    "   strex	%2, %1, [%4]	\n"
	    "   cmp	%2, #0		\n"
	    "   it	ne		\n"
	    "   bne	1b		\n"
	    : "=&r" (res), "=&r" (tmp), "=&r" (tmp2)
	    : "r" (mask), "r" (p)
	    : "cc", "memory");
	return ((res & mask) != 0);
}

static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{

	return (atomic_testandset_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandset_long(volatile u_long *p, u_int v)
{

	return (atomic_testandset_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandset_64(volatile uint64_t *p, u_int v)
{
	volatile uint32_t *p32;

	p32 = (volatile uint32_t *)p;
	/* Assume little-endian */
	if (v >= 32) {
		v &= 0x1f;
		p32++;
	}
	return (atomic_testandset_32(p32, v));
}

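/*
 * Usage sketch (illustrative only): atomic_testandset_32() returns the
 * previous state of the bit, so a single bit can serve as a try-lock.
 * "busy" is an assumed volatile uint32_t *:
 *
 *	if (atomic_testandset_32(busy, 0) == 0) {
 *		// bit 0 was clear and is now set: we own the resource
 *	}
 */
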
static __inline uint32_t
atomic_swap_32(volatile uint32_t *p, uint32_t v)
{
	uint32_t ret, exflag;

	__asm __volatile(
	    "1: ldrex	%[ret], [%[ptr]]		\n"
	    "   strex	%[exf], %[val], [%[ptr]]	\n"
	    "   teq	%[exf], #0			\n"
	    "   it	ne				\n"
	    "   bne	1b				\n"
	    : [ret] "=&r" (ret),
	      [exf] "=&r" (exflag)
	    : [val] "r" (v),
	      [ptr] "r" (p)
	    : "cc", "memory");
	return (ret);
}

static __inline uint64_t
atomic_swap_64(volatile uint64_t *p, uint64_t v)
{
	uint64_t ret;
	uint32_t exflag;

	__asm __volatile(
	    "1: ldrexd	%Q[ret], %R[ret], [%[ptr]]		\n"
	    "   strexd	%[exf], %Q[val], %R[val], [%[ptr]]	\n"
	    "   teq	%[exf], #0				\n"
	    "   it	ne					\n"
	    "   bne	1b					\n"
	    : [ret] "=&r" (ret),
	      [exf] "=&r" (exflag)
	    : [val] "r" (v),
	      [ptr] "r" (p)
	    : "cc", "memory");
	return (ret);
}

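/*
 * Usage sketch (illustrative only): atomic_swap_32() unconditionally
 * exchanges and returns the old value, e.g. draining a single-slot
 * mailbox.  "slot" is an assumed volatile uint32_t *:
 *
 *	uint32_t pending;
 *
 *	pending = atomic_swap_32(slot, 0);	// take the value, leave 0
 */
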
#undef ATOMIC_ACQ_REL
#undef ATOMIC_ACQ_REL_LONG

static __inline void
atomic_thread_fence_acq(void)
{

	dmb();
}

static __inline void
atomic_thread_fence_rel(void)
{

	dmb();
}

static __inline void
atomic_thread_fence_acq_rel(void)
{

	dmb();
}

static __inline void
atomic_thread_fence_seq_cst(void)
{

	dmb();
}

#endif /* _MACHINE_ATOMIC_V6_H_ */