sys/arm/include/atomic-v6.h

   1 /* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */
   2
   3 /*-
   4  * Copyright (C) 2003-2004 Olivier Houchard
   5  * Copyright (C) 1994-1997 Mark Brinicombe
   6  * Copyright (C) 1994 Brini
   7  * All rights reserved.
   8  *
   9  * This code is derived from software written for Brini by Mark Brinicombe
  10  *
  11  * Redistribution and use in source and binary forms, with or without
  12  * modification, are permitted provided that the following conditions
  13  * are met:
  14  * 1. Redistributions of source code must retain the above copyright
  15  *    notice, this list of conditions and the following disclaimer.
  16  * 2. Redistributions in binary form must reproduce the above copyright
  17  *    notice, this list of conditions and the following disclaimer in the
  18  *    documentation and/or other materials provided with the distribution.
  19  * 3. All advertising materials mentioning features or use of this software
  20  *    must display the following acknowledgement:
  21  *      This product includes software developed by Brini.
  22  * 4. The name of Brini may not be used to endorse or promote products
  23  *    derived from this software without specific prior written permission.
  24  *
  25  * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
  26  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  27  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  28  * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  29  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  30  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
  31  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  32  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  33  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  34  * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  35  *
  36  * $FreeBSD$
  37  */
  38
  39 #ifndef _MACHINE_ATOMIC_V6_H_
  40 #define _MACHINE_ATOMIC_V6_H_
  41
  42 #ifndef _MACHINE_ATOMIC_H_
  43 #error Do not include this file directly, use <machine/atomic.h>
  44 #endif
  45
   /*
    * Barrier primitives.  With __ARM_ARCH >= 7 the isb, dsb and dmb
    * instructions are emitted directly; with __ARM_ARCH == 6 the same three
    * names are implemented as mcr writes of zero to CP15 c7 (c5/4, c10/4 and
    * c10/5 respectively).  Every variant lists "memory" as clobbered.  Any
    * older architecture level is rejected with #error.
    */
   46 #if __ARM_ARCH >= 7
   47 #define isb()  __asm __volatile("isb" : : : "memory")
   48 #define dsb()  __asm __volatile("dsb" : : : "memory")
   49 #define dmb()  __asm __volatile("dmb" : : : "memory")
   50 #elif __ARM_ARCH >= 6
   51 #define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
   52 #define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
   53 #define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
   54 #else
   55 #error Only use this file with ARMv6 and later
   56 #endif
  57
   /* Full, write and read barriers: all three expand to dmb(). */
   58 #define mb()   dmb()
   59 #define wmb()  dmb()
   60 #define rmb()  dmb()
  61
   /*
    * Marker macro with no value.  The atomic_*_64 functions below are defined
    * unconditionally in this header; presumably other code tests this macro
    * to learn that they exist -- confirm against its users.
    */
   62 #define ARM_HAVE_ATOMIC64
  63
   /*
    * Generate atomic_<NAME>_acq_long() and atomic_<NAME>_rel_long() on top of
    * an existing atomic_<NAME>_long(p, v).  The _acq form performs the
    * operation and then issues dmb(); the _rel form issues dmb() and then
    * performs the operation.
    */
   64 #define ATOMIC_ACQ_REL_LONG(NAME)                                       \
   65 static __inline void                                                    \
   66 atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)                \
   67 {                                                                       \
   68         atomic_##NAME##_long(p, v);                                     \
   69         dmb();                                                          \
   70 }                                                                       \
   71                                                                         \
   72 static __inline  void                                                   \
   73 atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)                \
   74 {                                                                       \
   75         dmb();                                                          \
   76         atomic_##NAME##_long(p, v);                                     \
   77 }
  78
   /*
    * Generate atomic_<NAME>_acq_<WIDTH>() and atomic_<NAME>_rel_<WIDTH>() on
    * top of an existing atomic_<NAME>_<WIDTH>(p, v) operating on
    * uint<WIDTH>_t.  The _acq form performs the operation and then issues
    * dmb(); the _rel form issues dmb() and then performs the operation.
    */
   79 #define ATOMIC_ACQ_REL(NAME, WIDTH)                                     \
   80 static __inline  void                                                   \
   81 atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
   82 {                                                                       \
   83         atomic_##NAME##_##WIDTH(p, v);                                  \
   84         dmb();                                                          \
   85 }                                                                       \
   86                                                                         \
   87 static __inline  void                                                   \
   88 atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
   89 {                                                                       \
   90         dmb();                                                          \
   91         atomic_##NAME##_##WIDTH(p, v);                                  \
   92 }
  93
  94
   /*
    * Atomically add val to *p.  The loop loads *p with ldrex, adds val and
    * writes the sum back with strex; it branches back to the ldrex for as
    * long as the strex status result (tmp2) is non-zero.  No barrier is
    * issued.
    */
   95 static __inline void
   96 atomic_add_32(volatile uint32_t *p, uint32_t val)
   97 {
   98         uint32_t tmp = 0, tmp2 = 0;
   99
  100         __asm __volatile(
  101             "1: ldrex   %0, [%2]        \n"
  102             "   add     %0, %0, %3      \n"
  103             "   strex   %1, %0, [%2]    \n"
  104             "   cmp     %1, #0          \n"
  105             "   it      ne              \n"
  106             "   bne     1b              \n"
  107             : "=&r" (tmp), "+r" (tmp2)
  108             ,"+r" (p), "+r" (val) : : "cc", "memory");
  109 }
 110
  /*
   * Atomically add the 64-bit val to *p with an ldrexd/strexd retry loop.
   * adds works on the %Q halves and adc carries into the %R halves.  The
   * loop repeats while the strexd status (exflag) is non-zero.  No barrier
   * is issued.
   */
  111 static __inline void
  112 atomic_add_64(volatile uint64_t *p, uint64_t val)
  113 {
  114         uint64_t tmp;
  115         uint32_t exflag;
  116
  117         __asm __volatile(
  118             "1:                                                 \n"
  119             "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
  120             "   adds    %Q[tmp], %Q[val]                        \n"
  121             "   adc     %R[tmp], %R[tmp], %R[val]               \n"
  122             "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
  123             "   teq     %[exf], #0                              \n"
  124             "   it      ne                                      \n"
  125             "   bne     1b                                      \n"
  126             : [exf] "=&r" (exflag),
  127               [tmp] "=&r" (tmp)
  128             : [ptr] "r"   (p),
  129               [val] "r"   (val)
  130             : "cc", "memory");
  131 }
 132
  /*
   * u_long flavour of atomic_add_32(): p is cast to volatile uint32_t * and
   * the call is forwarded.  NOTE(review): relies on u_long being 32 bits
   * wide on this platform.
   */
  133 static __inline void
  134 atomic_add_long(volatile u_long *p, u_long val)
  135 {
  136
  137         atomic_add_32((volatile uint32_t *)p, val);
  138 }
 139
  /* Instantiate atomic_add_{acq,rel}_{32,64,long}() from the functions above. */
  140 ATOMIC_ACQ_REL(add, 32)
  141 ATOMIC_ACQ_REL(add, 64)
  142 ATOMIC_ACQ_REL_LONG(add)
 143
  /*
   * Atomically clear in *address every bit that is set in setmask (bic).
   * The ldrex/strex sequence is retried while the strex status (tmp2) is
   * non-zero.  No barrier is issued.
   */
  144 static __inline void
  145 atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
  146 {
  147         uint32_t tmp = 0, tmp2 = 0;
  148
  149         __asm __volatile(
  150             "1: ldrex   %0, [%2]        \n"
  151             "   bic     %0, %0, %3      \n"
  152             "   strex   %1, %0, [%2]    \n"
  153             "   cmp     %1, #0          \n"
  154             "   it      ne              \n"
  155             "   bne     1b              \n"
  156             : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
  157             : : "cc", "memory");
  158 }
 159
  /*
   * Atomically clear in the 64-bit *p every bit that is set in val.  Both
   * halves are masked with bic inside an ldrexd/strexd loop that repeats
   * while the strexd status (exflag) is non-zero.  No barrier is issued.
   */
  160 static __inline void
  161 atomic_clear_64(volatile uint64_t *p, uint64_t val)
  162 {
  163         uint64_t tmp;
  164         uint32_t exflag;
  165
  166         __asm __volatile(
  167             "1:                                                 \n"
  168             "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
  169             "   bic     %Q[tmp], %Q[val]                        \n"
  170             "   bic     %R[tmp], %R[val]                        \n"
  171             "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
  172             "   teq     %[exf], #0                              \n"
  173             "   it      ne                                      \n"
  174             "   bne     1b                                      \n"
  175             : [exf] "=&r" (exflag),
  176               [tmp] "=&r" (tmp)
  177             : [ptr] "r"   (p),
  178               [val] "r"   (val)
  179             : "cc", "memory");
  180 }
 181
  /*
   * u_long flavour of atomic_clear_32(): address is cast to
   * volatile uint32_t * and the call is forwarded.  NOTE(review): relies on
   * u_long being 32 bits wide on this platform.
   */
  182 static __inline void
  183 atomic_clear_long(volatile u_long *address, u_long setmask)
  184 {
  185
  186         atomic_clear_32((volatile uint32_t *)address, setmask);
  187 }
 188
  /* Instantiate atomic_clear_{acq,rel}_{32,64,long}() from the functions above. */
  189 ATOMIC_ACQ_REL(clear, 32)
  190 ATOMIC_ACQ_REL(clear, 64)
  191 ATOMIC_ACQ_REL_LONG(clear)
 192
 193 #define ATOMIC_FCMPSET_CODE(RET, TYPE, SUF)                   \
 194     {                                                         \
 195         TYPE tmp;                                             \
 196                                                               \
 197         __asm __volatile(                                     \
 198             "1: ldrex" SUF "   %[tmp], [%[ptr]]          \n"  \
 199             "   ldr            %[ret], [%[oldv]]         \n"  \
 200             "   teq            %[tmp], %[ret]            \n"  \
 201             "   ittee          ne                        \n"  \
 202             "   str" SUF "ne   %[tmp], [%[oldv]]         \n"  \
 203             "   movne          %[ret], #0                \n"  \
 204             "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n"  \
 205             "   eorseq         %[ret], #1                \n"  \
 206             "   beq            1b                        \n"  \
 207             : [ret] "=&r" (RET),                              \
 208               [tmp] "=&r" (tmp)                               \
 209             : [ptr] "r"   (_ptr),                             \
 210               [oldv] "r"  (_old),                             \
 211               [newv] "r"  (_new)                              \
 212             : "cc", "memory");                                \
 213     }
 214
 215 #define ATOMIC_FCMPSET_CODE64(RET)                                 \
 216     {                                                              \
 217         uint64_t cmp, tmp;                                         \
 218                                                                    \
 219         __asm __volatile(                                          \
 220             "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n"  \
 221             "   ldrd     %Q[cmp], %R[cmp], [%[oldv]]          \n"  \
 222             "   teq      %Q[tmp], %Q[cmp]                     \n"  \
 223             "   it       eq                                   \n"  \
 224             "   teqeq    %R[tmp], %R[cmp]                     \n"  \
 225             "   ittee    ne                                   \n"  \
 226             "   movne    %[ret], #0                           \n"  \
 227             "   strdne   %[cmp], [%[oldv]]                    \n"  \
 228             "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n"  \
 229             "   eorseq   %[ret], #1                           \n"  \
 230             "   beq      1b                                   \n"  \
 231             : [ret] "=&r" (RET),                                   \
 232               [cmp] "=&r" (cmp),                                   \
 233               [tmp] "=&r" (tmp)                                    \
 234             : [ptr] "r"   (_ptr),                                  \
 235               [oldv] "r"  (_old),                                  \
 236               [newv] "r"  (_new)                                   \
 237             : "cc", "memory");                                     \
 238     }
 239
  /*
   * 8-bit fcmpset: expands ATOMIC_FCMPSET_CODE with the "b" suffix, which
   * picks up _ptr, _old and _new by name.  Returns the macro's result
   * (1 when _new was stored, 0 when the comparison failed).  No barrier.
   */
  240 static __inline int
  241 atomic_fcmpset_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
  242 {
  243         int ret;
  244
  245         ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
  246         return (ret);
  247 }
 248
  /*
   * 8-bit fcmpset followed by dmb().  The barrier is issued whether or not
   * the store took place.  Returns the macro's result.
   */
  249 static __inline int
  250 atomic_fcmpset_acq_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
  251 {
  252         int ret;
  253
  254         ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
  255         dmb();
  256         return (ret);
  257 }
 258
  /* 8-bit fcmpset preceded by dmb().  Returns the macro's result. */
  259 static __inline int
  260 atomic_fcmpset_rel_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
  261 {
  262         int ret;
  263
  264         dmb();
  265         ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
  266         return (ret);
  267 }
 268
  /*
   * 16-bit fcmpset: expands ATOMIC_FCMPSET_CODE with the "h" suffix, which
   * picks up _ptr, _old and _new by name.  Returns the macro's result
   * (1 when _new was stored, 0 when the comparison failed).  No barrier.
   */
  269 static __inline int
  270 atomic_fcmpset_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
  271 {
  272         int ret;
  273
  274         ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
  275         return (ret);
  276 }
 277
  /*
   * 16-bit fcmpset followed by dmb().  The barrier is issued whether or not
   * the store took place.  Returns the macro's result.
   */
  278 static __inline int
  279 atomic_fcmpset_acq_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
  280 {
  281         int ret;
  282
  283         ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
  284         dmb();
  285         return (ret);
  286 }
 287
  /* 16-bit fcmpset preceded by dmb().  Returns the macro's result. */
  288 static __inline int
  289 atomic_fcmpset_rel_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
  290 {
  291         int ret;
  292
  293         dmb();
  294         ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
  295         return (ret);
  296 }
 297
  /*
   * 32-bit fcmpset: expands ATOMIC_FCMPSET_CODE with an empty suffix, which
   * picks up _ptr, _old and _new by name.  Returns the macro's result
   * (1 when _new was stored, 0 when the comparison failed).  No barrier.
   */
  298 static __inline int
  299 atomic_fcmpset_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
  300 {
  301         int ret;
  302
  303         ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
  304         return (ret);
  305 }
 306
  /*
   * 32-bit fcmpset followed by dmb().  The barrier is issued whether or not
   * the store took place.  Returns the macro's result.
   */
  307 static __inline int
  308 atomic_fcmpset_acq_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
  309 {
  310         int ret;
  311
  312         ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
  313         dmb();
  314         return (ret);
  315 }
 316
  /* 32-bit fcmpset preceded by dmb().  Returns the macro's result. */
  317 static __inline int
  318 atomic_fcmpset_rel_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
  319 {
  320         int ret;
  321
  322         dmb();
  323         ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
  324         return (ret);
  325 }
 326
  /*
   * long fcmpset: expands ATOMIC_FCMPSET_CODE with TYPE long and an empty
   * suffix, i.e. the same word-sized instructions as the 32-bit variant.
   * Returns the macro's result.  No barrier.
   */
  327 static __inline int
  328 atomic_fcmpset_long(volatile long *_ptr, long *_old, long _new)
  329 {
  330         int ret;
  331
  332         ATOMIC_FCMPSET_CODE(ret, long, "");
  333         return (ret);
  334 }
 335
  /*
   * long fcmpset followed by dmb().  The barrier is issued whether or not
   * the store took place.  Returns the macro's result.
   */
  336 static __inline int
  337 atomic_fcmpset_acq_long(volatile long *_ptr, long *_old, long _new)
  338 {
  339         int ret;
  340
  341         ATOMIC_FCMPSET_CODE(ret, long, "");
  342         dmb();
  343         return (ret);
  344 }
 345
  /* long fcmpset preceded by dmb().  Returns the macro's result. */
  346 static __inline int
  347 atomic_fcmpset_rel_long(volatile long *_ptr, long *_old, long _new)
  348 {
  349         int ret;
  350
  351         dmb();
  352         ATOMIC_FCMPSET_CODE(ret, long, "");
  353         return (ret);
  354 }
 355
  /*
   * 64-bit fcmpset: expands ATOMIC_FCMPSET_CODE64, which picks up _ptr, _old
   * and _new by name.  Returns the macro's result (1 when _new was stored,
   * 0 when the comparison failed).  No barrier.
   */
  356 static __inline int
  357 atomic_fcmpset_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
  358 {
  359         int ret;
  360
  361         ATOMIC_FCMPSET_CODE64(ret);
  362         return (ret);
  363 }
 364
  /*
   * 64-bit fcmpset followed by dmb().  The barrier is issued whether or not
   * the store took place.  Returns the macro's result.
   */
  365 static __inline int
  366 atomic_fcmpset_acq_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
  367 {
  368         int ret;
  369
  370         ATOMIC_FCMPSET_CODE64(ret);
  371         dmb();
  372         return (ret);
  373 }
 374
  /* 64-bit fcmpset preceded by dmb().  Returns the macro's result. */
  375 static __inline int
  376 atomic_fcmpset_rel_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
  377 {
  378         int ret;
  379
  380         dmb();
  381         ATOMIC_FCMPSET_CODE64(ret);
  382         return (ret);
  383 }
 384
  /*
   * Body shared by the 8-, 16-, 32-bit and long atomic_cmpset_*() functions.
   * It expects _ptr, _old and _new to be in scope at the expansion site; RET
   * names the int result and SUF the ldrex/strex size suffix.
   *
   * The location is read with ldrex<SUF> into RET and compared with _old.
   * On mismatch RET becomes 0.  On match _new is stored with strex<SUF>;
   * RET becomes the strex status exclusive-ored with 1, and the sequence
   * restarts when that result is 0.
   */
  385 #define ATOMIC_CMPSET_CODE(RET, SUF)                         \
  386     {                                                        \
  387         __asm __volatile(                                    \
  388             "1: ldrex" SUF "   %[ret], [%[ptr]]          \n" \
  389             "   teq            %[ret], %[oldv]           \n" \
  390             "   itee           ne                        \n" \
  391             "   movne          %[ret], #0                \n" \
  392             "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n" \
  393             "   eorseq         %[ret], #1                \n" \
  394             "   beq            1b                        \n" \
  395             : [ret] "=&r" (RET)                              \
  396             : [ptr] "r"   (_ptr),                            \
  397               [oldv] "r"  (_old),                            \
  398               [newv] "r"  (_new)                             \
  399             : "cc", "memory");                               \
  400     }
 401
  /*
   * Body shared by the atomic_cmpset_*64() functions.  It expects _ptr, _old
   * and _new to be in scope at the expansion site; RET names the int result.
   *
   * *_ptr is read with ldrexd into tmp and its two halves are compared with
   * the halves of _old.  On mismatch RET becomes 0.  On match _new is stored
   * with strexd; RET becomes the strexd status exclusive-ored with 1, and
   * the sequence restarts when that result is 0.
   */
  402 #define ATOMIC_CMPSET_CODE64(RET)                                 \
  403     {                                                             \
  404         uint64_t tmp;                                             \
  405                                                                   \
  406         __asm __volatile(                                         \
  407             "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n" \
  408             "   teq      %Q[tmp], %Q[oldv]                    \n" \
  409             "   it       eq                                   \n" \
  410             "   teqeq    %R[tmp], %R[oldv]                    \n" \
  411             "   itee     ne                                   \n" \
  412             "   movne    %[ret], #0                           \n" \
  413             "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n" \
  414             "   eorseq   %[ret], #1                           \n" \
  415             "   beq      1b                                   \n" \
  416             : [ret] "=&r" (RET),                                  \
  417               [tmp] "=&r" (tmp)                                   \
  418             : [ptr] "r"   (_ptr),                                 \
  419               [oldv] "r"  (_old),                                 \
  420               [newv] "r"  (_new)                                  \
  421             : "cc", "memory");                                    \
  422     }
 423
  /*
   * 8-bit compare-and-set: expands ATOMIC_CMPSET_CODE with the "b" suffix,
   * which picks up _ptr, _old and _new by name.  Returns 1 when _new was
   * stored and 0 when *_ptr did not equal _old.  No barrier.
   */
  424 static __inline int
  425 atomic_cmpset_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
  426 {
  427         int ret;
  428
  429         ATOMIC_CMPSET_CODE(ret, "b");
  430         return (ret);
  431 }
 432
  /*
   * 8-bit compare-and-set followed by dmb().  The barrier is issued whether
   * or not the store took place.  Returns the macro's result.
   */
  433 static __inline int
  434 atomic_cmpset_acq_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
  435 {
  436         int ret;
  437
  438         ATOMIC_CMPSET_CODE(ret, "b");
  439         dmb();
  440         return (ret);
  441 }
 442
  /* 8-bit compare-and-set preceded by dmb().  Returns the macro's result. */
  443 static __inline int
  444 atomic_cmpset_rel_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
  445 {
  446         int ret;
  447
  448         dmb();
  449         ATOMIC_CMPSET_CODE(ret, "b");
  450         return (ret);
  451 }
 452
  /*
   * 16-bit compare-and-set: expands ATOMIC_CMPSET_CODE with the "h" suffix,
   * which picks up _ptr, _old and _new by name.  Returns 1 when _new was
   * stored and 0 when *_ptr did not equal _old.  No barrier.
   */
  453 static __inline int
  454 atomic_cmpset_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
  455 {
  456         int ret;
  457
  458         ATOMIC_CMPSET_CODE(ret, "h");
  459         return (ret);
  460 }
 461
  /*
   * 16-bit compare-and-set followed by dmb().  The barrier is issued whether
   * or not the store took place.  Returns the macro's result.
   */
  462 static __inline int
  463 atomic_cmpset_acq_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
  464 {
  465         int ret;
  466
  467         ATOMIC_CMPSET_CODE(ret, "h");
  468         dmb();
  469         return (ret);
  470 }
 471
  /* 16-bit compare-and-set preceded by dmb().  Returns the macro's result. */
  472 static __inline int
  473 atomic_cmpset_rel_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
  474 {
  475         int ret;
  476
  477         dmb();
  478         ATOMIC_CMPSET_CODE(ret, "h");
  479         return (ret);
  480 }
 481
  /*
   * 32-bit compare-and-set: expands ATOMIC_CMPSET_CODE with an empty suffix,
   * which picks up _ptr, _old and _new by name.  Returns 1 when _new was
   * stored and 0 when *_ptr did not equal _old.  No barrier.
   */
  482 static __inline int
  483 atomic_cmpset_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
  484 {
  485         int ret;
  486
  487         ATOMIC_CMPSET_CODE(ret, "");
  488         return (ret);
  489 }
 490
  /*
   * 32-bit compare-and-set followed by dmb().  The barrier is issued whether
   * or not the store took place.  Returns the macro's result.
   */
  491 static __inline int
  492 atomic_cmpset_acq_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
  493 {
  494         int ret;
  495
  496         ATOMIC_CMPSET_CODE(ret, "");
  497         dmb();
  498         return (ret);
  499 }
 500
  /* 32-bit compare-and-set preceded by dmb().  Returns the macro's result. */
  501 static __inline int
  502 atomic_cmpset_rel_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
  503 {
  504         int ret;
  505
  506         dmb();
  507         ATOMIC_CMPSET_CODE(ret, "");
  508         return (ret);
  509 }
 510
  /*
   * long compare-and-set: expands ATOMIC_CMPSET_CODE with an empty suffix,
   * i.e. the same word-sized instructions as the 32-bit variant.  Returns 1
   * when _new was stored and 0 when *_ptr did not equal _old.  No barrier.
   */
  511 static __inline int
  512 atomic_cmpset_long(volatile long *_ptr, long _old, long _new)
  513 {
  514         int ret;
  515
  516         ATOMIC_CMPSET_CODE(ret, "");
  517         return (ret);
  518 }
 519
  /*
   * long compare-and-set followed by dmb().  The barrier is issued whether
   * or not the store took place.  Returns the macro's result.
   */
  520 static __inline int
  521 atomic_cmpset_acq_long(volatile long *_ptr, long _old, long _new)
  522 {
  523         int ret;
  524
  525         ATOMIC_CMPSET_CODE(ret, "");
  526         dmb();
  527         return (ret);
  528 }
 529
  /* long compare-and-set preceded by dmb().  Returns the macro's result. */
  530 static __inline int
  531 atomic_cmpset_rel_long(volatile long *_ptr, long _old, long _new)
  532 {
  533         int ret;
  534
  535         dmb();
  536         ATOMIC_CMPSET_CODE(ret, "");
  537         return (ret);
  538 }
 539
  /*
   * 64-bit compare-and-set: expands ATOMIC_CMPSET_CODE64, which picks up
   * _ptr, _old and _new by name.  Returns 1 when _new was stored and 0 when
   * *_ptr did not equal _old.  No barrier.
   */
  540 static __inline int
  541 atomic_cmpset_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
  542 {
  543         int ret;
  544
  545         ATOMIC_CMPSET_CODE64(ret);
  546         return (ret);
  547 }
 548
  /*
   * 64-bit compare-and-set followed by dmb().  The barrier is issued whether
   * or not the store took place.  Returns the macro's result.
   */
  549 static __inline int
  550 atomic_cmpset_acq_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
  551 {
  552         int ret;
  553
  554         ATOMIC_CMPSET_CODE64(ret);
  555         dmb();
  556         return (ret);
  557 }
 558
  /* 64-bit compare-and-set preceded by dmb().  Returns the macro's result. */
  559 static __inline int
  560 atomic_cmpset_rel_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
  561 {
  562         int ret;
  563
  564         dmb();
  565         ATOMIC_CMPSET_CODE64(ret);
  566         return (ret);
  567 }
 568
  /*
   * Atomically add val to *p and return the value *p held before the
   * addition (the ldrex result, kept in ret).  The sum is built in tmp and
   * the loop repeats while the strex status (tmp2) is non-zero.  No barrier
   * is issued.
   */
  569 static __inline uint32_t
  570 atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
  571 {
  572         uint32_t tmp = 0, tmp2 = 0, ret = 0;
  573
  574         __asm __volatile(
  575             "1: ldrex   %0, [%3]        \n"
  576             "   add     %1, %0, %4      \n"
  577             "   strex   %2, %1, [%3]    \n"
  578             "   cmp     %2, #0          \n"
  579             "   it      ne              \n"
  580             "   bne     1b              \n"
  581             : "+r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
  582             : : "cc", "memory");
  583         return (ret);
  584 }
 585
  /*
   * Atomically add the 64-bit val to *p and return the value *p held before
   * the addition.  The old value stays in ret while the sum is formed in tmp
   * with adds/adc; the loop repeats while the strexd status (exflag) is
   * non-zero.  No barrier is issued.
   */
  586 static __inline uint64_t
  587 atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
  588 {
  589         uint64_t ret, tmp;
  590         uint32_t exflag;
  591
  592         __asm __volatile(
  593             "1:                                                 \n"
  594             "   ldrexd  %Q[ret], %R[ret], [%[ptr]]              \n"
  595             "   adds    %Q[tmp], %Q[ret], %Q[val]               \n"
  596             "   adc     %R[tmp], %R[ret], %R[val]               \n"
  597             "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
  598             "   teq     %[exf], #0                              \n"
  599             "   it      ne                                      \n"
  600             "   bne     1b                                      \n"
  601             : [ret] "=&r" (ret),
  602               [exf] "=&r" (exflag),
  603               [tmp] "=&r" (tmp)
  604             : [ptr] "r"   (p),
  605               [val] "r"   (val)
  606             : "cc", "memory");
  607         return (ret);
  608 }
 609
  /*
   * u_long flavour of atomic_fetchadd_32(): p is cast to
   * volatile uint32_t * and the previous value is returned as u_long.
   * NOTE(review): relies on u_long being 32 bits wide on this platform.
   */
  610 static __inline u_long
  611 atomic_fetchadd_long(volatile u_long *p, u_long val)
  612 {
  613
  614         return (atomic_fetchadd_32((volatile uint32_t *)p, val));
  615 }
 616
  /* Read *p, then issue dmb(), then return the value that was read. */
  617 static __inline uint32_t
  618 atomic_load_acq_32(volatile uint32_t *p)
  619 {
  620         uint32_t v;
  621
  622         v = *p;
  623         dmb();
  624         return (v);
  625 }
 626
  /*
   * Return the 64-bit value at *p, read with a single ldrexd followed by
   * clrex.  No barrier is issued and there is no retry loop.
   */
  627 static __inline uint64_t
  628 atomic_load_64(volatile uint64_t *p)
  629 {
  630         uint64_t ret;
  631
  632         /*
  633          * The only way to atomically load 64 bits is with LDREXD which puts the
  634          * exclusive monitor into the exclusive state, so reset it to open state
  635          * with CLREX because we don't actually need to store anything.
  636          */
  637         __asm __volatile(
  638             "ldrexd     %Q[ret], %R[ret], [%[ptr]]      \n"
  639             "clrex                                      \n"
  640             : [ret] "=&r" (ret)
  641             : [ptr] "r"   (p)
  642             : "cc", "memory");
  643         return (ret);
  644 }
 645
  /* atomic_load_64(p) followed by dmb(); returns the value that was loaded. */
  646 static __inline uint64_t
  647 atomic_load_acq_64(volatile uint64_t *p)
  648 {
  649         uint64_t ret;
  650
  651         ret = atomic_load_64(p);
  652         dmb();
  653         return (ret);
  654 }
 655
  /* Read *p, then issue dmb(), then return the value that was read. */
  656 static __inline u_long
  657 atomic_load_acq_long(volatile u_long *p)
  658 {
  659         u_long v;
  660
  661         v = *p;
  662         dmb();
  663         return (v);
  664 }
 665
  /*
   * Atomically replace *p with 0 and return the value it held.  The old
   * value is read with ldrex into ret, a zero is built in tmp and written
   * with strex; the loop repeats while the strex status (tmp2) is non-zero.
   * No barrier is issued.
   */
  666 static __inline uint32_t
  667 atomic_readandclear_32(volatile uint32_t *p)
  668 {
  669         uint32_t ret, tmp = 0, tmp2 = 0;
  670
  671         __asm __volatile(
  672             "1: ldrex   %0, [%3]        \n"
  673             "   mov     %1, #0          \n"
  674             "   strex   %2, %1, [%3]    \n"
  675             "   cmp     %2, #0          \n"
  676             "   it      ne              \n"
  677             "   bne     1b              \n"
  678             : "=r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p)
  679             : : "cc", "memory");
  680         return (ret);
  681 }
 682
  /*
   * Atomically replace the 64-bit *p with 0 and return the value it held.
   * Both halves of tmp are zeroed and written with strexd; the loop repeats
   * while the strexd status (exflag) is non-zero.  No barrier is issued.
   */
  683 static __inline uint64_t
  684 atomic_readandclear_64(volatile uint64_t *p)
  685 {
  686         uint64_t ret, tmp;
  687         uint32_t exflag;
  688
  689         __asm __volatile(
  690             "1:                                                 \n"
  691             "   ldrexd  %Q[ret], %R[ret], [%[ptr]]              \n"
  692             "   mov     %Q[tmp], #0                             \n"
  693             "   mov     %R[tmp], #0                             \n"
  694             "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
  695             "   teq     %[exf], #0                              \n"
  696             "   it      ne                                      \n"
  697             "   bne     1b                                      \n"
  698             : [ret] "=&r" (ret),
  699               [exf] "=&r" (exflag),
  700               [tmp] "=&r" (tmp)
  701             : [ptr] "r"   (p)
  702             : "cc", "memory");
  703         return (ret);
  704 }
 705
  /*
   * u_long flavour of atomic_readandclear_32(): p is cast to
   * volatile uint32_t * and the previous value is returned as u_long.
   * NOTE(review): relies on u_long being 32 bits wide on this platform.
   */
  706 static __inline u_long
  707 atomic_readandclear_long(volatile u_long *p)
  708 {
  709
  710         return (atomic_readandclear_32((volatile uint32_t *)p));
  711 }
 712
  /*
   * Atomically set in *address every bit that is set in setmask (orr).  The
   * ldrex/strex sequence is retried while the strex status (tmp2) is
   * non-zero.  No barrier is issued.
   */
  713 static __inline void
  714 atomic_set_32(volatile uint32_t *address, uint32_t setmask)
  715 {
  716         uint32_t tmp = 0, tmp2 = 0;
  717
  718         __asm __volatile(
  719             "1: ldrex   %0, [%2]        \n"
  720             "   orr     %0, %0, %3      \n"
  721             "   strex   %1, %0, [%2]    \n"
  722             "   cmp     %1, #0          \n"
  723             "   it      ne              \n"
  724             "   bne     1b              \n"
  725             : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
  726             : : "cc", "memory");
  727 }
 728
  /*
   * Atomically set in the 64-bit *p every bit that is set in val.  Both
   * halves are combined with orr inside an ldrexd/strexd loop that repeats
   * while the strexd status (exflag) is non-zero.  No barrier is issued.
   */
  729 static __inline void
  730 atomic_set_64(volatile uint64_t *p, uint64_t val)
  731 {
  732         uint64_t tmp;
  733         uint32_t exflag;
  734
  735         __asm __volatile(
  736             "1:                                                 \n"
  737             "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
  738             "   orr     %Q[tmp], %Q[val]                        \n"
  739             "   orr     %R[tmp], %R[val]                        \n"
  740             "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
  741             "   teq     %[exf], #0                              \n"
  742             "   it      ne                                      \n"
  743             "   bne     1b                                      \n"
  744             : [exf] "=&r" (exflag),
  745               [tmp] "=&r" (tmp)
  746             : [ptr] "r"   (p),
  747               [val] "r"   (val)
  748             : "cc", "memory");
  749 }
 750
  /*
   * u_long flavour of atomic_set_32(): address is cast to
   * volatile uint32_t * and the call is forwarded.  NOTE(review): relies on
   * u_long being 32 bits wide on this platform.
   */
  751 static __inline void
  752 atomic_set_long(volatile u_long *address, u_long setmask)
  753 {
  754
  755         atomic_set_32((volatile uint32_t *)address, setmask);
  756 }
 757
  /* Instantiate atomic_set_{acq,rel}_{32,64,long}() from the functions above. */
  758 ATOMIC_ACQ_REL(set, 32)
  759 ATOMIC_ACQ_REL(set, 64)
  760 ATOMIC_ACQ_REL_LONG(set)
 761
  /*
   * Atomically subtract val from *p.  The ldrex/sub/strex sequence is
   * retried while the strex status (tmp2) is non-zero.  No barrier is
   * issued.
   */
  762 static __inline void
  763 atomic_subtract_32(volatile uint32_t *p, uint32_t val)
  764 {
  765         uint32_t tmp = 0, tmp2 = 0;
  766
  767         __asm __volatile(
  768             "1: ldrex   %0, [%2]        \n"
  769             "   sub     %0, %0, %3      \n"
  770             "   strex   %1, %0, [%2]    \n"
  771             "   cmp     %1, #0          \n"
  772             "   it      ne              \n"
  773             "   bne     1b              \n"
  774             : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
  775             : : "cc", "memory");
  776 }
 777
  /*
   * Atomically subtract the 64-bit val from *p with an ldrexd/strexd retry
   * loop.  subs works on the %Q halves and sbc propagates the borrow into
   * the %R halves.  The loop repeats while the strexd status (exflag) is
   * non-zero.  No barrier is issued.
   */
  778 static __inline void
  779 atomic_subtract_64(volatile uint64_t *p, uint64_t val)
  780 {
  781         uint64_t tmp;
  782         uint32_t exflag;
  783
  784         __asm __volatile(
  785             "1:                                                 \n"
  786             "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
  787             "   subs    %Q[tmp], %Q[val]                        \n"
  788             "   sbc     %R[tmp], %R[tmp], %R[val]               \n"
  789             "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
  790             "   teq     %[exf], #0                              \n"
  791             "   it      ne                                      \n"
  792             "   bne     1b                                      \n"
  793             : [exf] "=&r" (exflag),
  794               [tmp] "=&r" (tmp)
  795             : [ptr] "r"   (p),
  796               [val] "r"   (val)
  797             : "cc", "memory");
  798 }
 799
  /*
   * u_long flavour of atomic_subtract_32(): p is cast to
   * volatile uint32_t * and the call is forwarded.  NOTE(review): relies on
   * u_long being 32 bits wide on this platform.
   */
  800 static __inline void
  801 atomic_subtract_long(volatile u_long *p, u_long val)
  802 {
  803
  804         atomic_subtract_32((volatile uint32_t *)p, val);
  805 }
 806
  /* Instantiate atomic_subtract_{acq,rel}_{32,64,long}() from the functions above. */
  807 ATOMIC_ACQ_REL(subtract, 32)
  808 ATOMIC_ACQ_REL(subtract, 64)
  809 ATOMIC_ACQ_REL_LONG(subtract)
 810
  /*
   * Store the 64-bit val to *p.  The value read by ldrexd lands in tmp and
   * is never used; the loop repeats while the strexd status (exflag) is
   * non-zero.  No barrier is issued.
   */
  811 static __inline void
  812 atomic_store_64(volatile uint64_t *p, uint64_t val)
  813 {
  814         uint64_t tmp;
  815         uint32_t exflag;
  816
  817         /*
  818          * The only way to atomically store 64 bits is with STREXD, which will
  819          * succeed only if paired up with a preceding LDREXD using the same
  820          * address, so we read and discard the existing value before storing.
  821          */
  822         __asm __volatile(
  823             "1:                                                 \n"
  824             "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
  825             "   strexd  %[exf], %Q[val], %R[val], [%[ptr]]      \n"
  826             "   teq     %[exf], #0                              \n"
  827             "   it      ne                                      \n"
  828             "   bne     1b                                      \n"
  829             : [tmp] "=&r" (tmp),
  830               [exf] "=&r" (exflag)
  831             : [ptr] "r"   (p),
  832               [val] "r"   (val)
  833             : "cc", "memory");
  834 }
 835
  /* Issue dmb(), then store v to *p with a plain volatile write. */
  836 static __inline void
  837 atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
  838 {
  839
  840         dmb();
  841         *p = v;
  842 }
 843
  /* Issue dmb(), then store val to *p through atomic_store_64(). */
  844 static __inline void
  845 atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
  846 {
  847
  848         dmb();
  849         atomic_store_64(p, val);
  850 }
 851
  /* Issue dmb(), then store v to *p with a plain volatile write. */
  852 static __inline void
  853 atomic_store_rel_long(volatile u_long *p, u_long v)
  854 {
  855
  856         dmb();
  857         *p = v;
  858 }
 859
  /*
   * Atomically set bit (v & 0x1f) of *p and report its previous state.
   * The old word is read with ldrex into res, or-ed with the bit mask and
   * written back with strex; the loop repeats while the strex status (tmp2)
   * is non-zero.  Returns 1 if the bit was already set in the old word,
   * otherwise 0.  No barrier is issued.
   */
  860 static __inline int
  861 atomic_testandset_32(volatile uint32_t *p, u_int v)
  862 {
  863         uint32_t tmp, tmp2, res, mask;
  864
  865         mask = 1u << (v & 0x1f);
  866         tmp = tmp2 = 0;
  867         __asm __volatile(
  868         "1:     ldrex   %0, [%4]        \n"
  869         "       orr     %1, %0, %3      \n"
  870         "       strex   %2, %1, [%4]    \n"
  871         "       cmp     %2, #0          \n"
  872         "       it      ne              \n"
  873         "       bne     1b              \n"
  874         : "=&r" (res), "=&r" (tmp), "=&r" (tmp2)
  875         : "r" (mask), "r" (p)
  876         : "cc", "memory");
  877         return ((res & mask) != 0);
  878 }
 879
  /*
   * u_int flavour of atomic_testandset_32(): p is cast to
   * volatile uint32_t * and the result is returned unchanged.
   */
  880 static __inline int
  881 atomic_testandset_int(volatile u_int *p, u_int v)
  882 {
  883
  884         return (atomic_testandset_32((volatile uint32_t *)p, v));
  885 }
 886
  /*
   * u_long flavour of atomic_testandset_32(): p is cast to
   * volatile uint32_t * and the result is returned unchanged.
   * NOTE(review): relies on u_long being 32 bits wide on this platform.
   */
  887 static __inline int
  888 atomic_testandset_long(volatile u_long *p, u_int v)
  889 {
  890
  891         return (atomic_testandset_32((volatile uint32_t *)p, v));
  892 }
 893
  /*
   * Test-and-set on a 64-bit object, done as a 32-bit operation on one of
   * its two words.  For v < 32 the first word is used with bit v; for
   * v >= 32 the second word is used with bit (v & 0x1f).  Note that any
   * v >= 64 therefore also lands in the second word.  Returns the result of
   * atomic_testandset_32() on the selected word.
   */
  894 static __inline int
  895 atomic_testandset_64(volatile uint64_t *p, u_int v)
  896 {
  897         volatile uint32_t *p32;
  898
  899         p32 = (volatile uint32_t *)p;
  900         /* Assume little-endian */
  901         if (v >= 32) {
  902                 v &= 0x1f;
  903                 p32++;
  904         }
  905         return (atomic_testandset_32(p32, v));
  906 }
 907
  /*
   * Atomically store v to *p and return the value *p held before.  The
   * ldrex/strex pair is retried while the strex status (exflag) is
   * non-zero.  No barrier is issued.
   */
  908 static __inline uint32_t
  909 atomic_swap_32(volatile uint32_t *p, uint32_t v)
  910 {
  911         uint32_t ret, exflag;
  912
  913         __asm __volatile(
  914             "1: ldrex   %[ret], [%[ptr]]                \n"
  915             "   strex   %[exf], %[val], [%[ptr]]        \n"
  916             "   teq     %[exf], #0                      \n"
  917             "   it      ne                              \n"
  918             "   bne     1b                              \n"
  919             : [ret] "=&r"  (ret),
  920               [exf] "=&r" (exflag)
  921             : [val] "r"  (v),
  922               [ptr] "r"  (p)
  923             : "cc", "memory");
  924         return (ret);
  925 }
 926
  /*
   * Atomically store the 64-bit v to *p and return the value *p held
   * before.  The ldrexd/strexd pair is retried while the strexd status
   * (exflag) is non-zero.  No barrier is issued.
   */
  927 static __inline uint64_t
  928 atomic_swap_64(volatile uint64_t *p, uint64_t v)
  929 {
  930         uint64_t ret;
  931         uint32_t exflag;
  932
  933         __asm __volatile(
  934             "1: ldrexd  %Q[ret], %R[ret], [%[ptr]]              \n"
  935             "   strexd  %[exf], %Q[val], %R[val], [%[ptr]]      \n"
  936             "   teq     %[exf], #0                              \n"
  937             "   it      ne                                      \n"
  938             "   bne     1b                                      \n"
  939             : [ret] "=&r" (ret),
  940               [exf] "=&r" (exflag)
  941             : [val] "r"   (v),
  942               [ptr] "r"   (p)
  943             : "cc", "memory");
  944         return (ret);
  945 }
 946
  /* The generator macros are not used past this point; drop them. */
  947 #undef ATOMIC_ACQ_REL
  948 #undef ATOMIC_ACQ_REL_LONG
 949
  /* Acquire fence: implemented as a single dmb(). */
  950 static __inline void
  951 atomic_thread_fence_acq(void)
  952 {
  953
  954         dmb();
  955 }
 956
  /* Release fence: implemented as a single dmb(). */
  957 static __inline void
  958 atomic_thread_fence_rel(void)
  959 {
  960
  961         dmb();
  962 }
 963
  /* Acquire-release fence: implemented as a single dmb(). */
  964 static __inline void
  965 atomic_thread_fence_acq_rel(void)
  966 {
  967
  968         dmb();
  969 }
 970
  /* Sequentially consistent fence: implemented as a single dmb(). */
  971 static __inline void
  972 atomic_thread_fence_seq_cst(void)
  973 {
  974
  975         dmb();
  976 }
 977
 978 #endif /* _MACHINE_ATOMIC_V6_H_ */