/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */

/*-
 * SPDX-License-Identifier: BSD-4-Clause
 *
 * Copyright (C) 2003-2004 Olivier Houchard
 * Copyright (C) 1994-1997 Mark Brinicombe
 * Copyright (C) 1994 Brini
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Brini.
 * 4. The name of Brini may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef _MACHINE_ATOMIC_H_
#define _MACHINE_ATOMIC_H_

#include <sys/atomic_common.h>

#if __ARM_ARCH >= 7
#define isb()  __asm __volatile("isb" : : : "memory")
#define dsb()  __asm __volatile("dsb" : : : "memory")
#define dmb()  __asm __volatile("dmb" : : : "memory")
#else
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
#endif

#define mb()   dmb()
#define wmb()  dmb()
#define rmb()  dmb()

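/*
 * This header maps mb(), rmb(), and wmb() all to a full data memory
 * barrier rather than to weaker read- or write-only fences.  A hedged
 * producer-side sketch of their use (the variable names are illustrative,
 * not part of this header):
 *
 *      shared_data = value;    // payload store
 *      wmb();                  // order the payload before the flag
 *      shared_flag = 1;        // publish
 */
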
#define ARM_HAVE_ATOMIC64

#define ATOMIC_ACQ_REL_LONG(NAME)                                       \
static __inline void                                                    \
atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)                \
{                                                                       \
        atomic_##NAME##_long(p, v);                                     \
        dmb();                                                          \
}                                                                       \
                                                                        \
static __inline void                                                    \
atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)                \
{                                                                       \
        dmb();                                                          \
        atomic_##NAME##_long(p, v);                                     \
}

#define ATOMIC_ACQ_REL(NAME, WIDTH)                                     \
static __inline void                                                    \
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{                                                                       \
        atomic_##NAME##_##WIDTH(p, v);                                  \
        dmb();                                                          \
}                                                                       \
                                                                        \
static __inline void                                                    \
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{                                                                       \
        dmb();                                                          \
        atomic_##NAME##_##WIDTH(p, v);                                  \
}

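/*
 * For instance, ATOMIC_ACQ_REL(add, 32) generates atomic_add_acq_32() and
 * atomic_add_rel_32(): the acquire form performs the operation and then
 * issues a dmb so later accesses cannot be reordered before it, while the
 * release form issues the dmb first so earlier accesses complete before
 * the operation becomes visible.
 */
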
static __inline void
atomic_add_32(volatile uint32_t *p, uint32_t val)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   add     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
            : : "cc", "memory");
}

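/*
 * The LDREX/STREX sequence above is ARM's load-linked/store-conditional
 * pattern: LDREX marks the address for exclusive access, STREX succeeds
 * (writing 0 to its status register) only if nothing else touched the
 * location in between, and the loop retries otherwise.  Roughly
 * equivalent C, using hypothetical helpers:
 *
 *      do {
 *              tmp = load_exclusive(p);                // LDREX
 *      } while (store_exclusive(p, tmp + val));        // STREX, 0 on success
 */
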
static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   adds    %Q[tmp], %Q[val]                        \n"
            "   adc     %R[tmp], %R[tmp], %R[val]               \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_add_long(volatile u_long *p, u_long val)
{

        atomic_add_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL_LONG(add)

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   bic     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
            : : "cc", "memory");
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   bic     %Q[tmp], %Q[val]                        \n"
            "   bic     %R[tmp], %R[val]                        \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_clear_long(volatile u_long *address, u_long setmask)
{

        atomic_clear_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL_LONG(clear)

#define ATOMIC_FCMPSET_CODE(RET, TYPE, SUF)                   \
    {                                                         \
        TYPE tmp;                                             \
                                                              \
        __asm __volatile(                                     \
            "1: ldrex" SUF "   %[tmp], [%[ptr]]          \n"  \
            "   ldr" SUF "     %[ret], [%[oldv]]         \n"  \
            "   teq            %[tmp], %[ret]            \n"  \
            "   ittee          ne                        \n"  \
            "   str" SUF "ne   %[tmp], [%[oldv]]         \n"  \
            "   movne          %[ret], #0                \n"  \
            "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n"  \
            "   eorseq         %[ret], #1                \n"  \
            "   beq            1b                        \n"  \
            : [ret] "=&r" (RET),                              \
              [tmp] "=&r" (tmp)                               \
            : [ptr] "r"   (_ptr),                             \
              [oldv] "r"  (_old),                             \
              [newv] "r"  (_new)                              \
            : "cc", "memory");                                \
    }

#define ATOMIC_FCMPSET_CODE64(RET)                                 \
    {                                                              \
        uint64_t cmp, tmp;                                         \
                                                                   \
        __asm __volatile(                                          \
            "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n"  \
            "   ldrd     %Q[cmp], %R[cmp], [%[oldv]]          \n"  \
            "   teq      %Q[tmp], %Q[cmp]                     \n"  \
            "   it       eq                                   \n"  \
            "   teqeq    %R[tmp], %R[cmp]                     \n"  \
            "   ittee    ne                                   \n"  \
            "   movne    %[ret], #0                           \n"  \
            "   strdne   %[cmp], [%[oldv]]                    \n"  \
            "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n"  \
            "   eorseq   %[ret], #1                           \n"  \
            "   beq      1b                                   \n"  \
            : [ret] "=&r" (RET),                                   \
              [cmp] "=&r" (cmp),                                   \
              [tmp] "=&r" (tmp)                                    \
            : [ptr] "r"   (_ptr),                                  \
              [oldv] "r"  (_old),                                  \
              [newv] "r"  (_new)                                   \
            : "cc", "memory");                                     \
    }

static __inline int
atomic_fcmpset_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
        return (ret);
}
#define atomic_fcmpset_8        atomic_fcmpset_8

static __inline int
atomic_fcmpset_acq_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
        return (ret);
}

static __inline int
atomic_fcmpset_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
        return (ret);
}
#define atomic_fcmpset_16       atomic_fcmpset_16

static __inline int
atomic_fcmpset_acq_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
        return (ret);
}

static __inline int
atomic_fcmpset_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
        return (ret);
}

static __inline int
atomic_fcmpset_acq_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
        return (ret);
}

static __inline int
atomic_fcmpset_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, u_long, "");
        return (ret);
}

static __inline int
atomic_fcmpset_acq_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, u_long, "");
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE(ret, u_long, "");
        return (ret);
}

static __inline int
atomic_fcmpset_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE64(ret);
        return (ret);
}

static __inline int
atomic_fcmpset_acq_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE64(ret);
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE64(ret);
        return (ret);
}

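/*
 * The fcmpset family returns non-zero on success; on failure it stores the
 * value actually observed through the _old pointer, so callers can retry
 * without reloading.  A hedged sketch of the usual loop (FLAG_BIT is
 * illustrative, not defined here):
 *
 *      uint32_t newv, old = atomic_load_acq_32(&word);
 *      do {
 *              newv = old | FLAG_BIT;
 *      } while (!atomic_fcmpset_32(&word, &old, newv));
 */
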
#define ATOMIC_CMPSET_CODE(RET, SUF)                         \
    {                                                        \
        __asm __volatile(                                    \
            "1: ldrex" SUF "   %[ret], [%[ptr]]          \n" \
            "   teq            %[ret], %[oldv]           \n" \
            "   itee           ne                        \n" \
            "   movne          %[ret], #0                \n" \
            "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n" \
            "   eorseq         %[ret], #1                \n" \
            "   beq            1b                        \n" \
            : [ret] "=&r" (RET)                              \
            : [ptr] "r"   (_ptr),                            \
              [oldv] "r"  (_old),                            \
              [newv] "r"  (_new)                             \
            : "cc", "memory");                               \
    }

#define ATOMIC_CMPSET_CODE64(RET)                                 \
    {                                                             \
        uint64_t tmp;                                             \
                                                                  \
        __asm __volatile(                                         \
            "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n" \
            "   teq      %Q[tmp], %Q[oldv]                    \n" \
            "   it       eq                                   \n" \
            "   teqeq    %R[tmp], %R[oldv]                    \n" \
            "   itee     ne                                   \n" \
            "   movne    %[ret], #0                           \n" \
            "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n" \
            "   eorseq   %[ret], #1                           \n" \
            "   beq      1b                                   \n" \
            : [ret] "=&r" (RET),                                  \
              [tmp] "=&r" (tmp)                                   \
            : [ptr] "r"   (_ptr),                                 \
              [oldv] "r"  (_old),                                 \
              [newv] "r"  (_new)                                  \
            : "cc", "memory");                                    \
    }

static __inline int
atomic_cmpset_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "b");
        return (ret);
}
#define atomic_cmpset_8         atomic_cmpset_8

static __inline int
atomic_cmpset_acq_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "b");
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE(ret, "b");
        return (ret);
}

static __inline int
atomic_cmpset_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "h");
        return (ret);
}
#define atomic_cmpset_16        atomic_cmpset_16

static __inline int
atomic_cmpset_acq_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "h");
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE(ret, "h");
        return (ret);
}

static __inline int
atomic_cmpset_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "");
        return (ret);
}

static __inline int
atomic_cmpset_acq_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "");
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE(ret, "");
        return (ret);
}

static __inline int
atomic_cmpset_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "");
        return (ret);
}

static __inline int
atomic_cmpset_acq_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "");
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE(ret, "");
        return (ret);
}

static __inline int
atomic_cmpset_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE64(ret);
        return (ret);
}

static __inline int
atomic_cmpset_acq_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE64(ret);
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE64(ret);
        return (ret);
}

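/*
 * The cmpset family is the by-value variant: it returns non-zero when *_ptr
 * matched _old and was replaced with _new, and zero otherwise.  Unlike
 * fcmpset it does not report the observed value, so a retry loop reloads
 * explicitly, e.g.:
 *
 *      do {
 *              old = atomic_load_acq_32(&word);
 *      } while (!atomic_cmpset_32(&word, old, old + 1));
 */
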
static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
        uint32_t tmp = 0, tmp2 = 0, ret = 0;

        __asm __volatile(
            "1: ldrex   %0, [%3]        \n"
            "   add     %1, %0, %4      \n"
            "   strex   %2, %1, [%3]    \n"
            "   cmp     %2, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "+r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
            : : "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t ret, tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[ret], %R[ret], [%[ptr]]              \n"
            "   adds    %Q[tmp], %Q[ret], %Q[val]               \n"
            "   adc     %R[tmp], %R[ret], %R[val]               \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [ret] "=&r" (ret),
              [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
        return (ret);
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long val)
{

        return (atomic_fetchadd_32((volatile uint32_t *)p, val));
}

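/*
 * fetchadd returns the value the target held before the addition, making
 * it suitable for ticket-style sequencing.  A sketch (next_ticket is an
 * arbitrary counter, not defined here):
 *
 *      my_ticket = atomic_fetchadd_32(&next_ticket, 1);
 */
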
static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
        uint32_t v;

        v = *p;
        dmb();
        return (v);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
        uint64_t ret;

        /*
         * The only way to atomically load 64 bits is with LDREXD which puts the
         * exclusive monitor into the exclusive state, so reset it to open state
         * with CLREX because we don't actually need to store anything.
         */
        __asm __volatile(
            "ldrexd     %Q[ret], %R[ret], [%[ptr]]      \n"
            "clrex                                      \n"
            : [ret] "=&r" (ret)
            : [ptr] "r"   (p)
            : "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
        uint64_t ret;

        ret = atomic_load_64(p);
        dmb();
        return (ret);
}

static __inline u_long
atomic_load_acq_long(volatile u_long *p)
{
        u_long v;

        v = *p;
        dmb();
        return (v);
}

static __inline uint32_t
atomic_readandclear_32(volatile uint32_t *p)
{
        uint32_t ret, tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%3]        \n"
            "   mov     %1, #0          \n"
            "   strex   %2, %1, [%3]    \n"
            "   cmp     %2, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p)
            : : "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
        uint64_t ret, tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[ret], %R[ret], [%[ptr]]              \n"
            "   mov     %Q[tmp], #0                             \n"
            "   mov     %R[tmp], #0                             \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [ret] "=&r" (ret),
              [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p)
            : "cc", "memory");
        return (ret);
}

static __inline u_long
atomic_readandclear_long(volatile u_long *p)
{

        return (atomic_readandclear_32((volatile uint32_t *)p));
}

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   orr     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
            : : "cc", "memory");
}

static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   orr     %Q[tmp], %Q[val]                        \n"
            "   orr     %R[tmp], %R[val]                        \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_set_long(volatile u_long *address, u_long setmask)
{

        atomic_set_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL_LONG(set)

static __inline void
atomic_subtract_32(volatile uint32_t *p, uint32_t val)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   sub     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
            : : "cc", "memory");
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   subs    %Q[tmp], %Q[val]                        \n"
            "   sbc     %R[tmp], %R[tmp], %R[val]               \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long val)
{

        atomic_subtract_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(subtract, 32)
ATOMIC_ACQ_REL(subtract, 64)
ATOMIC_ACQ_REL_LONG(subtract)

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        /*
         * The only way to atomically store 64 bits is with STREXD, which will
         * succeed only if paired up with a preceding LDREXD using the same
         * address, so we read and discard the existing value before storing.
         */
        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   strexd  %[exf], %Q[val], %R[val], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [tmp] "=&r" (tmp),
              [exf] "=&r" (exflag)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
{

        dmb();
        *p = v;
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

        dmb();
        atomic_store_64(p, val);
}

static __inline void
atomic_store_rel_long(volatile u_long *p, u_long v)
{

        dmb();
        *p = v;
}

static __inline int
atomic_testandclear_32(volatile uint32_t *ptr, u_int bit)
{
        int newv, oldv, result;

        __asm __volatile(
            "   mov     ip, #1                                  \n"
            "   lsl     ip, ip, %[bit]                          \n"
            /*  Done with %[bit] as input, reuse below as output. */
            "1:                                                 \n"
            "   ldrex   %[oldv], [%[ptr]]                       \n"
            "   bic     %[newv], %[oldv], ip                    \n"
            "   strex   %[bit], %[newv], [%[ptr]]               \n"
            "   teq     %[bit], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            "   ands    %[bit], %[oldv], ip                     \n"
            "   it      ne                                      \n"
            "   movne   %[bit], #1                              \n"
            : [bit]  "=&r"   (result),
              [oldv] "=&r"   (oldv),
              [newv] "=&r"   (newv)
            : [ptr]  "r"     (ptr),
                     "[bit]" (bit & 0x1f)
            : "cc", "ip", "memory");

        return (result);
}

static __inline int
atomic_testandclear_int(volatile u_int *p, u_int v)
{

        return (atomic_testandclear_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandclear_long(volatile u_long *p, u_int v)
{

        return (atomic_testandclear_32((volatile uint32_t *)p, v));
}
#define atomic_testandclear_long        atomic_testandclear_long

static __inline int
atomic_testandclear_64(volatile uint64_t *p, u_int v)
{
        volatile uint32_t *p32;

        p32 = (volatile uint32_t *)p;
        /*
         * Assume little-endian; atomic_testandclear_32() uses only the
         * low 5 bits of v.
         */
        if ((v & 0x20) != 0)
                p32++;
        return (atomic_testandclear_32(p32, v));
}

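/*
 * In atomic_testandclear_64() above (and atomic_testandset_64() below),
 * bit 5 of v selects which 32-bit half of the 64-bit word is touched and
 * the low 5 bits select the bit within that half; e.g. bit 37 becomes
 * bit 5 of the higher-addressed (on little-endian, more significant) word.
 */
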
static __inline int
atomic_testandset_32(volatile uint32_t *ptr, u_int bit)
{
        int newv, oldv, result;

        __asm __volatile(
            "   mov     ip, #1                                  \n"
            "   lsl     ip, ip, %[bit]                          \n"
            /*  Done with %[bit] as input, reuse below as output. */
            "1:                                                 \n"
            "   ldrex   %[oldv], [%[ptr]]                       \n"
            "   orr     %[newv], %[oldv], ip                    \n"
            "   strex   %[bit], %[newv], [%[ptr]]               \n"
            "   teq     %[bit], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            "   ands    %[bit], %[oldv], ip                     \n"
            "   it      ne                                      \n"
            "   movne   %[bit], #1                              \n"
            : [bit]  "=&r"   (result),
              [oldv] "=&r"   (oldv),
              [newv] "=&r"   (newv)
            : [ptr]  "r"     (ptr),
                     "[bit]" (bit & 0x1f)
            : "cc", "ip", "memory");

        return (result);
}

static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{

        return (atomic_testandset_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandset_long(volatile u_long *p, u_int v)
{

        return (atomic_testandset_32((volatile uint32_t *)p, v));
}
#define atomic_testandset_long  atomic_testandset_long

static __inline int
atomic_testandset_64(volatile uint64_t *p, u_int v)
{
        volatile uint32_t *p32;

        p32 = (volatile uint32_t *)p;
        /*
         * Assume little-endian; atomic_testandset_32() uses only the
         * low 5 bits of v.
         */
        if ((v & 0x20) != 0)
                p32++;
        return (atomic_testandset_32(p32, v));
}

static __inline uint32_t
atomic_swap_32(volatile uint32_t *p, uint32_t v)
{
        uint32_t ret, exflag;

        __asm __volatile(
            "1: ldrex   %[ret], [%[ptr]]                \n"
            "   strex   %[exf], %[val], [%[ptr]]        \n"
            "   teq     %[exf], #0                      \n"
            "   it      ne                              \n"
            "   bne     1b                              \n"
            : [ret] "=&r" (ret),
              [exf] "=&r" (exflag)
            : [val] "r"  (v),
              [ptr] "r"  (p)
            : "cc", "memory");
        return (ret);
}

static __inline u_long
atomic_swap_long(volatile u_long *p, u_long v)
{

        return (atomic_swap_32((volatile uint32_t *)p, v));
}

static __inline uint64_t
atomic_swap_64(volatile uint64_t *p, uint64_t v)
{
        uint64_t ret;
        uint32_t exflag;

        __asm __volatile(
            "1: ldrexd  %Q[ret], %R[ret], [%[ptr]]              \n"
            "   strexd  %[exf], %Q[val], %R[val], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [ret] "=&r" (ret),
              [exf] "=&r" (exflag)
            : [val] "r"   (v),
              [ptr] "r"   (p)
            : "cc", "memory");
        return (ret);
}

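/*
 * swap is an unconditional exchange: the new value always lands and the
 * previous contents are returned.  No acquire or release variants are
 * provided here, so callers needing ordering can pair a swap with the
 * fences defined below.
 */
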
#undef ATOMIC_ACQ_REL
#undef ATOMIC_ACQ_REL_LONG

static __inline void
atomic_thread_fence_acq(void)
{

        dmb();
}

static __inline void
atomic_thread_fence_rel(void)
{

        dmb();
}

static __inline void
atomic_thread_fence_acq_rel(void)
{

        dmb();
}

static __inline void
atomic_thread_fence_seq_cst(void)
{

        dmb();
}

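/*
 * All four fence strengths are implemented as a full DMB, so on this
 * architecture level the acquire, release, acq_rel, and seq_cst fences
 * are indistinguishable in cost.
 */
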
#define atomic_clear_ptr                atomic_clear_32
#define atomic_clear_acq_ptr            atomic_clear_acq_32
#define atomic_clear_rel_ptr            atomic_clear_rel_32
#define atomic_set_ptr                  atomic_set_32
#define atomic_set_acq_ptr              atomic_set_acq_32
#define atomic_set_rel_ptr              atomic_set_rel_32
#define atomic_fcmpset_ptr              atomic_fcmpset_32
#define atomic_fcmpset_rel_ptr          atomic_fcmpset_rel_32
#define atomic_fcmpset_acq_ptr          atomic_fcmpset_acq_32
#define atomic_cmpset_ptr               atomic_cmpset_32
#define atomic_cmpset_acq_ptr           atomic_cmpset_acq_32
#define atomic_cmpset_rel_ptr           atomic_cmpset_rel_32
#define atomic_load_acq_ptr             atomic_load_acq_32
#define atomic_store_rel_ptr            atomic_store_rel_32
#define atomic_swap_ptr                 atomic_swap_32
#define atomic_readandclear_ptr         atomic_readandclear_32

#define atomic_add_int                  atomic_add_32
#define atomic_add_acq_int              atomic_add_acq_32
#define atomic_add_rel_int              atomic_add_rel_32
#define atomic_subtract_int             atomic_subtract_32
#define atomic_subtract_acq_int         atomic_subtract_acq_32
#define atomic_subtract_rel_int         atomic_subtract_rel_32
#define atomic_clear_int                atomic_clear_32
#define atomic_clear_acq_int            atomic_clear_acq_32
#define atomic_clear_rel_int            atomic_clear_rel_32
#define atomic_set_int                  atomic_set_32
#define atomic_set_acq_int              atomic_set_acq_32
#define atomic_set_rel_int              atomic_set_rel_32
#define atomic_fcmpset_int              atomic_fcmpset_32
#define atomic_fcmpset_acq_int          atomic_fcmpset_acq_32
#define atomic_fcmpset_rel_int          atomic_fcmpset_rel_32
#define atomic_cmpset_int               atomic_cmpset_32
#define atomic_cmpset_acq_int           atomic_cmpset_acq_32
#define atomic_cmpset_rel_int           atomic_cmpset_rel_32
#define atomic_fetchadd_int             atomic_fetchadd_32
#define atomic_readandclear_int         atomic_readandclear_32
#define atomic_load_acq_int             atomic_load_acq_32
#define atomic_store_rel_int            atomic_store_rel_32
#define atomic_swap_int                 atomic_swap_32

/*
 * For:
 *  - atomic_load_acq_8
 *  - atomic_load_acq_16
 *  - atomic_testandset_acq_long
 */
#include <sys/_atomic_subword.h>

#endif /* _MACHINE_ATOMIC_H_ */