/*-
 * Copyright (c) 2013 Andrew Turner <andrew@freebsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifndef	_MACHINE_ATOMIC_H_
#define	_MACHINE_ATOMIC_H_

#define	isb()		__asm __volatile("isb" : : : "memory")

/*
 * Options for DMB and DSB:
 *	oshld	Outer Shareable, load
 *	oshst	Outer Shareable, store
 *	osh	Outer Shareable, all
 *	nshld	Non-shareable, load
 *	nshst	Non-shareable, store
 *	nsh	Non-shareable, all
 *	ishld	Inner Shareable, load
 *	ishst	Inner Shareable, store
 *	ish	Inner Shareable, all
 *	ld	Full system, load
 *	st	Full system, store
 *	sy	Full system, all
 */
#define	dsb(opt)	__asm __volatile("dsb " __STRING(opt) : : : "memory")
#define	dmb(opt)	__asm __volatile("dmb " __STRING(opt) : : : "memory")

#define	mb()	dmb(sy)	/* Full system memory barrier, all */
#define	wmb()	dmb(st)	/* Full system memory barrier, store */
#define	rmb()	dmb(ld)	/* Full system memory barrier, load */

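/*
 * Usage sketch (illustrative only; ring_data and ring_ready are
 * hypothetical variables, not part of this header): a producer orders
 * its payload store before the flag store with wmb(); the consumer
 * pairs that with rmb() between its flag load and payload load.
 */
#if 0	/* example, not compiled */
static volatile uint64_t ring_data;
static volatile uint32_t ring_ready;

static void
ring_publish(uint64_t v)
{

	ring_data = v;
	wmb();			/* dmb(st): payload visible before flag */
	ring_ready = 1;
}

static uint64_t
ring_consume(void)
{

	while (ring_ready == 0)
		;
	rmb();			/* dmb(ld): flag load ordered before payload load */
	return (ring_data);
}
#endif
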
static __inline void
atomic_add_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%w0, [%2]      \n"
	    "   add	%w0, %w0, %w3  \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
	);
}

static __inline void
atomic_clear_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%w0, [%2]      \n"
	    "   bic	%w0, %w0, %w3  \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
	);
}

static __inline int
atomic_cmpset_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: mov	%w1, #1        \n"
	    "   ldxr	%w0, [%2]      \n"
	    "   cmp	%w0, %w3       \n"
	    "   b.ne	2f             \n"
	    "   stxr	%w1, %w4, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    "2:"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (cmpval), "+r" (newval)
	    : : "cc"
	);

	return (!res);
}

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp, ret;
	int res;

	__asm __volatile(
	    "1: ldxr	%w4, [%2]      \n"
	    "   add	%w0, %w4, %w3  \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val), "=&r"(ret) : : "cc"
	);

	return (ret);
}

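/*
 * Usage sketch: atomic_fetchadd_32 returns the value the word held
 * before the addition, so concurrent callers each draw a unique
 * ticket ("next_ticket" is a hypothetical variable).
 */
#if 0	/* example, not compiled */
static volatile uint32_t next_ticket;

static uint32_t
take_ticket(void)
{

	return (atomic_fetchadd_32(&next_ticket, 1));
}
#endif
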
static __inline uint32_t
atomic_readandclear_32(volatile uint32_t *p)
{
	uint32_t tmp, ret;
	int res;

	__asm __volatile(
	    "   mov	%w0, #0        \n"
	    "1: ldxr	%w3, [%2]      \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "=&r"(ret) : : "cc"
	);

	return (ret);
}

static __inline void
atomic_set_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%w0, [%2]      \n"
	    "   orr	%w0, %w0, %w3  \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
	);
}

static __inline uint32_t
atomic_swap_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%w0, [%2]      \n"
	    "   stxr	%w1, %w3, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);

	return (tmp);
}

static __inline void
atomic_subtract_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%w0, [%2]      \n"
	    "   sub	%w0, %w0, %w3  \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
	);
}

#define	atomic_add_int		atomic_add_32
#define	atomic_clear_int	atomic_clear_32
#define	atomic_cmpset_int	atomic_cmpset_32
#define	atomic_fetchadd_int	atomic_fetchadd_32
#define	atomic_readandclear_int	atomic_readandclear_32
#define	atomic_set_int		atomic_set_32
#define	atomic_swap_int		atomic_swap_32
#define	atomic_subtract_int	atomic_subtract_32

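/*
 * Usage sketch: atomic_cmpset_32 returns non-zero only when the store
 * succeeded, so a read-modify-write not covered by the primitives
 * above is written as a retry loop.  The saturating increment below
 * is a hypothetical example, not part of this header.
 */
#if 0	/* example, not compiled */
static void
sat_increment_32(volatile uint32_t *x)
{
	uint32_t old;

	do {
		old = *x;
		if (old == 0xffffffffu)
			return;		/* already saturated */
	} while (atomic_cmpset_32(x, old, old + 1) == 0);
}
#endif
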
static __inline void
atomic_add_acq_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldaxr	%w0, [%2]      \n"
	    "   add	%w0, %w0, %w3  \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline void
atomic_clear_acq_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldaxr	%w0, [%2]      \n"
	    "   bic	%w0, %w0, %w3  \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline int
atomic_cmpset_acq_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: mov	%w1, #1        \n"
	    "   ldaxr	%w0, [%2]      \n"
	    "   cmp	%w0, %w3       \n"
	    "   b.ne	2f             \n"
	    "   stxr	%w1, %w4, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    "2:"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (cmpval), "+r" (newval)
	    : : "cc", "memory"
	);

	return (!res);
}

static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
	uint32_t ret;

	__asm __volatile(
	    "ldar	%w0, [%1] \n"
	    : "=&r" (ret) : "r" (p) : "memory");

	return (ret);
}

static __inline void
atomic_set_acq_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldaxr	%w0, [%2]      \n"
	    "   orr	%w0, %w0, %w3  \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline void
atomic_subtract_acq_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldaxr	%w0, [%2]      \n"
	    "   sub	%w0, %w0, %w3  \n"
	    "   stxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

#define	atomic_add_acq_int	atomic_add_acq_32
#define	atomic_clear_acq_int	atomic_clear_acq_32
#define	atomic_cmpset_acq_int	atomic_cmpset_acq_32
#define	atomic_load_acq_int	atomic_load_acq_32
#define	atomic_set_acq_int	atomic_set_acq_32
#define	atomic_subtract_acq_int	atomic_subtract_acq_32

/* TODO: The atomic functions below are currently both acq and rel; we should fix this. */

static __inline void
atomic_add_rel_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%w0, [%2]      \n"
	    "   add	%w0, %w0, %w3  \n"
	    "   stlxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline void
atomic_clear_rel_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%w0, [%2]      \n"
	    "   bic	%w0, %w0, %w3  \n"
	    "   stlxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline int
atomic_cmpset_rel_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: mov	%w1, #1        \n"
	    "   ldxr	%w0, [%2]      \n"
	    "   cmp	%w0, %w3       \n"
	    "   b.ne	2f             \n"
	    "   stlxr	%w1, %w4, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    "2:"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (cmpval), "+r" (newval)
	    : : "cc", "memory"
	);

	return (!res);
}

static __inline void
atomic_set_rel_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%w0, [%2]      \n"
	    "   orr	%w0, %w0, %w3  \n"
	    "   stlxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t val)
{

	__asm __volatile(
	    "stlr	%w0, [%1] \n"
	    : : "r" (val), "r" (p) : "memory");
}

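/*
 * Usage sketch: a minimal spin lock built on the primitives above
 * ("lk" points to a hypothetical word initialized to 0).  The acquire
 * cmpset keeps the critical section from floating above the lock; the
 * release store publishes the critical section before the lock drops.
 */
#if 0	/* example, not compiled */
static void
spin_lock(volatile uint32_t *lk)
{

	while (atomic_cmpset_acq_32(lk, 0, 1) == 0)
		;
}

static void
spin_unlock(volatile uint32_t *lk)
{

	atomic_store_rel_32(lk, 0);
}
#endif
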
static __inline void
atomic_subtract_rel_32(volatile uint32_t *p, uint32_t val)
{
	uint32_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%w0, [%2]      \n"
	    "   sub	%w0, %w0, %w3  \n"
	    "   stlxr	%w1, %w0, [%2] \n"
	    "   cbnz	%w1, 1b        \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

#define	atomic_add_rel_int	atomic_add_rel_32
#define	atomic_clear_rel_int	atomic_clear_rel_32
#define	atomic_cmpset_rel_int	atomic_cmpset_rel_32
#define	atomic_set_rel_int	atomic_set_rel_32
#define	atomic_subtract_rel_int	atomic_subtract_rel_32
#define	atomic_store_rel_int	atomic_store_rel_32

static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%0, [%2]      \n"
	    "   add	%0, %0, %3    \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r" (tmp), "=&r" (res), "+r" (p), "+r" (val) : : "cc"
	);
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%0, [%2]      \n"
	    "   bic	%0, %0, %3    \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
	);
}

static __inline int
atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: mov	%w1, #1       \n"
	    "   ldxr	%0, [%2]      \n"
	    "   cmp	%0, %3        \n"
	    "   b.ne	2f            \n"
	    "   stxr	%w1, %4, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    "2:"
	    : "=&r" (tmp), "=&r"(res), "+r" (p), "+r" (cmpval), "+r" (newval)
	    : : "cc"
	);

	return (!res);
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp, ret;
	int res;

	__asm __volatile(
	    "1: ldxr	%4, [%2]      \n"
	    "   add	%0, %4, %3    \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val), "=&r"(ret) : : "cc"
	);

	return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
	uint64_t tmp, ret;
	int res;

	__asm __volatile(
	    "   mov	%0, #0        \n"
	    "1: ldxr	%3, [%2]      \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "=&r"(ret) : : "cc"
	);

	return (ret);
}

static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%0, [%2]      \n"
	    "   orr	%0, %0, %3    \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
	);
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%0, [%2]      \n"
	    "   sub	%0, %0, %3    \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc"
	);
}

static __inline uint64_t
atomic_swap_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t old;
	int res;

	__asm __volatile(
	    "1: ldxr	%0, [%2]      \n"
	    "   stxr	%w1, %3, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(old), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);

	return (old);
}

#define	atomic_add_long			atomic_add_64
#define	atomic_clear_long		atomic_clear_64
#define	atomic_cmpset_long		atomic_cmpset_64
#define	atomic_fetchadd_long		atomic_fetchadd_64
#define	atomic_readandclear_long	atomic_readandclear_64
#define	atomic_set_long			atomic_set_64
#define	atomic_swap_long		atomic_swap_64
#define	atomic_subtract_long		atomic_subtract_64

#define	atomic_add_ptr			atomic_add_64
#define	atomic_clear_ptr		atomic_clear_64
#define	atomic_cmpset_ptr		atomic_cmpset_64
#define	atomic_fetchadd_ptr		atomic_fetchadd_64
#define	atomic_readandclear_ptr		atomic_readandclear_64
#define	atomic_set_ptr			atomic_set_64
#define	atomic_swap_ptr			atomic_swap_64
#define	atomic_subtract_ptr		atomic_subtract_64

static __inline void
atomic_add_acq_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldaxr	%0, [%2]      \n"
	    "   add	%0, %0, %3    \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline void
atomic_clear_acq_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldaxr	%0, [%2]      \n"
	    "   bic	%0, %0, %3    \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline int
atomic_cmpset_acq_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: mov	%w1, #1       \n"
	    "   ldaxr	%0, [%2]      \n"
	    "   cmp	%0, %3        \n"
	    "   b.ne	2f            \n"
	    "   stxr	%w1, %4, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    "2:"
	    : "=&r" (tmp), "=&r" (res), "+r" (p), "+r" (cmpval), "+r" (newval)
	    : : "cc", "memory"
	);

	return (!res);
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
	uint64_t ret;

	__asm __volatile(
	    "ldar	%0, [%1] \n"
	    : "=&r" (ret) : "r" (p) : "memory");

	return (ret);
}

static __inline void
atomic_set_acq_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldaxr	%0, [%2]      \n"
	    "   orr	%0, %0, %3    \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline void
atomic_subtract_acq_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldaxr	%0, [%2]      \n"
	    "   sub	%0, %0, %3    \n"
	    "   stxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

#define	atomic_add_acq_long	atomic_add_acq_64
#define	atomic_clear_acq_long	atomic_clear_acq_64
#define	atomic_cmpset_acq_long	atomic_cmpset_acq_64
#define	atomic_load_acq_long	atomic_load_acq_64
#define	atomic_set_acq_long	atomic_set_acq_64
#define	atomic_subtract_acq_long	atomic_subtract_acq_64

#define	atomic_add_acq_ptr	atomic_add_acq_64
#define	atomic_clear_acq_ptr	atomic_clear_acq_64
#define	atomic_cmpset_acq_ptr	atomic_cmpset_acq_64
#define	atomic_load_acq_ptr	atomic_load_acq_64
#define	atomic_set_acq_ptr	atomic_set_acq_64
#define	atomic_subtract_acq_ptr	atomic_subtract_acq_64

/*
 * TODO: The atomic functions below are currently both acq and rel; we should
 * fix this.
 */

static __inline void
atomic_add_rel_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%0, [%2]      \n"
	    "   add	%0, %0, %3    \n"
	    "   stlxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline void
atomic_clear_rel_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%0, [%2]      \n"
	    "   bic	%0, %0, %3    \n"
	    "   stlxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline int
atomic_cmpset_rel_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: mov	%w1, #1       \n"
	    "   ldxr	%0, [%2]      \n"
	    "   cmp	%0, %3        \n"
	    "   b.ne	2f            \n"
	    "   stlxr	%w1, %4, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    "2:"
	    : "=&r" (tmp), "=&r" (res), "+r" (p), "+r" (cmpval), "+r" (newval)
	    : : "cc", "memory"
	);

	return (!res);
}

static __inline void
atomic_set_rel_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%0, [%2]      \n"
	    "   orr	%0, %0, %3    \n"
	    "   stlxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

	__asm __volatile(
	    "stlr	%0, [%1] \n"
	    : : "r" (val), "r" (p) : "memory");
}

static __inline void
atomic_subtract_rel_64(volatile uint64_t *p, uint64_t val)
{
	uint64_t tmp;
	int res;

	__asm __volatile(
	    "1: ldxr	%0, [%2]      \n"
	    "   sub	%0, %0, %3    \n"
	    "   stlxr	%w1, %0, [%2] \n"
	    "   cbnz	%w1, 1b       \n"
	    : "=&r"(tmp), "=&r"(res), "+r" (p), "+r" (val) : : "cc", "memory"
	);
}

static __inline void
atomic_thread_fence_acq(void)
{

	dmb(ld);
}

static __inline void
atomic_thread_fence_rel(void)
{

	dmb(sy);
}

static __inline void
atomic_thread_fence_acq_rel(void)
{

	dmb(sy);
}

static __inline void
atomic_thread_fence_seq_cst(void)
{

	dmb(sy);
}

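/*
 * Usage sketch: the fences order the plain accesses around them, e.g.
 * a flag handoff written with ordinary stores ("data" and "done" are
 * hypothetical).  atomic_thread_fence_rel() keeps the data store from
 * passing the flag store:
 *
 *	*data = result;
 *	atomic_thread_fence_rel();
 *	done = 1;
 */
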
#define	atomic_add_rel_long	atomic_add_rel_64
#define	atomic_clear_rel_long	atomic_clear_rel_64
#define	atomic_cmpset_rel_long	atomic_cmpset_rel_64
#define	atomic_set_rel_long	atomic_set_rel_64
#define	atomic_subtract_rel_long	atomic_subtract_rel_64
#define	atomic_store_rel_long	atomic_store_rel_64

#define	atomic_add_rel_ptr	atomic_add_rel_64
#define	atomic_clear_rel_ptr	atomic_clear_rel_64
#define	atomic_cmpset_rel_ptr	atomic_cmpset_rel_64
#define	atomic_set_rel_ptr	atomic_set_rel_64
#define	atomic_subtract_rel_ptr	atomic_subtract_rel_64
#define	atomic_store_rel_ptr	atomic_store_rel_64

#endif /* _MACHINE_ATOMIC_H_ */