2 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
30 #include <sys/param.h>
31 #include <sys/stdatomic.h>
32 #include <sys/types.h>
34 #include <machine/cpufunc.h>
35 #include <machine/sysarch.h>
38 #include "opt_global.h"
42 * Executing statements with interrupts disabled.
45 #if defined(_KERNEL) && !defined(SMP)
46 #define WITHOUT_INTERRUPTS(s) do { \
49 regs = intr_disable(); /* mask interrupts so s runs atomically on a UP kernel */ \
53 #endif /* _KERNEL && !SMP */
58 * It turns out __sync_synchronize() does not emit any code when used
59 * with GCC 4.2. Implement our own version that does work reliably.
61 * Although __sync_lock_test_and_set() should only perform an acquire
62 * barrier, make it do a full barrier like the other functions. This
63 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
66 #if defined(_KERNEL) && !defined(SMP)
71 __asm volatile ("" : : : "memory"); /* UP kernel: a compiler barrier suffices */
73 #elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
78 __asm volatile ("dmb" : : : "memory"); /* ARMv7: DMB full hardware barrier */
80 #elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
81 defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
82 defined(__ARM_ARCH_6ZK__)
87 __asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory"); /* ARMv6: CP15 c7,c10,5 data memory barrier */
91 #if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)
94 * New C11 __atomic_* API.
97 #if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
98 defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
99 defined(__ARM_ARCH_6ZK__) || \
100 defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
102 /* These systems should be supported by the compiler. */
104 #else /* __ARM_ARCH_5__ */
106 /* Clang doesn't allow us to reimplement builtins without this. */
108 #pragma redefine_extname __sync_synchronize_ext __sync_synchronize
109 #define __sync_synchronize __sync_synchronize_ext /* expose our version under the builtin's name */
113 __sync_synchronize(void)
120 #error "On SMP systems we should have proper atomic operations."
124 * On uniprocessor systems, we can perform the atomic operations by
125 * disabling interrupts.
128 #define EMIT_LOAD_N(N, uintN_t) \
130 __atomic_load_##N(uintN_t *mem, int model __unused) \
134 WITHOUT_INTERRUPTS({ /* IRQ-off section provides atomicity on UP */ \
140 #define EMIT_STORE_N(N, uintN_t) \
142 __atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused) \
145 WITHOUT_INTERRUPTS({ \
150 #define EMIT_COMPARE_EXCHANGE_N(N, uintN_t) \
152 __atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected, \
153 uintN_t desired, int success __unused, int failure __unused) \
157 WITHOUT_INTERRUPTS({ \
158 if (*mem == *expected) { \
169 #define EMIT_FETCH_OP_N(N, uintN_t, name, op) \
171 __atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused) \
175 WITHOUT_INTERRUPTS({ \
182 #define EMIT_ALL_OPS_N(N, uintN_t) /* emit the whole op set for width N */ \
183 EMIT_LOAD_N(N, uintN_t) \
184 EMIT_STORE_N(N, uintN_t) \
185 EMIT_COMPARE_EXCHANGE_N(N, uintN_t) \
186 EMIT_FETCH_OP_N(N, uintN_t, exchange, =) \
187 EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=) \
188 EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=) \
189 EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=) \
190 EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=) \
191 EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)
193 EMIT_ALL_OPS_N(1, uint8_t)
194 EMIT_ALL_OPS_N(2, uint16_t)
195 EMIT_ALL_OPS_N(4, uint32_t)
196 EMIT_ALL_OPS_N(8, uint64_t)
201 * For userspace on uniprocessor systems, we can implement the atomic
202 * operations by using a Restartable Atomic Sequence. This makes the
203 * kernel restart the code from the beginning when interrupted.
206 #define EMIT_LOAD_N(N, uintN_t) \
208 __atomic_load_##N(uintN_t *mem, int model __unused) \
214 #define EMIT_STORE_N(N, uintN_t) \
216 __atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused) \
222 #define EMIT_EXCHANGE_N(N, uintN_t, ldr, str) \
224 __atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused) \
226 uint32_t old, temp, ras_start; \
228 ras_start = ARM_RAS_START; /* RAS descriptor address — presumably from <machine/sysarch.h>, TODO confirm */ \
230 /* Set up Restartable Atomic Sequence. */ \
235 "\tstr %2, [%5, #4]\n" \
237 "\t"ldr" %0, %4\n" /* Load old value. */ \
238 "\t"str" %3, %1\n" /* Store new value. */ \
240 /* Tear down Restartable Atomic Sequence. */ \
242 "\tmov %2, #0x00000000\n" \
244 "\tmov %2, #0xffffffff\n" \
245 "\tstr %2, [%5, #4]\n" \
246 : "=&r" (old), "=m" (*mem), "=&r" (temp) \
247 : "r" (val), "m" (*mem), "r" (ras_start)); \
251 #define EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq) \
253 __atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected, \
254 uintN_t desired, int success __unused, int failure __unused) \
256 uint32_t expected, old, temp, ras_start; \
258 expected = *pexpected; /* snapshot the caller's expected value */ \
259 ras_start = ARM_RAS_START; \
261 /* Set up Restartable Atomic Sequence. */ \
266 "\tstr %2, [%6, #4]\n" \
268 "\t"ldr" %0, %5\n" /* Load old value. */ \
269 "\tcmp %0, %3\n" /* Compare to expected value. */\
270 "\t"streq" %4, %1\n" /* Store new value. */ \
272 /* Tear down Restartable Atomic Sequence. */ \
274 "\tmov %2, #0x00000000\n" \
276 "\tmov %2, #0xffffffff\n" \
277 "\tstr %2, [%6, #4]\n" \
278 : "=&r" (old), "=m" (*mem), "=&r" (temp) \
279 : "r" (expected), "r" (desired), "m" (*mem), \
281 if (old == expected) { \
289 #define EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op) \
291 __atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused) \
293 uint32_t old, temp, ras_start; \
295 ras_start = ARM_RAS_START; \
297 /* Set up Restartable Atomic Sequence. */ \
302 "\tstr %2, [%5, #4]\n" \
304 "\t"ldr" %0, %4\n" /* Load old value. */ \
305 "\t"op" %2, %0, %3\n" /* Calculate new value. */ \
306 "\t"str" %2, %1\n" /* Store new value. */ \
308 /* Tear down Restartable Atomic Sequence. */ \
310 "\tmov %2, #0x00000000\n" \
312 "\tmov %2, #0xffffffff\n" \
313 "\tstr %2, [%5, #4]\n" \
314 : "=&r" (old), "=m" (*mem), "=&r" (temp) \
315 : "r" (val), "m" (*mem), "r" (ras_start)); \
319 #define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq) /* width-specific ld/st mnemonics */ \
320 EMIT_LOAD_N(N, uintN_t) \
321 EMIT_STORE_N(N, uintN_t) \
322 EMIT_EXCHANGE_N(N, uintN_t, ldr, str) \
323 EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq) \
324 EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add") \
325 EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and") \
326 EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr") \
327 EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub") \
328 EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor")
330 EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
331 EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
332 EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
338 #endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */
340 #if defined(__SYNC_ATOMICS)
346 #if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
347 defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
348 defined(__ARM_ARCH_6ZK__) || \
349 defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
351 /* Implementations for old GCC versions, lacking support for atomics. */
359 * Given a memory address pointing to an 8-bit or 16-bit integer, return
360 * the address of the 32-bit word containing it.
363 static inline uint32_t *
364 round_to_word(void *ptr)
367 return ((uint32_t *)((intptr_t)ptr & ~3)); /* clear low two bits: align down to a word */
371 * Utility functions for loading and storing 8-bit and 16-bit integers
372 * in 32-bit words at an offset corresponding with the location of the
377 put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
381 offset = (intptr_t)offset_ptr & 3; /* byte position of the target within its word */
385 static inline uint8_t
386 get_1(const reg_t *r, const uint8_t *offset_ptr)
390 offset = (intptr_t)offset_ptr & 3;
391 return (r->v8[offset]);
395 put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
403 offset = (intptr_t)offset_ptr & 3;
405 r->v8[offset] = bytes.out[0]; /* copy the halfword a byte at a time */
406 r->v8[offset + 1] = bytes.out[1];
409 static inline uint16_t
410 get_2(const reg_t *r, const uint16_t *offset_ptr)
418 offset = (intptr_t)offset_ptr & 3;
419 bytes.in[0] = r->v8[offset];
420 bytes.in[1] = r->v8[offset + 1];
425 * 8-bit and 16-bit routines.
427 * These operations are not natively supported by the CPU, so we use
428 * some shifting and bitmasking on top of the 32-bit instructions.
431 #define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t) \
433 __sync_lock_test_and_set_##N(uintN_t *mem, uintN_t val) \
436 reg_t val32, negmask, old; \
437 uint32_t temp1, temp2; \
439 mem32 = round_to_word(mem); \
440 val32.v32 = 0x00000000; \
441 put_##N(&val32, mem, val); \
442 negmask.v32 = 0xffffffff; \
443 put_##N(&negmask, mem, 0); /* negmask: ones everywhere except the target lane */ \
448 "\tldrex %0, %6\n" /* Load old value. */ \
449 "\tand %2, %5, %0\n" /* Remove the old value. */ \
450 "\torr %2, %2, %4\n" /* Put in the new value. */ \
451 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
452 "\tcmp %3, #0\n" /* Did it succeed? */ \
453 "\tbne 1b\n" /* Spin if failed. */ \
454 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
456 : "r" (val32.v32), "r" (negmask.v32), "m" (*mem32)); \
457 return (get_##N(&old, mem)); \
460 EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
461 EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)
463 #define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \
465 __sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected, \
469 reg_t expected32, desired32, posmask, old; \
470 uint32_t negmask, temp1, temp2; \
472 mem32 = round_to_word(mem); \
473 expected32.v32 = 0x00000000; \
474 put_##N(&expected32, mem, expected); \
475 desired32.v32 = 0x00000000; \
476 put_##N(&desired32, mem, desired); \
477 posmask.v32 = 0x00000000; \
478 put_##N(&posmask, mem, ~0); \
479 negmask = ~posmask.v32; /* mask selecting the untouched bytes of the word */ \
484 "\tldrex %0, %8\n" /* Load old value. */ \
485 "\tand %2, %6, %0\n" /* Isolate the old value. */ \
486 "\tcmp %2, %4\n" /* Compare to expected value. */\
487 "\tbne 2f\n" /* Values are unequal. */ \
488 "\tand %2, %7, %0\n" /* Remove the old value. */ \
489 "\torr %2, %5\n" /* Put in the new value. */ \
490 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
491 "\tcmp %3, #0\n" /* Did it succeed? */ \
492 "\tbne 1b\n" /* Spin if failed. */ \
494 : "=&r" (old), "=m" (*mem32), "=&r" (temp1), \
496 : "r" (expected32.v32), "r" (desired32.v32), \
497 "r" (posmask.v32), "r" (negmask), "m" (*mem32)); \
498 return (get_##N(&old, mem)); \
501 EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
502 EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)
504 #define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op) \
506 __sync_##name##_##N(uintN_t *mem, uintN_t val) \
509 reg_t val32, posmask, old; \
510 uint32_t negmask, temp1, temp2; \
512 mem32 = round_to_word(mem); \
513 val32.v32 = 0x00000000; \
514 put_##N(&val32, mem, val); \
515 posmask.v32 = 0x00000000; \
516 put_##N(&posmask, mem, ~0); \
517 negmask = ~posmask.v32; /* bytes outside the target lane, to be preserved */ \
522 "\tldrex %0, %7\n" /* Load old value. */ \
523 "\t"op" %2, %0, %4\n" /* Calculate new value. */ \
524 "\tand %2, %5\n" /* Isolate the new value. */ \
525 "\tand %3, %6, %0\n" /* Remove the old value. */ \
526 "\torr %2, %2, %3\n" /* Put in the new value. */ \
527 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
528 "\tcmp %3, #0\n" /* Did it succeed? */ \
529 "\tbne 1b\n" /* Spin if failed. */ \
530 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
532 : "r" (val32.v32), "r" (posmask.v32), "r" (negmask), \
534 return (get_##N(&old, mem)); \
537 EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
538 EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
539 EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
540 EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")
542 #define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence) \
544 __sync_##name##_##N(uintN_t *mem, uintN_t val) \
548 uint32_t temp1, temp2; \
550 mem32 = round_to_word(mem); \
551 val32.v32 = idempotence ? 0xffffffff : 0x00000000; /* neutral element for op outside the lane */ \
552 put_##N(&val32, mem, val); \
557 "\tldrex %0, %5\n" /* Load old value. */ \
558 "\t"op" %2, %4, %0\n" /* Calculate new value. */ \
559 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
560 "\tcmp %3, #0\n" /* Did it succeed? */ \
561 "\tbne 1b\n" /* Spin if failed. */ \
562 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
564 : "r" (val32.v32), "m" (*mem32)); \
565 return (get_##N(&old, mem)); \
568 EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
569 EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
570 EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
571 EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
572 EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
573 EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)
580 __sync_lock_test_and_set_4(uint32_t *mem, uint32_t val) /* 32-bit exchange via ldrex/strex retry loop */
587 "\tldrex %0, %4\n" /* Load old value. */
588 "\tstrex %2, %3, %1\n" /* Attempt to store. */
589 "\tcmp %2, #0\n" /* Did it succeed? */
590 "\tbne 1b\n" /* Spin if failed. */
591 : "=&r" (old), "=m" (*mem), "=&r" (temp)
592 : "r" (val), "m" (*mem));
597 __sync_val_compare_and_swap_4(uint32_t *mem, uint32_t expected,
605 "\tldrex %0, %5\n" /* Load old value. */
606 "\tcmp %0, %3\n" /* Compare to expected value. */
607 "\tbne 2f\n" /* Values are unequal. */
608 "\tstrex %2, %4, %1\n" /* Attempt to store. */
609 "\tcmp %2, #0\n" /* Did it succeed? */
610 "\tbne 1b\n" /* Spin if failed. */
612 : "=&r" (old), "=m" (*mem), "=&r" (temp)
613 : "r" (expected), "r" (desired), "m" (*mem));
617 #define EMIT_FETCH_AND_OP_4(name, op) \
619 __sync_##name##_4(uint32_t *mem, uint32_t val) \
621 uint32_t old, temp1, temp2; \
626 "\tldrex %0, %5\n" /* Load old value. */ \
627 "\t"op" %2, %0, %4\n" /* Calculate new value. */ \
628 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
629 "\tcmp %3, #0\n" /* Did it succeed? */ \
630 "\tbne 1b\n" /* Spin if failed. */ \
631 : "=&r" (old), "=m" (*mem), "=&r" (temp1), \
633 : "r" (val), "m" (*mem)); \
637 EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
638 EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
639 EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
640 EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
641 EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")
643 #else /* __ARM_ARCH_5__ */
648 #error "On SMP systems we should have proper atomic operations."
652 * On uniprocessor systems, we can perform the atomic operations by
653 * disabling interrupts.
656 #define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \
658 __sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected, \
663 WITHOUT_INTERRUPTS({ \
665 if (*mem == expected) /* CAS hit: swap in the new value */ \
671 #define EMIT_FETCH_AND_OP_N(N, uintN_t, name, op) \
673 __sync_##name##_##N(uintN_t *mem, uintN_t val) \
677 WITHOUT_INTERRUPTS({ \
684 #define EMIT_ALL_OPS_N(N, uintN_t) \
685 EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \
686 EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =) \
687 EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=) \
688 EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=) \
689 EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=) \
690 EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=) \
691 EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)
693 EMIT_ALL_OPS_N(1, uint8_t)
694 EMIT_ALL_OPS_N(2, uint16_t)
695 EMIT_ALL_OPS_N(4, uint32_t)
696 EMIT_ALL_OPS_N(8, uint64_t)
701 * For userspace on uniprocessor systems, we can implement the atomic
702 * operations by using a Restartable Atomic Sequence. This makes the
703 * kernel restart the code from the beginning when interrupted.
706 #define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str) \
708 __sync_lock_test_and_set_##N(uintN_t *mem, uintN_t val) \
710 uint32_t old, temp, ras_start; \
712 ras_start = ARM_RAS_START; /* RAS descriptor address — presumably from <machine/sysarch.h>, TODO confirm */ \
714 /* Set up Restartable Atomic Sequence. */ \
719 "\tstr %2, [%5, #4]\n" \
721 "\t"ldr" %0, %4\n" /* Load old value. */ \
722 "\t"str" %3, %1\n" /* Store new value. */ \
724 /* Tear down Restartable Atomic Sequence. */ \
726 "\tmov %2, #0x00000000\n" \
728 "\tmov %2, #0xffffffff\n" \
729 "\tstr %2, [%5, #4]\n" \
730 : "=&r" (old), "=m" (*mem), "=&r" (temp) \
731 : "r" (val), "m" (*mem), "r" (ras_start)); \
735 #define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq) \
737 __sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected, \
740 uint32_t old, temp, ras_start; \
742 ras_start = ARM_RAS_START; \
744 /* Set up Restartable Atomic Sequence. */ \
749 "\tstr %2, [%6, #4]\n" \
751 "\t"ldr" %0, %5\n" /* Load old value. */ \
752 "\tcmp %0, %3\n" /* Compare to expected value. */\
753 "\t"streq" %4, %1\n" /* Store new value. */ \
755 /* Tear down Restartable Atomic Sequence. */ \
757 "\tmov %2, #0x00000000\n" \
759 "\tmov %2, #0xffffffff\n" \
760 "\tstr %2, [%6, #4]\n" \
761 : "=&r" (old), "=m" (*mem), "=&r" (temp) \
762 : "r" (expected), "r" (desired), "m" (*mem), \
767 #define EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op) \
769 __sync_##name##_##N(uintN_t *mem, uintN_t val) \
771 uint32_t old, temp, ras_start; \
773 ras_start = ARM_RAS_START; /* RAS descriptor address */ \
775 /* Set up Restartable Atomic Sequence. */ \
780 "\tstr %2, [%5, #4]\n" \
782 "\t"ldr" %0, %4\n" /* Load old value. */ \
783 "\t"op" %2, %0, %3\n" /* Calculate new value. */ \
784 "\t"str" %2, %1\n" /* Store new value. */ \
786 /* Tear down Restartable Atomic Sequence. */ \
788 "\tmov %2, #0x00000000\n" \
790 "\tmov %2, #0xffffffff\n" \
791 "\tstr %2, [%5, #4]\n" \
792 : "=&r" (old), "=m" (*mem), "=&r" (temp) \
793 : "r" (val), "m" (*mem), "r" (ras_start)); \
797 #define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq) \
798 EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str) \
799 EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq) \
800 EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add") \
801 EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and") \
802 EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr") \
803 EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub") \
804 EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")
/*
 * Instantiate the __sync_* routines for each width.  Use UAL spellings
 * for the conditional stores ("strbeq"/"strheq", condition suffix last),
 * consistent with the C11 instantiations earlier in this file; the
 * pre-UAL "streqb"/"streqh" forms are rejected by Clang's integrated
 * assembler.
 */
806 EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
807 EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
808 EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
814 #endif /* __SYNC_ATOMICS */