2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
4 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
32 #include <sys/param.h>
33 #include <sys/stdatomic.h>
34 #include <sys/types.h>
36 #include <machine/atomic.h>
37 #include <machine/cpufunc.h>
38 #include <machine/sysarch.h>
/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
/*
 * On a uniprocessor kernel, masking interrupts is sufficient to make a
 * read-modify-write sequence atomic; no exclusive loads/stores needed.
 * "s" is a brace-enclosed statement block supplied by the caller.
 */
#define	WITHOUT_INTERRUPTS(s) do {					\
	register_t regs;						\
									\
	regs = intr_disable();						\
	do s while (0);							\
	intr_restore(regs);						\
} while (0)
#endif /* _KERNEL && !SMP */
/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

	/* Uniprocessor: a compiler barrier is all that is required. */
	__asm volatile ("" : : : "memory");
}
#else
static inline void
do_sync(void)
{

	/* NOTE(review): reconstructed branch — assumes dmb() full barrier. */
	dmb();
}
#endif
82 #if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)
85 #pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
86 #pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
87 #pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
88 #pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
89 #pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
90 #pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
91 #pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
92 #pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
93 #pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
94 #pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
95 #pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
96 #pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
97 #pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
98 #pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
99 #pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
100 #pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
101 #pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
102 #pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
103 #pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
104 #pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
105 #pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
/* Implementations for old GCC versions, lacking support for atomics. */

/*
 * A 32-bit word viewed either as a whole or as four individual bytes.
 * Used to emulate 8-bit and 16-bit atomics on top of 32-bit LDREX/STREX.
 */
typedef union {
	uint8_t		v8[4];
	uint32_t	v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

	/* Clear the two low address bits to round down to a word boundary. */
	return ((uint32_t *)((intptr_t)ptr & ~3));
}
133 * Utility functions for loading and storing 8-bit and 16-bit integers
134 * in 32-bit words at an offset corresponding with the location of the
139 put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
143 offset = (intptr_t)offset_ptr & 3;
147 static inline uint8_t
148 get_1(const reg_t *r, const uint8_t *offset_ptr)
152 offset = (intptr_t)offset_ptr & 3;
153 return (r->v8[offset]);
157 put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
165 offset = (intptr_t)offset_ptr & 3;
167 r->v8[offset] = bytes.out[0];
168 r->v8[offset + 1] = bytes.out[1];
171 static inline uint16_t
172 get_2(const reg_t *r, const uint16_t *offset_ptr)
180 offset = (intptr_t)offset_ptr & 3;
181 bytes.in[0] = r->v8[offset];
182 bytes.in[1] = r->v8[offset + 1];
187 * 8-bit and 16-bit routines.
189 * These operations are not natively supported by the CPU, so we use
190 * some shifting and bitmasking on top of the 32-bit instructions.
193 #define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t) \
195 __sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val) \
198 reg_t val32, negmask, old; \
199 uint32_t temp1, temp2; \
201 mem32 = round_to_word(mem); \
202 val32.v32 = 0x00000000; \
203 put_##N(&val32, mem, val); \
204 negmask.v32 = 0xffffffff; \
205 put_##N(&negmask, mem, 0); \
210 "\tldrex %0, %6\n" /* Load old value. */ \
211 "\tand %2, %5, %0\n" /* Remove the old value. */ \
212 "\torr %2, %2, %4\n" /* Put in the new value. */ \
213 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
214 "\tcmp %3, #0\n" /* Did it succeed? */ \
215 "\tbne 1b\n" /* Spin if failed. */ \
216 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
218 : "r" (val32.v32), "r" (negmask.v32), "m" (*mem32)); \
219 return (get_##N(&old, mem)); \
222 EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
223 EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)
225 #define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t) \
227 __sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected, \
231 reg_t expected32, desired32, posmask, old; \
232 uint32_t negmask, temp1, temp2; \
234 mem32 = round_to_word(mem); \
235 expected32.v32 = 0x00000000; \
236 put_##N(&expected32, mem, expected); \
237 desired32.v32 = 0x00000000; \
238 put_##N(&desired32, mem, desired); \
239 posmask.v32 = 0x00000000; \
240 put_##N(&posmask, mem, ~0); \
241 negmask = ~posmask.v32; \
246 "\tldrex %0, %8\n" /* Load old value. */ \
247 "\tand %2, %6, %0\n" /* Isolate the old value. */ \
248 "\tcmp %2, %4\n" /* Compare to expected value. */\
249 "\tbne 2f\n" /* Values are unequal. */ \
250 "\tand %2, %7, %0\n" /* Remove the old value. */ \
251 "\torr %2, %5\n" /* Put in the new value. */ \
252 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
253 "\tcmp %3, #0\n" /* Did it succeed? */ \
254 "\tbne 1b\n" /* Spin if failed. */ \
256 : "=&r" (old), "=m" (*mem32), "=&r" (temp1), \
258 : "r" (expected32.v32), "r" (desired32.v32), \
259 "r" (posmask.v32), "r" (negmask), "m" (*mem32)); \
260 return (get_##N(&old, mem)); \
263 EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
264 EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)
266 #define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op) \
268 __sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \
271 reg_t val32, posmask, old; \
272 uint32_t negmask, temp1, temp2; \
274 mem32 = round_to_word(mem); \
275 val32.v32 = 0x00000000; \
276 put_##N(&val32, mem, val); \
277 posmask.v32 = 0x00000000; \
278 put_##N(&posmask, mem, ~0); \
279 negmask = ~posmask.v32; \
284 "\tldrex %0, %7\n" /* Load old value. */ \
285 "\t"op" %2, %0, %4\n" /* Calculate new value. */ \
286 "\tand %2, %5\n" /* Isolate the new value. */ \
287 "\tand %3, %6, %0\n" /* Remove the old value. */ \
288 "\torr %2, %2, %3\n" /* Put in the new value. */ \
289 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
290 "\tcmp %3, #0\n" /* Did it succeed? */ \
291 "\tbne 1b\n" /* Spin if failed. */ \
292 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
294 : "r" (val32.v32), "r" (posmask.v32), "r" (negmask), \
296 return (get_##N(&old, mem)); \
299 EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
300 EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
301 EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
302 EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")
304 #define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence) \
306 __sync_##name##_##N##_c(uintN_t *mem, uintN_t val) \
310 uint32_t temp1, temp2; \
312 mem32 = round_to_word(mem); \
313 val32.v32 = idempotence ? 0xffffffff : 0x00000000; \
314 put_##N(&val32, mem, val); \
319 "\tldrex %0, %5\n" /* Load old value. */ \
320 "\t"op" %2, %4, %0\n" /* Calculate new value. */ \
321 "\tstrex %3, %2, %1\n" /* Attempt to store. */ \
322 "\tcmp %3, #0\n" /* Did it succeed? */ \
323 "\tbne 1b\n" /* Spin if failed. */ \
324 : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1), \
326 : "r" (val32.v32), "m" (*mem32)); \
327 return (get_##N(&old, mem)); \
330 EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
331 EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
332 EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
333 EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
334 EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
335 EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)
/*
 * 32-bit routines.
 */

/*
 * Atomically exchange *mem with "val", returning the previous value.
 * Full barrier before the LDREX/STREX retry loop (see do_sync() note).
 */
uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %4\n"	/* Load old value. */
		"\tstrex %2, %3, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (val), "m" (*mem));
	return (old);
}
/*
 * Atomic 32-bit compare-and-swap: store "desired" into *mem only when
 * *mem equals "expected".  Returns the previous value of *mem either way.
 */
uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
	uint32_t old, temp;

	do_sync();
	__asm volatile (
		"1:"
		"\tldrex %0, %5\n"	/* Load old value. */
		"\tcmp %0, %3\n"	/* Compare to expected value. */
		"\tbne 2f\n"		/* Values are unequal. */
		"\tstrex %2, %4, %1\n"	/* Attempt to store. */
		"\tcmp %2, #0\n"	/* Did it succeed? */
		"\tbne 1b\n"		/* Spin if failed. */
		"2:"
		: "=&r" (old), "=m" (*mem), "=&r" (temp)
		: "r" (expected), "r" (desired), "m" (*mem));
	return (old);
}
/*
 * Atomic 32-bit fetch-and-op.  "op" is the ARM mnemonic applied as
 * new = old op val; the previous value of *mem is returned.
 */
#define	EMIT_FETCH_AND_OP_4(name, op)					\
uint32_t								\
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)			\
{									\
	uint32_t old, temp1, temp2;					\
									\
	do_sync();							\
	__asm volatile (						\
		"1:"							\
		"\tldrex %0, %5\n"	/* Load old value. */		\
		"\t"op" %2, %0, %4\n"	/* Calculate new value. */	\
		"\tstrex %3, %2, %1\n"	/* Attempt to store. */		\
		"\tcmp %3, #0\n"	/* Did it succeed? */		\
		"\tbne 1b\n"		/* Spin if failed. */		\
		: "=&r" (old), "=m" (*mem), "=&r" (temp1),		\
		  "=&r" (temp2)						\
		: "r" (val), "m" (*mem));				\
	return (old);							\
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")
406 __strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
407 __strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
408 __strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
409 __strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
410 __strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
411 __strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
412 __strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
413 __strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
414 __strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
415 __strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
416 __strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
417 __strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
418 __strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
419 __strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
420 __strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
421 __strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
422 __strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
423 __strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
424 __strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
425 __strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
426 __strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
429 #endif /* __SYNC_ATOMICS */