/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/sysarch.h>

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define WITHOUT_INTERRUPTS(s) do {                                      \
        register_t regs;                                                \
                                                                        \
        regs = intr_disable();                                          \
        do s while (0);                                                 \
        intr_restore(regs);                                             \
} while (0)
#endif /* _KERNEL && !SMP */
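
/*
 * Usage sketch (hypothetical helper, not part of this file's API): on a
 * uniprocessor kernel, masking interrupts around a load/modify/store
 * sequence is enough to make it atomic, since interrupt handlers are
 * the only other source of concurrency.  A fetch-and-add could then be
 * written as:
 *
 *      uint32_t
 *      up_fetch_and_add(uint32_t *mem, uint32_t val)
 *      {
 *              uint32_t old;
 *
 *              WITHOUT_INTERRUPTS({
 *                      old = *mem;
 *                      *mem = old + val;
 *              });
 *              return (old);
 *      }
 */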

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

        __asm volatile ("" : : : "memory");
}
#else
static inline void
do_sync(void)
{

        dmb();
}
#endif
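
/*
 * A working __sync_synchronize() replacement thus reduces to a call to
 * do_sync(): a compiler barrier on uniprocessor kernels and a hardware
 * dmb otherwise.  A minimal sketch (hypothetical name):
 *
 *      static inline void
 *      full_barrier(void)
 *      {
 *
 *              do_sync();
 *      }
 */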


#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

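/*
 * The routines below are defined under _c-suffixed names and exported
 * under the canonical __sync_* names: clang rewrites the external names
 * through the redefine_extname pragmas below, while GCC builds get the
 * same effect from the __strong_reference() aliases at the end of this
 * file.  This keeps the definitions from spelling the reserved builtin
 * names directly.
 */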
#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
        uint8_t         v8[4];
        uint32_t        v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

        return ((uint32_t *)((intptr_t)ptr & ~3));
}
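
/*
 * For example, a uint16_t at address 0x1002 yields round_to_word() ==
 * (uint32_t *)0x1000, with the halfword occupying byte offsets 2 and 3
 * of that word.
 */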

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
        size_t offset;

        offset = (intptr_t)offset_ptr & 3;
        r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
        size_t offset;

        offset = (intptr_t)offset_ptr & 3;
        return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
        size_t offset;
        union {
                uint16_t in;
                uint8_t out[2];
        } bytes;

        offset = (intptr_t)offset_ptr & 3;
        bytes.in = val;
        r->v8[offset] = bytes.out[0];
        r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
        size_t offset;
        union {
                uint8_t in[2];
                uint16_t out;
        } bytes;

        offset = (intptr_t)offset_ptr & 3;
        bytes.in[0] = r->v8[offset];
        bytes.in[1] = r->v8[offset + 1];
        return (bytes.out);
}
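
/*
 * Only the low two bits of offset_ptr are consulted; the pointer itself
 * is never dereferenced.  An illustrative roundtrip:
 *
 *      reg_t r;
 *
 *      r.v32 = 0;
 *      put_2(&r, (uint16_t *)0x1002, 0xbeef);
 *      assert(get_2(&r, (uint16_t *)0x1002) == 0xbeef);
 */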

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */
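
/*
 * For example, on a little-endian kernel, a test-and-set of the byte at
 * offset 1 of its containing word with val = 0x5a uses
 * val32 = 0x00005a00 and negmask = 0xffff00ff: the AND clears the
 * target byte, the ORR merges in the new one, and the other three bytes
 * pass through unmodified.
 */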

#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)                            \
uintN_t                                                                 \
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)             \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t val32, negmask, old;                                      \
        uint32_t temp1, temp2;                                          \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        val32.v32 = 0x00000000;                                         \
        put_##N(&val32, mem, val);                                      \
        negmask.v32 = 0xffffffff;                                       \
        put_##N(&negmask, mem, 0);                                      \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %6\n"      /* Load old value. */           \
                "\tand   %2, %5, %0\n"  /* Remove the old value. */     \
                "\torr   %2, %2, %4\n"  /* Put in the new value. */     \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),        \
                  "=&r" (temp2)                                         \
                : "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));    \
        return (get_##N(&old, mem));                                    \
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)                         \
uintN_t                                                                 \
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,     \
    uintN_t desired)                                                    \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t expected32, desired32, posmask, old;                      \
        uint32_t negmask, temp1, temp2;                                 \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        expected32.v32 = 0x00000000;                                    \
        put_##N(&expected32, mem, expected);                            \
        desired32.v32 = 0x00000000;                                     \
        put_##N(&desired32, mem, desired);                              \
        posmask.v32 = 0x00000000;                                       \
        put_##N(&posmask, mem, ~0);                                     \
        negmask = ~posmask.v32;                                         \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %8\n"      /* Load old value. */           \
                "\tand   %2, %6, %0\n"  /* Isolate the old value. */    \
                "\tcmp   %2, %4\n"      /* Compare to expected value. */\
                "\tbne   2f\n"          /* Values are unequal. */       \
                "\tand   %2, %7, %0\n"  /* Remove the old value. */     \
                "\torr   %2, %5\n"      /* Put in the new value. */     \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                "2:"                                                    \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),        \
                  "=&r" (temp2)                                         \
                : "r" (expected32.v32), "r" (desired32.v32),            \
                  "r" (posmask.v32), "r" (negmask), "m" (*mem32));      \
        return (get_##N(&old, mem));                                    \
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)
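
/*
 * Usage sketch: the val_ variants return the value the location held
 * before the operation, so one call both detects success and reports
 * the conflicting value.  A byte-sized lock acquire (hypothetical)
 * built on the routine above:
 *
 *      uint8_t lock = 0;
 *
 *      while (__sync_val_compare_and_swap_1(&lock, 0, 1) != 0)
 *              ;       (a return value of 0 means we took the lock)
 */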

#define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)            \
uintN_t                                                                 \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)                      \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t val32, posmask, old;                                      \
        uint32_t negmask, temp1, temp2;                                 \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        val32.v32 = 0x00000000;                                         \
        put_##N(&val32, mem, val);                                      \
        posmask.v32 = 0x00000000;                                       \
        put_##N(&posmask, mem, ~0);                                     \
        negmask = ~posmask.v32;                                         \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %7\n"      /* Load old value. */           \
                "\t"op"  %2, %0, %4\n"  /* Calculate new value. */      \
                "\tand   %2, %5\n"      /* Isolate the new value. */    \
                "\tand   %3, %6, %0\n"  /* Remove the old value. */     \
                "\torr   %2, %2, %3\n"  /* Put in the new value. */     \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),        \
                  "=&r" (temp2)                                         \
                : "r" (val32.v32), "r" (posmask.v32), "r" (negmask),    \
                  "m" (*mem32));                                        \
        return (get_##N(&old, mem));                                    \
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)  \
uintN_t                                                                 \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)                      \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t val32, old;                                               \
        uint32_t temp1, temp2;                                          \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        val32.v32 = idempotence ? 0xffffffff : 0x00000000;              \
        put_##N(&val32, mem, val);                                      \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %5\n"      /* Load old value. */           \
                "\t"op"  %2, %4, %0\n"  /* Calculate new value. */      \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),        \
                  "=&r" (temp2)                                         \
                : "r" (val32.v32), "m" (*mem32));                       \
        return (get_##N(&old, mem));                                    \
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
        uint32_t old, temp;

        do_sync();
        __asm volatile (
                "1:"
                "\tldrex %0, %4\n"      /* Load old value. */
                "\tstrex %2, %3, %1\n"  /* Attempt to store. */
                "\tcmp   %2, #0\n"      /* Did it succeed? */
                "\tbne   1b\n"          /* Spin if failed. */
                : "=&r" (old), "=m" (*mem), "=&r" (temp)
                : "r" (val), "m" (*mem));
        return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
        uint32_t old, temp;

        do_sync();
        __asm volatile (
                "1:"
                "\tldrex %0, %5\n"      /* Load old value. */
                "\tcmp   %0, %3\n"      /* Compare to expected value. */
                "\tbne   2f\n"          /* Values are unequal. */
                "\tstrex %2, %4, %1\n"  /* Attempt to store. */
                "\tcmp   %2, #0\n"      /* Did it succeed? */
                "\tbne   1b\n"          /* Spin if failed. */
                "2:"
                : "=&r" (old), "=m" (*mem), "=&r" (temp)
                : "r" (expected), "r" (desired), "m" (*mem));
        return (old);
}

#define EMIT_FETCH_AND_OP_4(name, op)                                   \
uint32_t                                                                \
__sync_##name##_4_c(uint32_t *mem, uint32_t val)                        \
{                                                                       \
        uint32_t old, temp1, temp2;                                     \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %5\n"      /* Load old value. */           \
                "\t"op"  %2, %0, %4\n"  /* Calculate new value. */      \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old), "=m" (*mem), "=&r" (temp1),              \
                  "=&r" (temp2)                                         \
                : "r" (val), "m" (*mem));                               \
        return (old);                                                   \
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")
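
/*
 * Semantically, each generated fetch-and-op behaves like the following
 * non-atomic reference (hypothetical), with the ldrex/strex loop
 * guaranteeing that the load, the computation, and the store commit as
 * a single unit:
 *
 *      uint32_t
 *      fetch_and_add_4_ref(uint32_t *mem, uint32_t val)
 *      {
 *              uint32_t old;
 *
 *              old = *mem;
 *              *mem = old + val;
 *              return (old);
 *      }
 */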

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#endif /* __SYNC_ATOMICS */