/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/sysarch.h>

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define WITHOUT_INTERRUPTS(s) do {                                      \
        register_t regs;                                                \
                                                                        \
        regs = intr_disable();                                          \
        do s while (0);                                                 \
        intr_restore(regs);                                             \
} while (0)
#endif /* _KERNEL && !SMP */
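
/*
 * Illustrative sketch: on a UP kernel nothing else can run between
 * intr_disable() and intr_restore(), so a statement block such as
 *
 *	WITHOUT_INTERRUPTS({
 *		ret = *mem;
 *		*mem += val;
 *	});
 *
 * reads and updates *mem atomically with respect to everything else
 * executing on the CPU.  This is exactly how the emitters below use it.
 */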

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

        __asm volatile ("" : : : "memory");
}
#elif __ARM_ARCH >= 6
static inline void
do_sync(void)
{

        dmb();
}
#endif
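
/*
 * As an illustration (a sketch, not code in this file): with
 * <stdatomic.h> layered on top of the __sync_* API, a call such as
 *
 *	uint32_t prev = atomic_exchange_explicit(&lock, 1,
 *	    memory_order_seq_cst);
 *
 * lowers to __sync_lock_test_and_set_4(), so giving that function full
 * barrier semantics keeps the stronger memory orders correct as well.
 */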

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

/* ARMv6+ systems should be supported by the compiler. */
#if __ARM_ARCH <= 5

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

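/*
 * For instance, EMIT_FETCH_OP_N(4, uint32_t, fetch_add, +=) below
 * expands to:
 *
 *	uint32_t
 *	__atomic_fetch_add_4(uint32_t *mem, uint32_t val,
 *	    int model __unused)
 *	{
 *		uint32_t ret;
 *
 *		WITHOUT_INTERRUPTS({
 *			ret = *mem;
 *			*mem += val;
 *		});
 *		return (ret);
 *	}
 */
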
#define EMIT_LOAD_N(N, uintN_t)                                         \
uintN_t                                                                 \
__atomic_load_##N(uintN_t *mem, int model __unused)                     \
{                                                                       \
        uintN_t ret;                                                    \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                ret = *mem;                                             \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_STORE_N(N, uintN_t)                                        \
void                                                                    \
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)       \
{                                                                       \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                *mem = val;                                             \
        });                                                             \
}

#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t)                             \
_Bool                                                                   \
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,          \
    uintN_t desired, int success __unused, int failure __unused)        \
{                                                                       \
        _Bool ret;                                                      \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                if (*mem == *expected) {                                \
                        *mem = desired;                                 \
                        ret = 1;                                        \
                } else {                                                \
                        *expected = *mem;                               \
                        ret = 0;                                        \
                }                                                       \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_FETCH_OP_N(N, uintN_t, name, op)                           \
uintN_t                                                                 \
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)    \
{                                                                       \
        uintN_t ret;                                                    \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                ret = *mem;                                             \
                *mem op val;                                            \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_ALL_OPS_N(N, uintN_t)                                      \
EMIT_LOAD_N(N, uintN_t)                                                 \
EMIT_STORE_N(N, uintN_t)                                                \
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)                                     \
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)                                \
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)                              \
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)                              \
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)                               \
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)                              \
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef  EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

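/*
 * A sequence arms itself by storing its start address at ARM_RAS_START
 * and its end address at ARM_RAS_START + 4.  When the kernel preempts
 * the thread while its program counter lies inside that window, it
 * moves the program counter back to the start address, so the
 * load/modify/store below reruns from the top.  The trailing
 * 0x00000000/0xffffffff stores return the window to its disarmed
 * state.
 */
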
#define EMIT_LOAD_N(N, uintN_t)                                         \
uintN_t                                                                 \
__atomic_load_##N(uintN_t *mem, int model __unused)                     \
{                                                                       \
                                                                        \
        return (*mem);                                                  \
}

#define EMIT_STORE_N(N, uintN_t)                                        \
void                                                                    \
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)       \
{                                                                       \
                                                                        \
        *mem = val;                                                     \
}

#define EMIT_EXCHANGE_N(N, uintN_t, ldr, str)                           \
uintN_t                                                                 \
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)    \
{                                                                       \
        uint32_t old, temp, ras_start;                                  \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%5]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%5, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %4\n"      /* Load old value. */           \
                "\t"str" %3, %1\n"      /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%5]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%5, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (val), "m" (*mem), "r" (ras_start));              \
        return (old);                                                   \
}

#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)                 \
_Bool                                                                   \
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,         \
    uintN_t desired, int success __unused, int failure __unused)        \
{                                                                       \
        uint32_t expected, old, temp, ras_start;                        \
                                                                        \
        expected = *pexpected;                                          \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%6]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%6, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %5\n"      /* Load old value. */           \
                "\tcmp   %0, %3\n"      /* Compare to expected value. */\
                "\t"streq" %4, %1\n"    /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%6]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%6, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (expected), "r" (desired), "m" (*mem),            \
                  "r" (ras_start));                                     \
        if (old == expected) {                                          \
                return (1);                                             \
        } else {                                                        \
                *pexpected = old;                                       \
                return (0);                                             \
        }                                                               \
}

#define EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op, ret)            \
uintN_t                                                                 \
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)    \
{                                                                       \
        uint32_t old, new, ras_start;                                   \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%5]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%5, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %4\n"      /* Load old value. */           \
                "\t"op"  %2, %0, %3\n"  /* Calculate new value. */      \
                "\t"str" %2, %1\n"      /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%5]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%5, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (new)                 \
                : "r" (val), "m" (*mem), "r" (ras_start));              \
        return (ret);                                                   \
}

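/*
 * The last EMIT_FETCH_OP_N() argument selects the variable to return:
 * "old" yields the fetch_<op> flavor and "new" the <op>_fetch flavor.
 */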
#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)                     \
EMIT_LOAD_N(N, uintN_t)                                                 \
EMIT_STORE_N(N, uintN_t)                                                \
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)                                   \
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)                         \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add", old)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and", old)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or,  "orr", old)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub", old)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor", old)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, add_fetch, "add", new)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, and_fetch, "and", new)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, or_fetch,  "orr", new)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, sub_fetch, "sub", new)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, xor_fetch, "eor", new)

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef  EMIT_ALL_OPS_N

#endif /* _KERNEL */

#endif /* __ARM_ARCH */

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

#if __ARM_ARCH >= 6

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
        uint8_t         v8[4];
        uint32_t        v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

        return ((uint32_t *)((intptr_t)ptr & ~3));
}
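
/*
 * For example, a uint16_t at address 0x20006 lives inside the 32-bit
 * word at 0x20004; round_to_word() simply clears the two low address
 * bits:
 *
 *	uint16_t *mem = (uint16_t *)0x20006;
 *	uint32_t *mem32 = round_to_word(mem);	-> 0x20004
 */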

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
        size_t offset;

        offset = (intptr_t)offset_ptr & 3;
        r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
        size_t offset;

        offset = (intptr_t)offset_ptr & 3;
        return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
        size_t offset;
        union {
                uint16_t in;
                uint8_t out[2];
        } bytes;

        offset = (intptr_t)offset_ptr & 3;
        bytes.in = val;
        r->v8[offset] = bytes.out[0];
        r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
        size_t offset;
        union {
                uint8_t in[2];
                uint16_t out;
        } bytes;

        offset = (intptr_t)offset_ptr & 3;
        bytes.in[0] = r->v8[offset];
        bytes.in[1] = r->v8[offset + 1];
        return (bytes.out);
}
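
/*
 * Continuing the example: for a uint16_t at address 0x20006 the offset
 * within its word is 2, so put_2() writes the value's two bytes into
 * v8[2] and v8[3] and get_2() reassembles them from the same slots,
 * leaving the other half of the word untouched.
 */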

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

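/*
 * Concretely, on this little-endian configuration, for a uint16_t at
 * offset 2 within its word the emitters below compute:
 *
 *	val32   == (uint32_t)val << 16	(new value, shifted into place)
 *	posmask == 0xffff0000		(bits belonging to the variable)
 *	negmask == 0x0000ffff		(bits that must be preserved)
 */
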
#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)                            \
uintN_t                                                                 \
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)             \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t val32, negmask, old;                                      \
        uint32_t temp1, temp2;                                          \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        val32.v32 = 0x00000000;                                         \
        put_##N(&val32, mem, val);                                      \
        negmask.v32 = 0xffffffff;                                       \
        put_##N(&negmask, mem, 0);                                      \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %6\n"      /* Load old value. */           \
                "\tand   %2, %5, %0\n"  /* Remove the old value. */     \
                "\torr   %2, %2, %4\n"  /* Put in the new value. */     \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),        \
                  "=&r" (temp2)                                         \
                : "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));    \
        return (get_##N(&old, mem));                                    \
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)                         \
uintN_t                                                                 \
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,     \
    uintN_t desired)                                                    \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t expected32, desired32, posmask, old;                      \
        uint32_t negmask, temp1, temp2;                                 \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        expected32.v32 = 0x00000000;                                    \
        put_##N(&expected32, mem, expected);                            \
        desired32.v32 = 0x00000000;                                     \
        put_##N(&desired32, mem, desired);                              \
        posmask.v32 = 0x00000000;                                       \
        put_##N(&posmask, mem, ~0);                                     \
        negmask = ~posmask.v32;                                         \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %8\n"      /* Load old value. */           \
                "\tand   %2, %6, %0\n"  /* Isolate the old value. */    \
                "\tcmp   %2, %4\n"      /* Compare to expected value. */\
                "\tbne   2f\n"          /* Values are unequal. */       \
                "\tand   %2, %7, %0\n"  /* Remove the old value. */     \
                "\torr   %2, %5\n"      /* Put in the new value. */     \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                "2:"                                                    \
                : "=&r" (old), "=m" (*mem32), "=&r" (temp1),            \
                  "=&r" (temp2)                                         \
                : "r" (expected32.v32), "r" (desired32.v32),            \
                  "r" (posmask.v32), "r" (negmask), "m" (*mem32));      \
        return (get_##N(&old, mem));                                    \
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)            \
uintN_t                                                                 \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)                      \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t val32, posmask, old;                                      \
        uint32_t negmask, temp1, temp2;                                 \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        val32.v32 = 0x00000000;                                         \
        put_##N(&val32, mem, val);                                      \
        posmask.v32 = 0x00000000;                                       \
        put_##N(&posmask, mem, ~0);                                     \
        negmask = ~posmask.v32;                                         \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %7\n"      /* Load old value. */           \
                "\t"op"  %2, %0, %4\n"  /* Calculate new value. */      \
                "\tand   %2, %5\n"      /* Isolate the new value. */    \
                "\tand   %3, %6, %0\n"  /* Remove the old value. */     \
                "\torr   %2, %2, %3\n"  /* Put in the new value. */     \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),        \
                  "=&r" (temp2)                                         \
                : "r" (val32.v32), "r" (posmask.v32), "r" (negmask),    \
                  "m" (*mem32));                                        \
        return (get_##N(&old, mem));                                    \
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

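/*
 * The "idempotence" argument initializes the bytes of val32 outside
 * the variable to the identity of the operation: all ones for AND and
 * all zeroes for OR and XOR.  The rest of the word then passes through
 * the operation unchanged, so no separate masking step is needed.
 */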
#define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)  \
uintN_t                                                                 \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)                      \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t val32, old;                                               \
        uint32_t temp1, temp2;                                          \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        val32.v32 = idempotence ? 0xffffffff : 0x00000000;              \
        put_##N(&val32, mem, val);                                      \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %5\n"      /* Load old value. */           \
                "\t"op"  %2, %4, %0\n"  /* Calculate new value. */      \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),        \
                  "=&r" (temp2)                                         \
                : "r" (val32.v32), "m" (*mem32));                       \
        return (get_##N(&old, mem));                                    \
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
        uint32_t old, temp;

        do_sync();
        __asm volatile (
                "1:"
                "\tldrex %0, %4\n"      /* Load old value. */
                "\tstrex %2, %3, %1\n"  /* Attempt to store. */
                "\tcmp   %2, #0\n"      /* Did it succeed? */
                "\tbne   1b\n"          /* Spin if failed. */
                : "=&r" (old), "=m" (*mem), "=&r" (temp)
                : "r" (val), "m" (*mem));
        return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
        uint32_t old, temp;

        do_sync();
        __asm volatile (
                "1:"
                "\tldrex %0, %5\n"      /* Load old value. */
                "\tcmp   %0, %3\n"      /* Compare to expected value. */
                "\tbne   2f\n"          /* Values are unequal. */
                "\tstrex %2, %4, %1\n"  /* Attempt to store. */
                "\tcmp   %2, #0\n"      /* Did it succeed? */
                "\tbne   1b\n"          /* Spin if failed. */
                "2:"
                : "=&r" (old), "=m" (*mem), "=&r" (temp)
                : "r" (expected), "r" (desired), "m" (*mem));
        return (old);
}
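
/*
 * All of these loops rely on the ARMv6 exclusive monitor: ldrex tags
 * the address for exclusive access and strex performs the store only
 * if nothing else has touched the location in the meantime, writing a
 * nonzero status to its first operand on failure, in which case we
 * branch back to label 1 and retry.
 */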

#define EMIT_FETCH_AND_OP_4(name, op)                                   \
uint32_t                                                                \
__sync_##name##_4##_c(uint32_t *mem, uint32_t val)                      \
{                                                                       \
        uint32_t old, temp1, temp2;                                     \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %5\n"      /* Load old value. */           \
                "\t"op"  %2, %0, %4\n"  /* Calculate new value. */      \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old), "=m" (*mem), "=&r" (temp1),              \
                  "=&r" (temp2)                                         \
                : "r" (val), "m" (*mem));                               \
        return (old);                                                   \
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif /* !__clang__ */

#else /* __ARM_ARCH < 6 */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)                         \
uintN_t                                                                 \
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,         \
    uintN_t desired)                                                    \
{                                                                       \
        uintN_t ret;                                                    \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                ret = *mem;                                             \
                if (*mem == expected)                                   \
                        *mem = desired;                                 \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)                       \
uintN_t                                                                 \
__sync_##name##_##N(uintN_t *mem, uintN_t val)                          \
{                                                                       \
        uintN_t ret;                                                    \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                ret = *mem;                                             \
                *mem op val;                                            \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_ALL_OPS_N(N, uintN_t)                                      \
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)                                 \
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)                   \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)                      \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)                      \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)                       \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)                      \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef  EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence, as described for
 * the __atomic_* functions above.
 */

#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)                  \
uintN_t                                                                 \
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)             \
{                                                                       \
        uint32_t old, temp, ras_start;                                  \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%5]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%5, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %4\n"      /* Load old value. */           \
                "\t"str" %3, %1\n"      /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%5]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%5, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (val), "m" (*mem), "r" (ras_start));              \
        return (old);                                                   \
}

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)             \
uintN_t                                                                 \
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,     \
    uintN_t desired)                                                    \
{                                                                       \
        uint32_t old, temp, ras_start;                                  \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%6]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%6, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %5\n"      /* Load old value. */           \
                "\tcmp   %0, %3\n"      /* Compare to expected value. */\
                "\t"streq" %4, %1\n"    /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%6]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%6, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (expected), "r" (desired), "m" (*mem),            \
                  "r" (ras_start));                                     \
        return (old);                                                   \
}

#define EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)             \
uintN_t                                                                 \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)                      \
{                                                                       \
        uint32_t old, temp, ras_start;                                  \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%5]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%5, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %4\n"      /* Load old value. */           \
                "\t"op"  %2, %0, %3\n"  /* Calculate new value. */      \
                "\t"str" %2, %1\n"      /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%5]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%5, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (val), "m" (*mem), "r" (ras_start));              \
        return (old);                                                   \
}

#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)                     \
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)                          \
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)                     \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")         \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")         \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")          \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")         \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

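/*
 * Clang's integrated assembler only accepts the unified syntax, where
 * the size suffix precedes the condition code ("strbeq"); the divided
 * syntax understood by older GCC/binutils wants the opposite order
 * ("streqb").
 */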
#ifdef __clang__
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
#else
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
#endif
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif /* !__clang__ */

#endif /* _KERNEL */

#endif /* __ARM_ARCH */

#endif /* __SYNC_ATOMICS */