/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/acle-compat.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/sysarch.h>

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define WITHOUT_INTERRUPTS(s) do {                                      \
        register_t regs;                                                \
                                                                        \
        regs = intr_disable();                                          \
        do s while (0);                                                 \
        intr_restore(regs);                                             \
} while (0)
#endif /* _KERNEL && !SMP */
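
/*
 * For illustration only, a minimal sketch of how the macro is used
 * (this is the pattern the EMIT_*_N macros below rely on):
 *
 *      WITHOUT_INTERRUPTS({
 *              ret = *mem;
 *      });
 *
 * expands to an intr_disable()/intr_restore() pair around the
 * statement block, which is sufficient for atomicity on a
 * uniprocessor kernel.
 */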

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

        __asm volatile ("" : : : "memory");
}
#elif __ARM_ARCH >= 6
static inline void
do_sync(void)
{

        dmb();
}
#endif
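
/*
 * A note on the two variants above: the uniprocessor kernel version
 * only needs to restrain the compiler, hence the empty asm with a
 * "memory" clobber. The ARMv6+ version uses dmb() (assumed here to
 * come from <machine/atomic.h>), which emits a real data memory
 * barrier so that other observers see the ordering as well.
 */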

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */
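
/*
 * A sketch of how these functions are reached: when the compiler does
 * not expand an atomic operation inline, code such as
 *
 *      _Atomic uint32_t x;
 *      atomic_fetch_add_explicit(&x, 1, memory_order_relaxed);
 *
 * is typically lowered to a library call like
 * __atomic_fetch_add_4(&x, 1, __ATOMIC_RELAXED), which is what the
 * definitions below provide.
 */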

/* ARMv6+ systems should be supported by the compiler. */
#if __ARM_ARCH <= 5

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{
}
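
/*
 * The empty body above is intentional: the pre-ARMv6 CPUs taking this
 * path have no barrier instruction and are uniprocessor, so the
 * function call itself already acts as a compiler barrier.
 */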

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define EMIT_LOAD_N(N, uintN_t)                                         \
uintN_t                                                                 \
__atomic_load_##N(uintN_t *mem, int model __unused)                     \
{                                                                       \
        uintN_t ret;                                                    \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                ret = *mem;                                             \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_STORE_N(N, uintN_t)                                        \
void                                                                    \
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)       \
{                                                                       \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                *mem = val;                                             \
        });                                                             \
}

#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t)                             \
_Bool                                                                   \
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,          \
    uintN_t desired, int success __unused, int failure __unused)        \
{                                                                       \
        _Bool ret;                                                      \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                if (*mem == *expected) {                                \
                        *mem = desired;                                 \
                        ret = 1;                                        \
                } else {                                                \
                        *expected = *mem;                               \
                        ret = 0;                                        \
                }                                                       \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_FETCH_OP_N(N, uintN_t, name, op)                           \
uintN_t                                                                 \
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)    \
{                                                                       \
        uintN_t ret;                                                    \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                ret = *mem;                                             \
                *mem op val;                                            \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_ALL_OPS_N(N, uintN_t)                                      \
EMIT_LOAD_N(N, uintN_t)                                                 \
EMIT_STORE_N(N, uintN_t)                                                \
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)                                     \
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)                                \
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)                              \
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)                              \
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)                               \
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)                              \
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef  EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */
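
/*
 * Roughly, the protocol used below (ARM_RAS_START comes from
 * <machine/sysarch.h>) is:
 *
 *      [ARM_RAS_START]     = address of the sequence's first insn
 *      [ARM_RAS_START + 4] = address just past its last insn
 *
 * If the kernel interrupts the thread while the program counter lies
 * inside that window, it rewinds the program counter to the start
 * label, so the load/modify/store runs again from scratch. On the way
 * out, the sequence stores 0 and 0xffffffff, the idle values the
 * kernel treats as "no sequence active".
 */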

#define EMIT_LOAD_N(N, uintN_t)                                         \
uintN_t                                                                 \
__atomic_load_##N(uintN_t *mem, int model __unused)                     \
{                                                                       \
                                                                        \
        return (*mem);                                                  \
}

#define EMIT_STORE_N(N, uintN_t)                                        \
void                                                                    \
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)       \
{                                                                       \
                                                                        \
        *mem = val;                                                     \
}

#define EMIT_EXCHANGE_N(N, uintN_t, ldr, str)                           \
uintN_t                                                                 \
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)    \
{                                                                       \
        uint32_t old, temp, ras_start;                                  \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%5]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%5, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %4\n"      /* Load old value. */           \
                "\t"str" %3, %1\n"      /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%5]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%5, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (val), "m" (*mem), "r" (ras_start));              \
        return (old);                                                   \
}

#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)                 \
_Bool                                                                   \
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,         \
    uintN_t desired, int success __unused, int failure __unused)        \
{                                                                       \
        uint32_t expected, old, temp, ras_start;                        \
                                                                        \
        expected = *pexpected;                                          \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%6]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%6, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %5\n"      /* Load old value. */           \
                "\tcmp   %0, %3\n"      /* Compare to expected value. */\
                "\t"streq" %4, %1\n"    /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%6]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%6, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (expected), "r" (desired), "m" (*mem),            \
                  "r" (ras_start));                                     \
        if (old == expected) {                                          \
                return (1);                                             \
        } else {                                                        \
                *pexpected = old;                                       \
                return (0);                                             \
        }                                                               \
}

#define EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op, ret)            \
uintN_t                                                                 \
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)    \
{                                                                       \
        uint32_t old, new, ras_start;                                   \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%5]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%5, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %4\n"      /* Load old value. */           \
                "\t"op"  %2, %0, %3\n"  /* Calculate new value. */      \
                "\t"str" %2, %1\n"      /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%5]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%5, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (new)                 \
                : "r" (val), "m" (*mem), "r" (ras_start));              \
        return (ret);                                                   \
}

#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)                     \
EMIT_LOAD_N(N, uintN_t)                                                 \
EMIT_STORE_N(N, uintN_t)                                                \
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)                                   \
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)                         \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add", old)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and", old)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or,  "orr", old)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub", old)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor", old)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, add_fetch, "add", new)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, and_fetch, "and", new)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, or_fetch,  "orr", new)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, sub_fetch, "sub", new)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, xor_fetch, "eor", new)

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef  EMIT_ALL_OPS_N

#endif /* _KERNEL */

#endif /* __ARM_ARCH */

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */
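
/*
 * A sketch of how these entry points are reached: for a uint32_t x,
 * old compilers expand __sync_fetch_and_add(&x, 1) into a call to
 * __sync_fetch_and_add_4() when they do not inline the operation;
 * <sys/stdatomic.h> also maps the C11 atomics onto these builtins
 * when only the __sync_* API is available.
 */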

#if __ARM_ARCH >= 6

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
        uint8_t         v8[4];
        uint32_t        v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

        return ((uint32_t *)((intptr_t)ptr & ~3));
}
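
/*
 * For example, a uint16_t at address 0x20003006 lives in the 32-bit
 * word at 0x20003004: round_to_word() simply masks off the low two
 * bits of the pointer.
 */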

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
        size_t offset;

        offset = (intptr_t)offset_ptr & 3;
        r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
        size_t offset;

        offset = (intptr_t)offset_ptr & 3;
        return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
        size_t offset;
        union {
                uint16_t in;
                uint8_t out[2];
        } bytes;

        offset = (intptr_t)offset_ptr & 3;
        bytes.in = val;
        r->v8[offset] = bytes.out[0];
        r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
        size_t offset;
        union {
                uint8_t in[2];
                uint16_t out;
        } bytes;

        offset = (intptr_t)offset_ptr & 3;
        bytes.in[0] = r->v8[offset];
        bytes.in[1] = r->v8[offset + 1];
        return (bytes.out);
}
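
/*
 * For example, for a uint8_t at address 0x1002, put_1() places the
 * value in r->v8[2] and get_1() reads it back from the same lane.
 * Because whole 32-bit words are copied between memory and reg_t, the
 * byte lanes line up with the memory layout on either endianness.
 */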

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */
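
/*
 * As a worked example: exchanging the byte at address 0x1001 builds
 * val32 with the new value in byte lane 1 and negmask with lane 1
 * cleared and the other lanes set. The loops below AND the loaded
 * word with negmask to knock out the old byte and OR in val32,
 * leaving the three neighbouring bytes untouched.
 */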

#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)                            \
uintN_t                                                                 \
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)             \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t val32, negmask, old;                                      \
        uint32_t temp1, temp2;                                          \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        val32.v32 = 0x00000000;                                         \
        put_##N(&val32, mem, val);                                      \
        negmask.v32 = 0xffffffff;                                       \
        put_##N(&negmask, mem, 0);                                      \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %6\n"      /* Load old value. */           \
                "\tand   %2, %5, %0\n"  /* Remove the old value. */     \
                "\torr   %2, %2, %4\n"  /* Put in the new value. */     \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),        \
                  "=&r" (temp2)                                         \
                : "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));    \
        return (get_##N(&old, mem));                                    \
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)                         \
uintN_t                                                                 \
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,     \
    uintN_t desired)                                                    \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t expected32, desired32, posmask, old;                      \
        uint32_t negmask, temp1, temp2;                                 \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        expected32.v32 = 0x00000000;                                    \
        put_##N(&expected32, mem, expected);                            \
        desired32.v32 = 0x00000000;                                     \
        put_##N(&desired32, mem, desired);                              \
        posmask.v32 = 0x00000000;                                       \
        put_##N(&posmask, mem, ~0);                                     \
        negmask = ~posmask.v32;                                         \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %8\n"      /* Load old value. */           \
                "\tand   %2, %6, %0\n"  /* Isolate the old value. */    \
                "\tcmp   %2, %4\n"      /* Compare to expected value. */\
                "\tbne   2f\n"          /* Values are unequal. */       \
                "\tand   %2, %7, %0\n"  /* Remove the old value. */     \
                "\torr   %2, %5\n"      /* Put in the new value. */     \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                "2:"                                                    \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),        \
                  "=&r" (temp2)                                         \
                : "r" (expected32.v32), "r" (desired32.v32),            \
                  "r" (posmask.v32), "r" (negmask), "m" (*mem32));      \
        return (get_##N(&old, mem));                                    \
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)            \
uintN_t                                                                 \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)                      \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t val32, posmask, old;                                      \
        uint32_t negmask, temp1, temp2;                                 \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        val32.v32 = 0x00000000;                                         \
        put_##N(&val32, mem, val);                                      \
        posmask.v32 = 0x00000000;                                       \
        put_##N(&posmask, mem, ~0);                                     \
        negmask = ~posmask.v32;                                         \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %7\n"      /* Load old value. */           \
                "\t"op"  %2, %0, %4\n"  /* Calculate new value. */      \
                "\tand   %2, %5\n"      /* Isolate the new value. */    \
                "\tand   %3, %6, %0\n"  /* Remove the old value. */     \
                "\torr   %2, %2, %3\n"  /* Put in the new value. */     \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),        \
                  "=&r" (temp2)                                         \
                : "r" (val32.v32), "r" (posmask.v32), "r" (negmask),    \
                  "m" (*mem32));                                        \
        return (get_##N(&old, mem));                                    \
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)  \
uintN_t                                                                 \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)                      \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t val32, old;                                               \
        uint32_t temp1, temp2;                                          \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        val32.v32 = idempotence ? 0xffffffff : 0x00000000;              \
        put_##N(&val32, mem, val);                                      \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %5\n"      /* Load old value. */           \
                "\t"op"  %2, %4, %0\n"  /* Calculate new value. */      \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),        \
                  "=&r" (temp2)                                         \
                : "r" (val32.v32), "m" (*mem32));                       \
        return (get_##N(&old, mem));                                    \
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
        uint32_t old, temp;

        do_sync();
        __asm volatile (
                "1:"
                "\tldrex %0, %4\n"      /* Load old value. */
                "\tstrex %2, %3, %1\n"  /* Attempt to store. */
                "\tcmp   %2, #0\n"      /* Did it succeed? */
                "\tbne   1b\n"          /* Spin if failed. */
                : "=&r" (old), "=m" (*mem), "=&r" (temp)
                : "r" (val), "m" (*mem));
        return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
        uint32_t old, temp;

        do_sync();
        __asm volatile (
                "1:"
                "\tldrex %0, %5\n"      /* Load old value. */
                "\tcmp   %0, %3\n"      /* Compare to expected value. */
                "\tbne   2f\n"          /* Values are unequal. */
                "\tstrex %2, %4, %1\n"  /* Attempt to store. */
                "\tcmp   %2, #0\n"      /* Did it succeed? */
                "\tbne   1b\n"          /* Spin if failed. */
                "2:"
                : "=&r" (old), "=m" (*mem), "=&r" (temp)
                : "r" (expected), "r" (desired), "m" (*mem));
        return (old);
}

#define EMIT_FETCH_AND_OP_4(name, op)                                   \
uint32_t                                                                \
__sync_##name##_4_c(uint32_t *mem, uint32_t val)                        \
{                                                                       \
        uint32_t old, temp1, temp2;                                     \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %5\n"      /* Load old value. */           \
                "\t"op"  %2, %0, %4\n"  /* Calculate new value. */      \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old), "=m" (*mem), "=&r" (temp1),              \
                  "=&r" (temp2)                                         \
                : "r" (val), "m" (*mem));                               \
        return (old);                                                   \
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif /* !__clang__ */

#else /* __ARM_ARCH < 6 */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)                         \
uintN_t                                                                 \
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,         \
    uintN_t desired)                                                    \
{                                                                       \
        uintN_t ret;                                                    \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                ret = *mem;                                             \
                if (*mem == expected)                                   \
                        *mem = desired;                                 \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)                       \
uintN_t                                                                 \
__sync_##name##_##N(uintN_t *mem, uintN_t val)                          \
{                                                                       \
        uintN_t ret;                                                    \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                ret = *mem;                                             \
                *mem op val;                                            \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_ALL_OPS_N(N, uintN_t)                                      \
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)                                 \
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)                   \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)                      \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)                      \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)                       \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)                      \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef  EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */
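
/*
 * The restartable-sequence protocol is the same as described for the
 * __atomic_* implementations earlier in this file; only the entry
 * point names differ, matching what old compilers emit for the
 * __sync_* builtins.
 */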

#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)                  \
uintN_t                                                                 \
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)             \
{                                                                       \
        uint32_t old, temp, ras_start;                                  \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%5]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%5, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %4\n"      /* Load old value. */           \
                "\t"str" %3, %1\n"      /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%5]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%5, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (val), "m" (*mem), "r" (ras_start));              \
        return (old);                                                   \
}

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)             \
uintN_t                                                                 \
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,     \
    uintN_t desired)                                                    \
{                                                                       \
        uint32_t old, temp, ras_start;                                  \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%6]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%6, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %5\n"      /* Load old value. */           \
                "\tcmp   %0, %3\n"      /* Compare to expected value. */\
                "\t"streq" %4, %1\n"    /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%6]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%6, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (expected), "r" (desired), "m" (*mem),            \
                  "r" (ras_start));                                     \
        return (old);                                                   \
}

#define EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)             \
uintN_t                                                                 \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)                      \
{                                                                       \
        uint32_t old, temp, ras_start;                                  \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%5]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%5, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %4\n"      /* Load old value. */           \
                "\t"op"  %2, %0, %3\n"  /* Calculate new value. */      \
                "\t"str" %2, %1\n"      /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%5]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%5, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (val), "m" (*mem), "r" (ras_start));              \
        return (old);                                                   \
}

#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)                     \
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)                          \
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)                     \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")         \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")         \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")          \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")         \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

#ifdef __clang__
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
#else
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
#endif
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
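
/*
 * The two spellings above reflect assembler syntax differences: in
 * ARM unified syntax (what clang's integrated assembler accepts) the
 * size suffix precedes the condition ("strbeq"), while the older
 * divided syntax used by GCC's assembler puts the condition first
 * ("streqb").
 */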

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif /* !__clang__ */

#endif /* _KERNEL */

#endif /* __ARM_ARCH */

#endif /* __SYNC_ATOMICS */