/*-
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/cpufunc.h>
#include <machine/sysarch.h>

#ifdef _KERNEL
#include "opt_global.h"
#endif

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define WITHOUT_INTERRUPTS(s) do {                                      \
        register_t regs;                                                \
                                                                        \
        regs = intr_disable();                                          \
        do s while (0);                                                 \
        intr_restore(regs);                                             \
} while (0)
#endif /* _KERNEL && !SMP */
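
/*
 * A minimal usage sketch of the macro above; the helper name is
 * hypothetical and for illustration only:
 *
 *	static uint32_t
 *	example_fetch_add(uint32_t *mem, uint32_t val)
 *	{
 *		uint32_t ret;
 *
 *		WITHOUT_INTERRUPTS({
 *			ret = *mem;
 *			*mem += val;
 *		});
 *		return (ret);
 *	}
 */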

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

        __asm volatile ("" : : : "memory");
}
#elif defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)
static inline void
do_sync(void)
{

        __asm volatile ("dmb" : : : "memory");
}
#elif defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__)
static inline void
do_sync(void)
{

        __asm volatile ("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory");
}
#endif
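
/*
 * A minimal sketch of why the full barrier matters, assuming a
 * <stdatomic.h> that falls back to the __sync_* builtins and maps
 * atomic_exchange_explicit() onto __sync_lock_test_and_set():
 *
 *	uint32_t
 *	example_exchange(uint32_t *obj, uint32_t val)
 *	{
 *
 *		return (__sync_lock_test_and_set_4(obj, val));
 *	}
 *
 * An acquire-only __sync_lock_test_and_set() would then be too weak
 * for callers requesting memory_order_seq_cst; issuing do_sync()
 * inside it makes the exchange a full barrier.
 */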

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* On these architectures the compiler itself provides the atomic operations. */

#else /* __ARM_ARCH_5__ */

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define EMIT_LOAD_N(N, uintN_t)                                         \
uintN_t                                                                 \
__atomic_load_##N(uintN_t *mem, int model __unused)                     \
{                                                                       \
        uintN_t ret;                                                    \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                ret = *mem;                                             \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_STORE_N(N, uintN_t)                                        \
void                                                                    \
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)       \
{                                                                       \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                *mem = val;                                             \
        });                                                             \
}

#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t)                             \
_Bool                                                                   \
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,          \
    uintN_t desired, int success __unused, int failure __unused)        \
{                                                                       \
        _Bool ret;                                                      \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                if (*mem == *expected) {                                \
                        *mem = desired;                                 \
                        ret = 1;                                        \
                } else {                                                \
                        *expected = *mem;                               \
                        ret = 0;                                        \
                }                                                       \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_FETCH_OP_N(N, uintN_t, name, op)                           \
uintN_t                                                                 \
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)    \
{                                                                       \
        uintN_t ret;                                                    \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                ret = *mem;                                             \
                *mem op val;                                            \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_ALL_OPS_N(N, uintN_t)                                      \
EMIT_LOAD_N(N, uintN_t)                                                 \
EMIT_STORE_N(N, uintN_t)                                                \
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)                                     \
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)                                \
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)                              \
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)                              \
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)                               \
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)                              \
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef  EMIT_ALL_OPS_N
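
/*
 * For reference, EMIT_ALL_OPS_N(4, uint32_t) above makes EMIT_LOAD_N
 * expand __atomic_load_4() to roughly:
 *
 *	uint32_t
 *	__atomic_load_4(uint32_t *mem, int model __unused)
 *	{
 *		uint32_t ret;
 *
 *		WITHOUT_INTERRUPTS({
 *			ret = *mem;
 *		});
 *		return (ret);
 *	}
 */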

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. If the sequence
 * is interrupted, the kernel restarts it from the beginning.
 */

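/*
 * A sketch of the mechanism, assuming the RAS support exposed through
 * <machine/sysarch.h>: each sequence stores its start and end address
 * at ARM_RAS_START before touching memory. When the kernel interrupts
 * the thread, it compares the saved program counter against that
 * window and, on a match, rewinds it to the start label so the whole
 * sequence re-executes from the top.
 */
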
#define EMIT_LOAD_N(N, uintN_t)                                         \
uintN_t                                                                 \
__atomic_load_##N(uintN_t *mem, int model __unused)                     \
{                                                                       \
                                                                        \
        return (*mem);                                                  \
}

#define EMIT_STORE_N(N, uintN_t)                                        \
void                                                                    \
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)       \
{                                                                       \
                                                                        \
        *mem = val;                                                     \
}

#define EMIT_EXCHANGE_N(N, uintN_t, ldr, str)                           \
uintN_t                                                                 \
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)    \
{                                                                       \
        uint32_t old, temp, ras_start;                                  \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%5]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%5, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %4\n"      /* Load old value. */           \
                "\t"str" %3, %1\n"      /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%5]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%5, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (val), "m" (*mem), "r" (ras_start));              \
        return (old);                                                   \
}

#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)                 \
_Bool                                                                   \
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,         \
    uintN_t desired, int success __unused, int failure __unused)        \
{                                                                       \
        uint32_t expected, old, temp, ras_start;                        \
                                                                        \
        expected = *pexpected;                                          \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%6]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%6, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %5\n"      /* Load old value. */           \
                "\tcmp   %0, %3\n"      /* Compare to expected value. */\
                "\t"streq" %4, %1\n"    /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%6]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%6, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (expected), "r" (desired), "m" (*mem),            \
                  "r" (ras_start));                                     \
        if (old == expected) {                                          \
                return (1);                                             \
        } else {                                                        \
                *pexpected = old;                                       \
                return (0);                                             \
        }                                                               \
}

#define EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op)                 \
uintN_t                                                                 \
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)    \
{                                                                       \
        uint32_t old, temp, ras_start;                                  \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%5]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%5, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %4\n"      /* Load old value. */           \
                "\t"op"  %2, %0, %3\n"  /* Calculate new value. */      \
                "\t"str" %2, %1\n"      /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%5]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%5, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (val), "m" (*mem), "r" (ras_start));              \
        return (old);                                                   \
}

#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)                     \
EMIT_LOAD_N(N, uintN_t)                                                 \
EMIT_STORE_N(N, uintN_t)                                                \
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)                                   \
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)                         \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add")                 \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and")                 \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or, "orr")                  \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub")                 \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor")

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef  EMIT_ALL_OPS_N

#endif /* _KERNEL */

#endif

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

#if defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || \
    defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || \
    defined(__ARM_ARCH_6ZK__) || \
    defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__)

/* Implementations for old GCC versions, lacking support for atomics. */

typedef union {
        uint8_t         v8[4];
        uint32_t        v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

        return ((uint32_t *)((intptr_t)ptr & ~3));
}
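
/*
 * For example, a uint16_t at address 0x00001002 lives in the 32-bit
 * word at 0x00001000; masking off the two low bits recovers it:
 *
 *	round_to_word((void *)0x1002) == (uint32_t *)0x1000
 */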

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
        size_t offset;

        offset = (intptr_t)offset_ptr & 3;
        r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
        size_t offset;

        offset = (intptr_t)offset_ptr & 3;
        return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
        size_t offset;
        union {
                uint16_t in;
                uint8_t out[2];
        } bytes;

        offset = (intptr_t)offset_ptr & 3;
        bytes.in = val;
        r->v8[offset] = bytes.out[0];
        r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
        size_t offset;
        union {
                uint8_t in[2];
                uint16_t out;
        } bytes;

        offset = (intptr_t)offset_ptr & 3;
        bytes.in[0] = r->v8[offset];
        bytes.in[1] = r->v8[offset + 1];
        return (bytes.out);
}

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

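/*
 * Worked example for EMIT_LOCK_TEST_AND_SET_N below, assuming a
 * little-endian layout and a uint8_t at byte offset 1 with val 0x5a:
 *
 *	val32.v32   == 0x00005a00	(new value, shifted into place)
 *	negmask.v32 == 0xffff00ff	(every bit except the target byte)
 *
 * The "and" with negmask clears the target byte of the loaded word
 * and the "orr" with val32 merges in the new value, leaving the three
 * neighbouring bytes untouched.
 */
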
#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)                            \
uintN_t                                                                 \
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)             \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t val32, negmask, old;                                      \
        uint32_t temp1, temp2;                                          \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        val32.v32 = 0x00000000;                                         \
        put_##N(&val32, mem, val);                                      \
        negmask.v32 = 0xffffffff;                                       \
        put_##N(&negmask, mem, 0);                                      \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %6\n"      /* Load old value. */           \
                "\tand   %2, %5, %0\n"  /* Remove the old value. */     \
                "\torr   %2, %2, %4\n"  /* Put in the new value. */     \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),        \
                  "=&r" (temp2)                                         \
                : "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));    \
        return (get_##N(&old, mem));                                    \
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)                         \
uintN_t                                                                 \
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,     \
    uintN_t desired)                                                    \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t expected32, desired32, posmask, old;                      \
        uint32_t negmask, temp1, temp2;                                 \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        expected32.v32 = 0x00000000;                                    \
        put_##N(&expected32, mem, expected);                            \
        desired32.v32 = 0x00000000;                                     \
        put_##N(&desired32, mem, desired);                              \
        posmask.v32 = 0x00000000;                                       \
        put_##N(&posmask, mem, ~0);                                     \
        negmask = ~posmask.v32;                                         \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %8\n"      /* Load old value. */           \
                "\tand   %2, %6, %0\n"  /* Isolate the old value. */    \
                "\tcmp   %2, %4\n"      /* Compare to expected value. */\
                "\tbne   2f\n"          /* Values are unequal. */       \
                "\tand   %2, %7, %0\n"  /* Remove the old value. */     \
                "\torr   %2, %5\n"      /* Put in the new value. */     \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                "2:"                                                    \
                : "=&r" (old), "=m" (*mem32), "=&r" (temp1),            \
                  "=&r" (temp2)                                         \
                : "r" (expected32.v32), "r" (desired32.v32),            \
                  "r" (posmask.v32), "r" (negmask), "m" (*mem32));      \
        return (get_##N(&old, mem));                                    \
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)            \
uintN_t                                                                 \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)                      \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t val32, posmask, old;                                      \
        uint32_t negmask, temp1, temp2;                                 \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        val32.v32 = 0x00000000;                                         \
        put_##N(&val32, mem, val);                                      \
        posmask.v32 = 0x00000000;                                       \
        put_##N(&posmask, mem, ~0);                                     \
        negmask = ~posmask.v32;                                         \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %7\n"      /* Load old value. */           \
                "\t"op"  %2, %0, %4\n"  /* Calculate new value. */      \
                "\tand   %2, %5\n"      /* Isolate the new value. */    \
                "\tand   %3, %6, %0\n"  /* Remove the old value. */     \
                "\torr   %2, %2, %3\n"  /* Put in the new value. */     \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),        \
                  "=&r" (temp2)                                         \
                : "r" (val32.v32), "r" (posmask.v32), "r" (negmask),    \
                  "m" (*mem32));                                        \
        return (get_##N(&old, mem));                                    \
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)  \
uintN_t                                                                 \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)                      \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t val32, old;                                               \
        uint32_t temp1, temp2;                                          \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        val32.v32 = idempotence ? 0xffffffff : 0x00000000;              \
        put_##N(&val32, mem, val);                                      \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %5\n"      /* Load old value. */           \
                "\t"op"  %2, %4, %0\n"  /* Calculate new value. */      \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),        \
                  "=&r" (temp2)                                         \
                : "r" (val32.v32), "m" (*mem32));                       \
        return (get_##N(&old, mem));                                    \
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)

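/*
 * The idempotence argument selects the fill value that turns the
 * operation into the identity on the bytes sharing the word with the
 * atomic variable: "and" needs one-bits (x & ~0 == x), while "orr"
 * and "eor" need zero-bits (x | 0 == x, x ^ 0 == x), so those bytes
 * are preserved.
 */
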
/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
        uint32_t old, temp;

        do_sync();
        __asm volatile (
                "1:"
                "\tldrex %0, %4\n"      /* Load old value. */
                "\tstrex %2, %3, %1\n"  /* Attempt to store. */
                "\tcmp   %2, #0\n"      /* Did it succeed? */
                "\tbne   1b\n"          /* Spin if failed. */
                : "=&r" (old), "=m" (*mem), "=&r" (temp)
                : "r" (val), "m" (*mem));
        return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
        uint32_t old, temp;

        do_sync();
        __asm volatile (
                "1:"
                "\tldrex %0, %5\n"      /* Load old value. */
                "\tcmp   %0, %3\n"      /* Compare to expected value. */
                "\tbne   2f\n"          /* Values are unequal. */
                "\tstrex %2, %4, %1\n"  /* Attempt to store. */
                "\tcmp   %2, #0\n"      /* Did it succeed? */
                "\tbne   1b\n"          /* Spin if failed. */
                "2:"
                : "=&r" (old), "=m" (*mem), "=&r" (temp)
                : "r" (expected), "r" (desired), "m" (*mem));
        return (old);
}

#define EMIT_FETCH_AND_OP_4(name, op)                                   \
uint32_t                                                                \
__sync_##name##_4_c(uint32_t *mem, uint32_t val)                        \
{                                                                       \
        uint32_t old, temp1, temp2;                                     \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %5\n"      /* Load old value. */           \
                "\t"op"  %2, %0, %4\n"  /* Calculate new value. */      \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old), "=m" (*mem), "=&r" (temp1),              \
                  "=&r" (temp2)                                         \
                : "r" (val), "m" (*mem));                               \
        return (old);                                                   \
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif
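
/*
 * For reference, __strong_reference() from <sys/cdefs.h> emits an
 * alias, so on GCC the compiler-generated name resolves to the _c
 * implementation, roughly:
 *
 *	extern __typeof(__sync_fetch_and_add_4_c) __sync_fetch_and_add_4
 *	    __attribute__((__alias__("__sync_fetch_and_add_4_c")));
 */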

#else /* __ARM_ARCH_5__ */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)                         \
uintN_t                                                                 \
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,         \
    uintN_t desired)                                                    \
{                                                                       \
        uintN_t ret;                                                    \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                ret = *mem;                                             \
                if (*mem == expected)                                   \
                        *mem = desired;                                 \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)                       \
uintN_t                                                                 \
__sync_##name##_##N(uintN_t *mem, uintN_t val)                          \
{                                                                       \
        uintN_t ret;                                                    \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                ret = *mem;                                             \
                *mem op val;                                            \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_ALL_OPS_N(N, uintN_t)                                      \
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)                                 \
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)                   \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)                      \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)                      \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)                       \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)                      \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef  EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. If the sequence
 * is interrupted, the kernel restarts it from the beginning.
 */

#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)                  \
uintN_t                                                                 \
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)             \
{                                                                       \
        uint32_t old, temp, ras_start;                                  \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%5]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%5, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %4\n"      /* Load old value. */           \
                "\t"str" %3, %1\n"      /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%5]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%5, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (val), "m" (*mem), "r" (ras_start));              \
        return (old);                                                   \
}

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)             \
uintN_t                                                                 \
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,     \
    uintN_t desired)                                                    \
{                                                                       \
        uint32_t old, temp, ras_start;                                  \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%6]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%6, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %5\n"      /* Load old value. */           \
                "\tcmp   %0, %3\n"      /* Compare to expected value. */\
                "\t"streq" %4, %1\n"    /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%6]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%6, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (expected), "r" (desired), "m" (*mem),            \
                  "r" (ras_start));                                     \
        return (old);                                                   \
}

#define EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)             \
uintN_t                                                                 \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)                      \
{                                                                       \
        uint32_t old, temp, ras_start;                                  \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%5]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%5, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %4\n"      /* Load old value. */           \
                "\t"op"  %2, %0, %3\n"  /* Calculate new value. */      \
                "\t"str" %2, %1\n"      /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%5]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%5, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (val), "m" (*mem), "r" (ras_start));              \
        return (old);                                                   \
}

#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)                     \
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)                          \
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)                     \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")         \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")         \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")          \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")         \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

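/*
 * Clang's integrated assembler expects unified-syntax conditional
 * stores ("strbeq"); the older GNU assembler used with GCC expects
 * the pre-unified suffix order ("streqb").
 */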
#ifdef __clang__
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
#else
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
#endif
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif

#endif /* _KERNEL */

#endif

#endif /* __SYNC_ATOMICS */