/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2013 Ed Schouten <ed@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/stdatomic.h>
#include <sys/types.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/sysarch.h>

/*
 * Executing statements with interrupts disabled.
 */

#if defined(_KERNEL) && !defined(SMP)
#define WITHOUT_INTERRUPTS(s) do {                                      \
        register_t regs;                                                \
                                                                        \
        regs = intr_disable();                                          \
        do s while (0);                                                 \
        intr_restore(regs);                                             \
} while (0)
#endif /* _KERNEL && !SMP */
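
/*
 * As an illustration (hand-expanded, not compiler output), a use such
 * as WITHOUT_INTERRUPTS({ *mem = val; }) expands to roughly:
 *
 *      register_t regs;
 *
 *      regs = intr_disable();
 *      do { *mem = val; } while (0);
 *      intr_restore(regs);
 *
 * The do/while wrappers let both the macro and the statement list it
 * receives be used as a single statement.
 */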

/*
 * Memory barriers.
 *
 * It turns out __sync_synchronize() does not emit any code when used
 * with GCC 4.2. Implement our own version that does work reliably.
 *
 * Although __sync_lock_test_and_set() should only perform an acquire
 * barrier, make it do a full barrier like the other functions. This
 * should make <stdatomic.h>'s atomic_exchange_explicit() work reliably.
 *
 * On uniprocessor kernels a compiler barrier is sufficient, as the
 * ordering can only be observed by interrupt handlers on the same CPU,
 * which see stores in program order; ARMv6 and later systems get a real
 * Data Memory Barrier.
 */

#if defined(_KERNEL) && !defined(SMP)
static inline void
do_sync(void)
{

        __asm volatile ("" : : : "memory");
}
#elif __ARM_ARCH >= 6
static inline void
do_sync(void)
{

        dmb();
}
#endif

#if defined(__CLANG_ATOMICS) || defined(__GNUC_ATOMICS)

/*
 * New C11 __atomic_* API.
 */

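/*
 * For reference: these functions implement the compiler's atomic
 * libcalls. When the compiler cannot expand a C11 operation inline, a
 * call such as
 *
 *      atomic_fetch_add_explicit(&x, 1, memory_order_relaxed)
 *
 * on a 32-bit object is lowered to
 *
 *      __atomic_fetch_add_4(&x, 1, __ATOMIC_RELAXED);
 *
 * which is why every function below carries the (here unused) memory
 * model as its trailing argument.
 */
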
/* On ARMv6 and newer the compiler should supply these atomics itself. */
#if __ARM_ARCH <= 5

/* Clang doesn't allow us to reimplement builtins without this. */
#ifdef __clang__
#pragma redefine_extname __sync_synchronize_ext __sync_synchronize
#define __sync_synchronize __sync_synchronize_ext
#endif

void
__sync_synchronize(void)
{
}

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define EMIT_LOAD_N(N, uintN_t)                                         \
uintN_t                                                                 \
__atomic_load_##N(uintN_t *mem, int model __unused)                     \
{                                                                       \
        uintN_t ret;                                                    \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                ret = *mem;                                             \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_STORE_N(N, uintN_t)                                        \
void                                                                    \
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)       \
{                                                                       \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                *mem = val;                                             \
        });                                                             \
}

#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t)                             \
_Bool                                                                   \
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *expected,          \
    uintN_t desired, int success __unused, int failure __unused)        \
{                                                                       \
        _Bool ret;                                                      \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                if (*mem == *expected) {                                \
                        *mem = desired;                                 \
                        ret = 1;                                        \
                } else {                                                \
                        *expected = *mem;                               \
                        ret = 0;                                        \
                }                                                       \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_FETCH_OP_N(N, uintN_t, name, op)                           \
uintN_t                                                                 \
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)    \
{                                                                       \
        uintN_t ret;                                                    \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                ret = *mem;                                             \
                *mem op val;                                            \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_ALL_OPS_N(N, uintN_t)                                      \
EMIT_LOAD_N(N, uintN_t)                                                 \
EMIT_STORE_N(N, uintN_t)                                                \
EMIT_COMPARE_EXCHANGE_N(N, uintN_t)                                     \
EMIT_FETCH_OP_N(N, uintN_t, exchange, =)                                \
EMIT_FETCH_OP_N(N, uintN_t, fetch_add, +=)                              \
EMIT_FETCH_OP_N(N, uintN_t, fetch_and, &=)                              \
EMIT_FETCH_OP_N(N, uintN_t, fetch_or, |=)                               \
EMIT_FETCH_OP_N(N, uintN_t, fetch_sub, -=)                              \
EMIT_FETCH_OP_N(N, uintN_t, fetch_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef  EMIT_ALL_OPS_N
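
/*
 * For example, EMIT_ALL_OPS_N(4, uint32_t) above emits, among others,
 * the equivalent of (hand-expanded for illustration):
 *
 *      uint32_t
 *      __atomic_fetch_add_4(uint32_t *mem, uint32_t val,
 *          int model __unused)
 *      {
 *              uint32_t ret;
 *
 *              WITHOUT_INTERRUPTS({
 *                      ret = *mem;
 *                      *mem += val;
 *              });
 *              return (ret);
 *      }
 */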

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

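/*
 * Sketch of the contract with the kernel (the restart check itself
 * lives in the kernel, not in this file; the code below only publishes
 * the bounds): the start address of the sequence is stored at
 * ARM_RAS_START and its end address at ARM_RAS_START + 4. On returning
 * to the thread, the kernel in essence does:
 *
 *      start = *(uint32_t *)ARM_RAS_START;
 *      end = *(uint32_t *)(ARM_RAS_START + 4);
 *      if (pc >= start && pc < end)
 *              pc = start;             (re-run the whole sequence)
 *
 * so each load/store pair below executes atomically with respect to
 * preemption on a uniprocessor.
 */
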
#define EMIT_LOAD_N(N, uintN_t)                                         \
uintN_t                                                                 \
__atomic_load_##N(uintN_t *mem, int model __unused)                     \
{                                                                       \
                                                                        \
        return (*mem);                                                  \
}

#define EMIT_STORE_N(N, uintN_t)                                        \
void                                                                    \
__atomic_store_##N(uintN_t *mem, uintN_t val, int model __unused)       \
{                                                                       \
                                                                        \
        *mem = val;                                                     \
}

#define EMIT_EXCHANGE_N(N, uintN_t, ldr, str)                           \
uintN_t                                                                 \
__atomic_exchange_##N(uintN_t *mem, uintN_t val, int model __unused)    \
{                                                                       \
        uint32_t old, temp, ras_start;                                  \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%5]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%5, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %4\n"      /* Load old value. */           \
                "\t"str" %3, %1\n"      /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%5]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%5, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (val), "m" (*mem), "r" (ras_start));              \
        return (old);                                                   \
}

#define EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)                 \
_Bool                                                                   \
__atomic_compare_exchange_##N(uintN_t *mem, uintN_t *pexpected,         \
    uintN_t desired, int success __unused, int failure __unused)        \
{                                                                       \
        uint32_t expected, old, temp, ras_start;                        \
                                                                        \
        expected = *pexpected;                                          \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%6]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%6, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %5\n"      /* Load old value. */           \
                "\tcmp   %0, %3\n"      /* Compare to expected value. */\
                "\t"streq" %4, %1\n"    /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%6]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%6, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (expected), "r" (desired), "m" (*mem),            \
                  "r" (ras_start));                                     \
        if (old == expected) {                                          \
                return (1);                                             \
        } else {                                                        \
                *pexpected = old;                                       \
                return (0);                                             \
        }                                                               \
}

#define EMIT_FETCH_OP_N(N, uintN_t, ldr, str, name, op, ret)            \
uintN_t                                                                 \
__atomic_##name##_##N(uintN_t *mem, uintN_t val, int model __unused)    \
{                                                                       \
        uint32_t old, new, ras_start;                                   \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%5]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%5, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %4\n"      /* Load old value. */           \
                "\t"op"  %2, %0, %3\n"  /* Calculate new value. */      \
                "\t"str" %2, %1\n"      /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%5]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%5, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (new)                 \
                : "r" (val), "m" (*mem), "r" (ras_start));              \
        return (ret);                                                   \
}

#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)                     \
EMIT_LOAD_N(N, uintN_t)                                                 \
EMIT_STORE_N(N, uintN_t)                                                \
EMIT_EXCHANGE_N(N, uintN_t, ldr, str)                                   \
EMIT_COMPARE_EXCHANGE_N(N, uintN_t, ldr, streq)                         \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_add, "add", old)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_and, "and", old)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_or,  "orr", old)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_sub, "sub", old)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, fetch_xor, "eor", old)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, add_fetch, "add", new)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, and_fetch, "and", new)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, or_fetch,  "orr", new)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, sub_fetch, "sub", new)            \
EMIT_FETCH_OP_N(N, uintN_t, ldr, str, xor_fetch, "eor", new)

EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")
#undef  EMIT_ALL_OPS_N

#endif /* _KERNEL */

#endif /* __ARM_ARCH */

#endif /* __CLANG_ATOMICS || __GNUC_ATOMICS */

#if defined(__SYNC_ATOMICS) || defined(EMIT_SYNC_ATOMICS)

#ifdef __clang__
#pragma redefine_extname __sync_lock_test_and_set_1_c __sync_lock_test_and_set_1
#pragma redefine_extname __sync_lock_test_and_set_2_c __sync_lock_test_and_set_2
#pragma redefine_extname __sync_lock_test_and_set_4_c __sync_lock_test_and_set_4
#pragma redefine_extname __sync_val_compare_and_swap_1_c __sync_val_compare_and_swap_1
#pragma redefine_extname __sync_val_compare_and_swap_2_c __sync_val_compare_and_swap_2
#pragma redefine_extname __sync_val_compare_and_swap_4_c __sync_val_compare_and_swap_4
#pragma redefine_extname __sync_fetch_and_add_1_c __sync_fetch_and_add_1
#pragma redefine_extname __sync_fetch_and_add_2_c __sync_fetch_and_add_2
#pragma redefine_extname __sync_fetch_and_add_4_c __sync_fetch_and_add_4
#pragma redefine_extname __sync_fetch_and_and_1_c __sync_fetch_and_and_1
#pragma redefine_extname __sync_fetch_and_and_2_c __sync_fetch_and_and_2
#pragma redefine_extname __sync_fetch_and_and_4_c __sync_fetch_and_and_4
#pragma redefine_extname __sync_fetch_and_or_1_c __sync_fetch_and_or_1
#pragma redefine_extname __sync_fetch_and_or_2_c __sync_fetch_and_or_2
#pragma redefine_extname __sync_fetch_and_or_4_c __sync_fetch_and_or_4
#pragma redefine_extname __sync_fetch_and_xor_1_c __sync_fetch_and_xor_1
#pragma redefine_extname __sync_fetch_and_xor_2_c __sync_fetch_and_xor_2
#pragma redefine_extname __sync_fetch_and_xor_4_c __sync_fetch_and_xor_4
#pragma redefine_extname __sync_fetch_and_sub_1_c __sync_fetch_and_sub_1
#pragma redefine_extname __sync_fetch_and_sub_2_c __sync_fetch_and_sub_2
#pragma redefine_extname __sync_fetch_and_sub_4_c __sync_fetch_and_sub_4
#endif

/*
 * Old __sync_* API.
 */

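/*
 * For reference: GCC emits calls to these functions when it cannot
 * expand a __sync_* builtin inline; e.g. __sync_fetch_and_add(&x, 1)
 * on a uint32_t becomes a call to __sync_fetch_and_add_4(&x, 1).
 */
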
#if __ARM_ARCH >= 6

/* Implementations for old GCC versions that lack support for atomics. */

typedef union {
        uint8_t         v8[4];
        uint32_t        v32;
} reg_t;

/*
 * Given a memory address pointing to an 8-bit or 16-bit integer, return
 * the address of the 32-bit word containing it.
 */

static inline uint32_t *
round_to_word(void *ptr)
{

        return ((uint32_t *)((intptr_t)ptr & ~3));
}
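
/*
 * For example, a uint16_t at address 0x1002 lives in the 32-bit word
 * at 0x1000; masking off the two low address bits finds that word.
 */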

/*
 * Utility functions for loading and storing 8-bit and 16-bit integers
 * in 32-bit words at an offset corresponding with the location of the
 * atomic variable.
 */

static inline void
put_1(reg_t *r, const uint8_t *offset_ptr, uint8_t val)
{
        size_t offset;

        offset = (intptr_t)offset_ptr & 3;
        r->v8[offset] = val;
}

static inline uint8_t
get_1(const reg_t *r, const uint8_t *offset_ptr)
{
        size_t offset;

        offset = (intptr_t)offset_ptr & 3;
        return (r->v8[offset]);
}

static inline void
put_2(reg_t *r, const uint16_t *offset_ptr, uint16_t val)
{
        size_t offset;
        union {
                uint16_t in;
                uint8_t out[2];
        } bytes;

        offset = (intptr_t)offset_ptr & 3;
        bytes.in = val;
        r->v8[offset] = bytes.out[0];
        r->v8[offset + 1] = bytes.out[1];
}

static inline uint16_t
get_2(const reg_t *r, const uint16_t *offset_ptr)
{
        size_t offset;
        union {
                uint8_t in[2];
                uint16_t out;
        } bytes;

        offset = (intptr_t)offset_ptr & 3;
        bytes.in[0] = r->v8[offset];
        bytes.in[1] = r->v8[offset + 1];
        return (bytes.out);
}

/*
 * 8-bit and 16-bit routines.
 *
 * These operations are not natively supported by the CPU, so we use
 * some shifting and bitmasking on top of the 32-bit instructions.
 */

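/*
 * Worked example (little-endian, exchanging the uint8_t at byte
 * offset 1 of its word with the value 0x5a): put_1() produces
 * val32.v32 == 0x00005a00 and negmask.v32 == 0xffff00ff, so the
 *
 *      and %2, negmask, old    (clear the old byte)
 *      orr %2, %2, val32       (insert the new byte)
 *
 * pair inside the ldrex/strex loop below splices the new value into
 * the word without disturbing its three neighbouring bytes.
 */
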
#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t)                            \
uintN_t                                                                 \
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)             \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t val32, negmask, old;                                      \
        uint32_t temp1, temp2;                                          \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        val32.v32 = 0x00000000;                                         \
        put_##N(&val32, mem, val);                                      \
        negmask.v32 = 0xffffffff;                                       \
        put_##N(&negmask, mem, 0);                                      \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %6\n"      /* Load old value. */           \
                "\tand   %2, %5, %0\n"  /* Remove the old value. */     \
                "\torr   %2, %2, %4\n"  /* Put in the new value. */     \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),        \
                  "=&r" (temp2)                                         \
                : "r" (val32.v32), "r" (negmask.v32), "m" (*mem32));    \
        return (get_##N(&old, mem));                                    \
}

EMIT_LOCK_TEST_AND_SET_N(1, uint8_t)
EMIT_LOCK_TEST_AND_SET_N(2, uint16_t)

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)                         \
uintN_t                                                                 \
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,     \
    uintN_t desired)                                                    \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t expected32, desired32, posmask, old;                      \
        uint32_t negmask, temp1, temp2;                                 \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        expected32.v32 = 0x00000000;                                    \
        put_##N(&expected32, mem, expected);                            \
        desired32.v32 = 0x00000000;                                     \
        put_##N(&desired32, mem, desired);                              \
        posmask.v32 = 0x00000000;                                       \
        put_##N(&posmask, mem, ~0);                                     \
        negmask = ~posmask.v32;                                         \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %8\n"      /* Load old value. */           \
                "\tand   %2, %6, %0\n"  /* Isolate the old value. */    \
                "\tcmp   %2, %4\n"      /* Compare to expected value. */\
                "\tbne   2f\n"          /* Values are unequal. */       \
                "\tand   %2, %7, %0\n"  /* Remove the old value. */     \
                "\torr   %2, %5\n"      /* Put in the new value. */     \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                "2:"                                                    \
                : "=&r" (old), "=m" (*mem32), "=&r" (temp1),            \
                  "=&r" (temp2)                                         \
                : "r" (expected32.v32), "r" (desired32.v32),            \
                  "r" (posmask.v32), "r" (negmask), "m" (*mem32));      \
        return (get_##N(&old, mem));                                    \
}

EMIT_VAL_COMPARE_AND_SWAP_N(1, uint8_t)
EMIT_VAL_COMPARE_AND_SWAP_N(2, uint16_t)

#define EMIT_ARITHMETIC_FETCH_AND_OP_N(N, uintN_t, name, op)            \
uintN_t                                                                 \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)                      \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t val32, posmask, old;                                      \
        uint32_t negmask, temp1, temp2;                                 \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        val32.v32 = 0x00000000;                                         \
        put_##N(&val32, mem, val);                                      \
        posmask.v32 = 0x00000000;                                       \
        put_##N(&posmask, mem, ~0);                                     \
        negmask = ~posmask.v32;                                         \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %7\n"      /* Load old value. */           \
                "\t"op"  %2, %0, %4\n"  /* Calculate new value. */      \
                "\tand   %2, %5\n"      /* Isolate the new value. */    \
                "\tand   %3, %6, %0\n"  /* Remove the old value. */     \
                "\torr   %2, %2, %3\n"  /* Put in the new value. */     \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),        \
                  "=&r" (temp2)                                         \
                : "r" (val32.v32), "r" (posmask.v32), "r" (negmask),    \
                  "m" (*mem32));                                        \
        return (get_##N(&old, mem));                                    \
}

EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(1, uint8_t, fetch_and_sub, "sub")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_add, "add")
EMIT_ARITHMETIC_FETCH_AND_OP_N(2, uint16_t, fetch_and_sub, "sub")

#define EMIT_BITWISE_FETCH_AND_OP_N(N, uintN_t, name, op, idempotence)  \
uintN_t                                                                 \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)                      \
{                                                                       \
        uint32_t *mem32;                                                \
        reg_t val32, old;                                               \
        uint32_t temp1, temp2;                                          \
                                                                        \
        mem32 = round_to_word(mem);                                     \
        val32.v32 = idempotence ? 0xffffffff : 0x00000000;              \
        put_##N(&val32, mem, val);                                      \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %5\n"      /* Load old value. */           \
                "\t"op"  %2, %4, %0\n"  /* Calculate new value. */      \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old.v32), "=m" (*mem32), "=&r" (temp1),        \
                  "=&r" (temp2)                                         \
                : "r" (val32.v32), "m" (*mem32));                       \
        return (get_##N(&old, mem));                                    \
}

EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(1, uint8_t, fetch_and_xor, "eor", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_and, "and", 1)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_or, "orr", 0)
EMIT_BITWISE_FETCH_AND_OP_N(2, uint16_t, fetch_and_xor, "eor", 0)

/*
 * 32-bit routines.
 */

uint32_t
__sync_lock_test_and_set_4_c(uint32_t *mem, uint32_t val)
{
        uint32_t old, temp;

        do_sync();
        __asm volatile (
                "1:"
                "\tldrex %0, %4\n"      /* Load old value. */
                "\tstrex %2, %3, %1\n"  /* Attempt to store. */
                "\tcmp   %2, #0\n"      /* Did it succeed? */
                "\tbne   1b\n"          /* Spin if failed. */
                : "=&r" (old), "=m" (*mem), "=&r" (temp)
                : "r" (val), "m" (*mem));
        return (old);
}

uint32_t
__sync_val_compare_and_swap_4_c(uint32_t *mem, uint32_t expected,
    uint32_t desired)
{
        uint32_t old, temp;

        do_sync();
        __asm volatile (
                "1:"
                "\tldrex %0, %5\n"      /* Load old value. */
                "\tcmp   %0, %3\n"      /* Compare to expected value. */
                "\tbne   2f\n"          /* Values are unequal. */
                "\tstrex %2, %4, %1\n"  /* Attempt to store. */
                "\tcmp   %2, #0\n"      /* Did it succeed? */
                "\tbne   1b\n"          /* Spin if failed. */
                "2:"
                : "=&r" (old), "=m" (*mem), "=&r" (temp)
                : "r" (expected), "r" (desired), "m" (*mem));
        return (old);
}

#define EMIT_FETCH_AND_OP_4(name, op)                                   \
uint32_t                                                                \
__sync_##name##_4_c(uint32_t *mem, uint32_t val)                        \
{                                                                       \
        uint32_t old, temp1, temp2;                                     \
                                                                        \
        do_sync();                                                      \
        __asm volatile (                                                \
                "1:"                                                    \
                "\tldrex %0, %5\n"      /* Load old value. */           \
                "\t"op"  %2, %0, %4\n"  /* Calculate new value. */      \
                "\tstrex %3, %2, %1\n"  /* Attempt to store. */         \
                "\tcmp   %3, #0\n"      /* Did it succeed? */           \
                "\tbne   1b\n"          /* Spin if failed. */           \
                : "=&r" (old), "=m" (*mem), "=&r" (temp1),              \
                  "=&r" (temp2)                                         \
                : "r" (val), "m" (*mem));                               \
        return (old);                                                   \
}

EMIT_FETCH_AND_OP_4(fetch_and_add, "add")
EMIT_FETCH_AND_OP_4(fetch_and_and, "and")
EMIT_FETCH_AND_OP_4(fetch_and_or, "orr")
EMIT_FETCH_AND_OP_4(fetch_and_sub, "sub")
EMIT_FETCH_AND_OP_4(fetch_and_xor, "eor")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif /* !__clang__ */

#else /* __ARM_ARCH < 6 */

#ifdef _KERNEL

#ifdef SMP
#error "On SMP systems we should have proper atomic operations."
#endif

/*
 * On uniprocessor systems, we can perform the atomic operations by
 * disabling interrupts.
 */

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)                         \
uintN_t                                                                 \
__sync_val_compare_and_swap_##N(uintN_t *mem, uintN_t expected,         \
    uintN_t desired)                                                    \
{                                                                       \
        uintN_t ret;                                                    \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                ret = *mem;                                             \
                if (*mem == expected)                                   \
                        *mem = desired;                                 \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_FETCH_AND_OP_N(N, uintN_t, name, op)                       \
uintN_t                                                                 \
__sync_##name##_##N(uintN_t *mem, uintN_t val)                          \
{                                                                       \
        uintN_t ret;                                                    \
                                                                        \
        WITHOUT_INTERRUPTS({                                            \
                ret = *mem;                                             \
                *mem op val;                                            \
        });                                                             \
        return (ret);                                                   \
}

#define EMIT_ALL_OPS_N(N, uintN_t)                                      \
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t)                                 \
EMIT_FETCH_AND_OP_N(N, uintN_t, lock_test_and_set, =)                   \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_add, +=)                      \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_and, &=)                      \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_or, |=)                       \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_sub, -=)                      \
EMIT_FETCH_AND_OP_N(N, uintN_t, fetch_and_xor, ^=)

EMIT_ALL_OPS_N(1, uint8_t)
EMIT_ALL_OPS_N(2, uint16_t)
EMIT_ALL_OPS_N(4, uint32_t)
EMIT_ALL_OPS_N(8, uint64_t)
#undef  EMIT_ALL_OPS_N

#else /* !_KERNEL */

/*
 * For userspace on uniprocessor systems, we can implement the atomic
 * operations by using a Restartable Atomic Sequence. This makes the
 * kernel restart the code from the beginning when interrupted.
 */

#define EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)                  \
uintN_t                                                                 \
__sync_lock_test_and_set_##N##_c(uintN_t *mem, uintN_t val)             \
{                                                                       \
        uint32_t old, temp, ras_start;                                  \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%5]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%5, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %4\n"      /* Load old value. */           \
                "\t"str" %3, %1\n"      /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%5]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%5, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (val), "m" (*mem), "r" (ras_start));              \
        return (old);                                                   \
}

#define EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)             \
uintN_t                                                                 \
__sync_val_compare_and_swap_##N##_c(uintN_t *mem, uintN_t expected,     \
    uintN_t desired)                                                    \
{                                                                       \
        uint32_t old, temp, ras_start;                                  \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%6]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%6, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %5\n"      /* Load old value. */           \
                "\tcmp   %0, %3\n"      /* Compare to expected value. */\
                "\t"streq" %4, %1\n"    /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%6]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%6, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (expected), "r" (desired), "m" (*mem),            \
                  "r" (ras_start));                                     \
        return (old);                                                   \
}

#define EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, name, op)             \
uintN_t                                                                 \
__sync_##name##_##N##_c(uintN_t *mem, uintN_t val)                      \
{                                                                       \
        uint32_t old, temp, ras_start;                                  \
                                                                        \
        ras_start = ARM_RAS_START;                                      \
        __asm volatile (                                                \
                /* Set up Restartable Atomic Sequence. */               \
                "1:"                                                    \
                "\tadr   %2, 1b\n"                                      \
                "\tstr   %2, [%5]\n"                                    \
                "\tadr   %2, 2f\n"                                      \
                "\tstr   %2, [%5, #4]\n"                                \
                                                                        \
                "\t"ldr" %0, %4\n"      /* Load old value. */           \
                "\t"op"  %2, %0, %3\n"  /* Calculate new value. */      \
                "\t"str" %2, %1\n"      /* Store new value. */          \
                                                                        \
                /* Tear down Restartable Atomic Sequence. */            \
                "2:"                                                    \
                "\tmov   %2, #0x00000000\n"                             \
                "\tstr   %2, [%5]\n"                                    \
                "\tmov   %2, #0xffffffff\n"                             \
                "\tstr   %2, [%5, #4]\n"                                \
                : "=&r" (old), "=m" (*mem), "=&r" (temp)                \
                : "r" (val), "m" (*mem), "r" (ras_start));              \
        return (old);                                                   \
}

#define EMIT_ALL_OPS_N(N, uintN_t, ldr, str, streq)                     \
EMIT_LOCK_TEST_AND_SET_N(N, uintN_t, ldr, str)                          \
EMIT_VAL_COMPARE_AND_SWAP_N(N, uintN_t, ldr, streq)                     \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_add, "add")         \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_and, "and")         \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_or, "orr")          \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_sub, "sub")         \
EMIT_FETCH_AND_OP_N(N, uintN_t, ldr, str, fetch_and_xor, "eor")

#ifdef __clang__
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "strbeq")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "strheq")
#else
EMIT_ALL_OPS_N(1, uint8_t, "ldrb", "strb", "streqb")
EMIT_ALL_OPS_N(2, uint16_t, "ldrh", "strh", "streqh")
#endif
EMIT_ALL_OPS_N(4, uint32_t, "ldr", "str", "streq")

#ifndef __clang__
__strong_reference(__sync_lock_test_and_set_1_c, __sync_lock_test_and_set_1);
__strong_reference(__sync_lock_test_and_set_2_c, __sync_lock_test_and_set_2);
__strong_reference(__sync_lock_test_and_set_4_c, __sync_lock_test_and_set_4);
__strong_reference(__sync_val_compare_and_swap_1_c, __sync_val_compare_and_swap_1);
__strong_reference(__sync_val_compare_and_swap_2_c, __sync_val_compare_and_swap_2);
__strong_reference(__sync_val_compare_and_swap_4_c, __sync_val_compare_and_swap_4);
__strong_reference(__sync_fetch_and_add_1_c, __sync_fetch_and_add_1);
__strong_reference(__sync_fetch_and_add_2_c, __sync_fetch_and_add_2);
__strong_reference(__sync_fetch_and_add_4_c, __sync_fetch_and_add_4);
__strong_reference(__sync_fetch_and_and_1_c, __sync_fetch_and_and_1);
__strong_reference(__sync_fetch_and_and_2_c, __sync_fetch_and_and_2);
__strong_reference(__sync_fetch_and_and_4_c, __sync_fetch_and_and_4);
__strong_reference(__sync_fetch_and_sub_1_c, __sync_fetch_and_sub_1);
__strong_reference(__sync_fetch_and_sub_2_c, __sync_fetch_and_sub_2);
__strong_reference(__sync_fetch_and_sub_4_c, __sync_fetch_and_sub_4);
__strong_reference(__sync_fetch_and_or_1_c, __sync_fetch_and_or_1);
__strong_reference(__sync_fetch_and_or_2_c, __sync_fetch_and_or_2);
__strong_reference(__sync_fetch_and_or_4_c, __sync_fetch_and_or_4);
__strong_reference(__sync_fetch_and_xor_1_c, __sync_fetch_and_xor_1);
__strong_reference(__sync_fetch_and_xor_2_c, __sync_fetch_and_xor_2);
__strong_reference(__sync_fetch_and_xor_4_c, __sync_fetch_and_xor_4);
#endif /* !__clang__ */

#endif /* _KERNEL */

#endif /* __ARM_ARCH */

#endif /* __SYNC_ATOMICS */