/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */

/*-
 * Copyright (C) 2003-2004 Olivier Houchard
 * Copyright (C) 1994-1997 Mark Brinicombe
 * Copyright (C) 1994 Brini
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Brini.
 * 4. The name of Brini may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _MACHINE_ATOMIC_H_
#define _MACHINE_ATOMIC_H_

#include <sys/types.h>
#include <machine/armreg.h>

#ifndef _KERNEL
#include <machine/sysarch.h>
#else
#include <machine/cpuconf.h>
#endif

#if defined (__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__)
#define isb()  __asm __volatile("isb" : : : "memory")
#define dsb()  __asm __volatile("dsb" : : : "memory")
#define dmb()  __asm __volatile("dmb" : : : "memory")
#elif defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) || \
  defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6T2__) || \
  defined (__ARM_ARCH_6Z__) || defined (__ARM_ARCH_6ZK__)
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
#else
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  dsb()
#endif

#define mb()   dmb()
#define wmb()  dmb()
#define rmb()  dmb()

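/*
 * Illustrative sketch (not part of the original header): a typical use of the
 * barrier macros above is publishing data before a ready flag and pairing the
 * write barrier with a read barrier on the consumer side.  The variable and
 * function names here are hypothetical.
 */
#if 0
static volatile uint32_t example_data;
static volatile uint32_t example_ready;

static __inline void
example_publish(uint32_t v)
{

        example_data = v;
        wmb();                  /* order the data store before the flag store */
        example_ready = 1;
}

static __inline uint32_t
example_consume(void)
{

        while (example_ready == 0)
                ;
        rmb();                  /* order the flag load before the data load */
        return (example_data);
}
#endif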

/*
 * It would be nice to use _HAVE_ARMv6_INSTRUCTIONS from machine/asm.h
 * here, but that header can't be included here because this is C
 * code.  I would like to move the _HAVE_ARMv6_INSTRUCTIONS definition
 * out of asm.h so it can be used in both asm and C code. - kientzle@
 */
#if defined (__ARM_ARCH_7__) || \
        defined (__ARM_ARCH_7A__)  || \
        defined (__ARM_ARCH_6__)   || \
        defined (__ARM_ARCH_6J__)  || \
        defined (__ARM_ARCH_6K__)  || \
        defined (__ARM_ARCH_6T2__) || \
        defined (__ARM_ARCH_6Z__)  || \
        defined (__ARM_ARCH_6ZK__)
#define ARM_HAVE_ATOMIC64

static __inline void
__do_dmb(void)
{

#if defined (__ARM_ARCH_7__) || defined (__ARM_ARCH_7A__)
        __asm __volatile("dmb" : : : "memory");
#else
        __asm __volatile("mcr p15, 0, r0, c7, c10, 5" : : : "memory");
#endif
}

#define ATOMIC_ACQ_REL_LONG(NAME)                                       \
static __inline void                                                    \
atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)                \
{                                                                       \
        atomic_##NAME##_long(p, v);                                     \
        __do_dmb();                                                     \
}                                                                       \
                                                                        \
static __inline  void                                                   \
atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)                \
{                                                                       \
        __do_dmb();                                                     \
        atomic_##NAME##_long(p, v);                                     \
}

#define ATOMIC_ACQ_REL(NAME, WIDTH)                                     \
static __inline  void                                                   \
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{                                                                       \
        atomic_##NAME##_##WIDTH(p, v);                                  \
        __do_dmb();                                                     \
}                                                                       \
                                                                        \
static __inline  void                                                   \
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{                                                                       \
        __do_dmb();                                                     \
        atomic_##NAME##_##WIDTH(p, v);                                  \
}

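/*
 * Illustrative sketch (not part of the original header): for reference,
 * ATOMIC_ACQ_REL(add, 32) expands to roughly the following pair of functions,
 * i.e. the acquire variant issues the barrier after the operation and the
 * release variant issues it before.
 */
#if 0
static __inline void
atomic_add_acq_32(__volatile uint32_t *p, uint32_t v)
{
        atomic_add_32(p, v);
        __do_dmb();
}

static __inline void
atomic_add_rel_32(__volatile uint32_t *p, uint32_t v)
{
        __do_dmb();
        atomic_add_32(p, v);
}
#endif
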
static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile("1: ldrex %0, [%2]\n"
                            "orr %0, %0, %3\n"
                            "strex %1, %0, [%2]\n"
                            "cmp %1, #0\n"
                            "it ne\n"
                            "bne        1b\n"
                           : "=&r" (tmp), "+r" (tmp2)
                           , "+r" (address), "+r" (setmask) : : "cc", "memory");
}

static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
                "1:          \n"
                "   ldrexd   %[tmp], [%[ptr]]\n"
                "   orr      %Q[tmp], %Q[val]\n"
                "   orr      %R[tmp], %R[val]\n"
                "   strexd   %[exf], %[tmp], [%[ptr]]\n"
                "   teq      %[exf], #0\n"
                "   it ne    \n"
                "   bne      1b\n"
                :   [exf]    "=&r"  (exflag),
                    [tmp]    "=&r"  (tmp)
                :   [ptr]    "r"    (p),
                    [val]    "r"    (val)
                :   "cc", "memory");
}

static __inline void
atomic_set_long(volatile u_long *address, u_long setmask)
{
        u_long tmp = 0, tmp2 = 0;

        __asm __volatile("1: ldrex %0, [%2]\n"
                            "orr %0, %0, %3\n"
                            "strex %1, %0, [%2]\n"
                            "cmp %1, #0\n"
                            "it ne\n"
                            "bne        1b\n"
                           : "=&r" (tmp), "+r" (tmp2)
                           , "+r" (address), "+r" (setmask) : : "cc", "memory");
}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile("1: ldrex %0, [%2]\n"
                            "bic %0, %0, %3\n"
                            "strex %1, %0, [%2]\n"
                            "cmp %1, #0\n"
                            "it ne\n"
                            "bne        1b\n"
                           : "=&r" (tmp), "+r" (tmp2)
                           ,"+r" (address), "+r" (setmask) : : "cc", "memory");
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
                "1:          \n"
                "   ldrexd   %[tmp], [%[ptr]]\n"
                "   bic      %Q[tmp], %Q[val]\n"
                "   bic      %R[tmp], %R[val]\n"
                "   strexd   %[exf], %[tmp], [%[ptr]]\n"
                "   teq      %[exf], #0\n"
                "   it ne    \n"
                "   bne      1b\n"
                :   [exf]    "=&r"  (exflag),
                    [tmp]    "=&r"  (tmp)
                :   [ptr]    "r"    (p),
                    [val]    "r"    (val)
                :   "cc", "memory");
}

static __inline void
atomic_clear_long(volatile u_long *address, u_long setmask)
{
        u_long tmp = 0, tmp2 = 0;

        __asm __volatile("1: ldrex %0, [%2]\n"
                            "bic %0, %0, %3\n"
                            "strex %1, %0, [%2]\n"
                            "cmp %1, #0\n"
                            "it ne\n"
                            "bne        1b\n"
                           : "=&r" (tmp), "+r" (tmp2)
                           ,"+r" (address), "+r" (setmask) : : "cc", "memory");
}

static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
        uint32_t ret;

        __asm __volatile("1: ldrex %0, [%1]\n"
                         "cmp %0, %2\n"
                         "itt ne\n"
                         "movne %0, #0\n"
                         "bne 2f\n"
                         "strex %0, %3, [%1]\n"
                         "cmp %0, #0\n"
                         "ite eq\n"
                         "moveq %0, #1\n"
                         "bne   1b\n"
                         "2:"
                         : "=&r" (ret)
                         ,"+r" (p), "+r" (cmpval), "+r" (newval) : : "cc",
                         "memory");
        return (ret);
}

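/*
 * Illustrative sketch (not part of the original header): atomic_cmpset_32()
 * returns non-zero when the store succeeded, so a typical caller retries in a
 * loop, e.g. a saturating increment.  The helper name and the limit parameter
 * are hypothetical.
 */
#if 0
static __inline int
example_incr_capped(volatile uint32_t *counter, uint32_t limit)
{
        uint32_t old;

        do {
                old = *counter;
                if (old == limit)
                        return (0);     /* already at the cap */
        } while (atomic_cmpset_32(counter, old, old + 1) == 0);
        return (1);
}
#endif
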
static __inline int
atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
        uint64_t tmp;
        uint32_t ret;

        __asm __volatile(
                "1:          \n"
                "   ldrexd   %[tmp], [%[ptr]]\n"
                "   teq      %Q[tmp], %Q[cmpval]\n"
                "   itee eq  \n"
                "   teqeq    %R[tmp], %R[cmpval]\n"
                "   movne    %[ret], #0\n"
                "   bne      2f\n"
                "   strexd   %[ret], %[newval], [%[ptr]]\n"
                "   teq      %[ret], #0\n"
                "   it ne    \n"
                "   bne      1b\n"
                "   mov      %[ret], #1\n"
                "2:          \n"
                :   [ret]    "=&r"  (ret),
                    [tmp]    "=&r"  (tmp)
                :   [ptr]    "r"    (p),
                    [cmpval] "r"    (cmpval),
                    [newval] "r"    (newval)
                :   "cc", "memory");
        return (ret);
}

static __inline u_long
atomic_cmpset_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{
        u_long ret;

        __asm __volatile("1: ldrex %0, [%1]\n"
                         "cmp %0, %2\n"
                         "itt ne\n"
                         "movne %0, #0\n"
                         "bne 2f\n"
                         "strex %0, %3, [%1]\n"
                         "cmp %0, #0\n"
                         "ite eq\n"
                         "moveq %0, #1\n"
                         "bne   1b\n"
                         "2:"
                         : "=&r" (ret)
                         ,"+r" (p), "+r" (cmpval), "+r" (newval) : : "cc",
                         "memory");
        return (ret);
}

static __inline u_int32_t
atomic_cmpset_acq_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
        u_int32_t ret = atomic_cmpset_32(p, cmpval, newval);

        __do_dmb();
        return (ret);
}

static __inline uint64_t
atomic_cmpset_acq_64(volatile uint64_t *p, volatile uint64_t cmpval, volatile uint64_t newval)
{
        uint64_t ret = atomic_cmpset_64(p, cmpval, newval);

        __do_dmb();
        return (ret);
}

static __inline u_long
atomic_cmpset_acq_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{
        u_long ret = atomic_cmpset_long(p, cmpval, newval);

        __do_dmb();
        return (ret);
}

static __inline u_int32_t
atomic_cmpset_rel_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{

        __do_dmb();
        return (atomic_cmpset_32(p, cmpval, newval));
}

static __inline uint64_t
atomic_cmpset_rel_64(volatile uint64_t *p, volatile uint64_t cmpval, volatile uint64_t newval)
{

        __do_dmb();
        return (atomic_cmpset_64(p, cmpval, newval));
}

static __inline u_long
atomic_cmpset_rel_long(volatile u_long *p, volatile u_long cmpval, volatile u_long newval)
{

        __do_dmb();
        return (atomic_cmpset_long(p, cmpval, newval));
}

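/*
 * Illustrative sketch (not part of the original header): the acquire form of
 * cmpset pairs naturally with a release store, e.g. a minimal test-and-set
 * spinlock.  The lock layout and function names are hypothetical, and
 * atomic_store_rel_32() is defined further down in this file.
 */
#if 0
static __inline void
example_spin_lock(volatile uint32_t *lock)
{

        while (atomic_cmpset_acq_32(lock, 0, 1) == 0)
                ;       /* spin until the 0 -> 1 transition succeeds */
}

static __inline void
example_spin_unlock(volatile uint32_t *lock)
{

        atomic_store_rel_32(lock, 0);
}
#endif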

static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile("1: ldrex %0, [%2]\n"
                            "add %0, %0, %3\n"
                            "strex %1, %0, [%2]\n"
                            "cmp %1, #0\n"
                            "it ne\n"
                            "bne        1b\n"
                            : "=&r" (tmp), "+r" (tmp2)
                            ,"+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
                "1:          \n"
                "   ldrexd   %[tmp], [%[ptr]]\n"
                "   adds     %Q[tmp], %Q[val]\n"
                "   adc      %R[tmp], %R[val]\n"
                "   strexd   %[exf], %[tmp], [%[ptr]]\n"
                "   teq      %[exf], #0\n"
                "   it ne    \n"
                "   bne      1b\n"
                :   [exf]    "=&r"  (exflag),
                    [tmp]    "=&r"  (tmp)
                :   [ptr]    "r"    (p),
                    [val]    "r"    (val)
                :   "cc", "memory");
}

static __inline void
atomic_add_long(volatile u_long *p, u_long val)
{
        u_long tmp = 0, tmp2 = 0;

        __asm __volatile("1: ldrex %0, [%2]\n"
                            "add %0, %0, %3\n"
                            "strex %1, %0, [%2]\n"
                            "cmp %1, #0\n"
                            "it ne\n"
                            "bne        1b\n"
                            : "=&r" (tmp), "+r" (tmp2)
                            ,"+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile("1: ldrex %0, [%2]\n"
                            "sub %0, %0, %3\n"
                            "strex %1, %0, [%2]\n"
                            "cmp %1, #0\n"
                            "it ne\n"
                            "bne        1b\n"
                            : "=&r" (tmp), "+r" (tmp2)
                            ,"+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
                "1:          \n"
                "   ldrexd   %[tmp], [%[ptr]]\n"
                "   subs     %Q[tmp], %Q[val]\n"
                "   sbc      %R[tmp], %R[val]\n"
                "   strexd   %[exf], %[tmp], [%[ptr]]\n"
                "   teq      %[exf], #0\n"
                "   it ne    \n"
                "   bne      1b\n"
                :   [exf]    "=&r"  (exflag),
                    [tmp]    "=&r"  (tmp)
                :   [ptr]    "r"    (p),
                    [val]    "r"    (val)
                :   "cc", "memory");
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long val)
{
        u_long tmp = 0, tmp2 = 0;

        __asm __volatile("1: ldrex %0, [%2]\n"
                            "sub %0, %0, %3\n"
                            "strex %1, %0, [%2]\n"
                            "cmp %1, #0\n"
                            "it ne\n"
                            "bne        1b\n"
                            : "=&r" (tmp), "+r" (tmp2)
                            ,"+r" (p), "+r" (val) : : "cc", "memory");
}

ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(subtract, 32)
ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL(subtract, 64)
ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL_LONG(clear)
ATOMIC_ACQ_REL_LONG(add)
ATOMIC_ACQ_REL_LONG(subtract)
ATOMIC_ACQ_REL_LONG(set)

#undef ATOMIC_ACQ_REL
#undef ATOMIC_ACQ_REL_LONG

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
        uint32_t tmp = 0, tmp2 = 0, ret = 0;

        __asm __volatile("1: ldrex %0, [%3]\n"
                            "add %1, %0, %4\n"
                            "strex %2, %1, [%3]\n"
                            "cmp %2, #0\n"
                            "it ne\n"
                            "bne        1b\n"
                           : "+r" (ret), "=&r" (tmp), "+r" (tmp2)
                           ,"+r" (p), "+r" (val) : : "cc", "memory");
        return (ret);
}

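/*
 * Illustrative sketch (not part of the original header): atomic_fetchadd_32()
 * returns the value the word held before the addition, which makes it handy
 * for handing out unique sequence numbers.  The counter name is hypothetical.
 */
#if 0
static volatile uint32_t example_next_id;

static __inline uint32_t
example_alloc_id(void)
{

        return (atomic_fetchadd_32(&example_next_id, 1));
}
#endif
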
static __inline uint32_t
atomic_readandclear_32(volatile u_int32_t *p)
{
        uint32_t ret, tmp = 0, tmp2 = 0;

        __asm __volatile("1: ldrex %0, [%3]\n"
                         "mov %1, #0\n"
                         "strex %2, %1, [%3]\n"
                         "cmp %2, #0\n"
                         "it ne\n"
                         "bne 1b\n"
                         : "=r" (ret), "=&r" (tmp), "+r" (tmp2)
                         ,"+r" (p) : : "cc", "memory");
        return (ret);
}

static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
        uint32_t v;

        v = *p;
        __do_dmb();
        return (v);
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
{

        __do_dmb();
        *p = v;
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t ret, tmp;
        uint32_t exflag;

        __asm __volatile(
                "1:          \n"
                "   ldrexd   %[ret], [%[ptr]]\n"
                "   adds     %Q[tmp], %Q[ret], %Q[val]\n"
                "   adc      %R[tmp], %R[ret], %R[val]\n"
                "   strexd   %[exf], %[tmp], [%[ptr]]\n"
                "   teq      %[exf], #0\n"
                "   it ne    \n"
                "   bne      1b\n"
                :   [ret]    "=&r"  (ret),
                    [exf]    "=&r"  (exflag),
                    [tmp]    "=&r"  (tmp)
                :   [ptr]    "r"    (p),
                    [val]    "r"    (val)
                :   "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
        uint64_t ret, tmp;
        uint32_t exflag;

        __asm __volatile(
                "1:          \n"
                "   ldrexd   %[ret], [%[ptr]]\n"
                "   mov      %Q[tmp], #0\n"
                "   mov      %R[tmp], #0\n"
                "   strexd   %[exf], %[tmp], [%[ptr]]\n"
                "   teq      %[exf], #0\n"
                "   it ne    \n"
                "   bne      1b\n"
                :   [ret]    "=&r"  (ret),
                    [exf]    "=&r"  (exflag),
                    [tmp]    "=&r"  (tmp)
                :   [ptr]    "r"    (p)
                :   "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
        uint64_t ret;

        /*
         * The only way to atomically load 64 bits is with LDREXD, which puts
         * the exclusive monitor into the exclusive state, so reset it to open
         * state with CLREX because we don't actually need to store anything.
         */
        __asm __volatile(
                "1:          \n"
                "   ldrexd   %[ret], [%[ptr]]\n"
                "   clrex    \n"
                :   [ret]    "=&r"  (ret)
                :   [ptr]    "r"    (p)
                :   "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
        uint64_t ret;

        ret = atomic_load_64(p);
        __do_dmb();
        return (ret);
}

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        /*
         * The only way to atomically store 64 bits is with STREXD, which will
         * succeed only if paired up with a preceding LDREXD using the same
         * address, so we read and discard the existing value before storing.
         */
        __asm __volatile(
                "1:          \n"
                "   ldrexd   %[tmp], [%[ptr]]\n"
                "   strexd   %[exf], %[val], [%[ptr]]\n"
                "   teq      %[exf], #0\n"
                "   it ne    \n"
                "   bne      1b\n"
                :   [tmp]    "=&r"  (tmp),
                    [exf]    "=&r"  (exflag)
                :   [ptr]    "r"    (p),
                    [val]    "r"    (val)
                :   "cc", "memory");
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

        __do_dmb();
        atomic_store_64(p, val);
}

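/*
 * Illustrative sketch (not part of the original header): on 32-bit ARM a
 * 64-bit value can be published with atomic_store_rel_64() and read back with
 * atomic_load_acq_64(), so a reader never observes a torn half-written value.
 * The variable and function names are hypothetical.
 */
#if 0
static volatile uint64_t example_timestamp;

static __inline void
example_set_timestamp(uint64_t t)
{

        atomic_store_rel_64(&example_timestamp, t);
}

static __inline uint64_t
example_get_timestamp(void)
{

        return (atomic_load_acq_64(&example_timestamp));
}
#endif
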
static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long val)
{
        u_long tmp = 0, tmp2 = 0, ret = 0;

        __asm __volatile("1: ldrex %0, [%3]\n"
                            "add %1, %0, %4\n"
                            "strex %2, %1, [%3]\n"
                            "cmp %2, #0\n"
                            "it ne\n"
                            "bne        1b\n"
                           : "+r" (ret), "=&r" (tmp), "+r" (tmp2)
                           ,"+r" (p), "+r" (val) : : "cc", "memory");
        return (ret);
}

static __inline u_long
atomic_readandclear_long(volatile u_long *p)
{
        u_long ret, tmp = 0, tmp2 = 0;

        __asm __volatile("1: ldrex %0, [%3]\n"
                         "mov %1, #0\n"
                         "strex %2, %1, [%3]\n"
                         "cmp %2, #0\n"
                         "it ne\n"
                         "bne 1b\n"
                         : "=r" (ret), "=&r" (tmp), "+r" (tmp2)
                         ,"+r" (p) : : "cc", "memory");
        return (ret);
}

static __inline u_long
atomic_load_acq_long(volatile u_long *p)
{
        u_long v;

        v = *p;
        __do_dmb();
        return (v);
}

static __inline void
atomic_store_rel_long(volatile u_long *p, u_long v)
{

        __do_dmb();
        *p = v;
}
#else /* < armv6 */

#define __with_interrupts_disabled(expr) \
        do {                                            \
                u_int cpsr_save, tmp;                   \
                                                        \
                __asm __volatile(                       \
                        "mrs  %0, cpsr;"                \
                        "orr  %1, %0, %2;"              \
                        "msr  cpsr_fsxc, %1;"           \
                        : "=r" (cpsr_save), "=r" (tmp)  \
                        : "I" (PSR_I | PSR_F)           \
                        : "cc" );               \
                (expr);                         \
                 __asm __volatile(              \
                        "msr  cpsr_fsxc, %0"    \
                        : /* no output */       \
                        : "r" (cpsr_save)       \
                        : "cc" );               \
        } while(0)

static __inline uint32_t
__swp(uint32_t val, volatile uint32_t *ptr)
{
        __asm __volatile("swp   %0, %2, [%3]"
            : "=&r" (val), "=m" (*ptr)
            : "r" (val), "r" (ptr), "m" (*ptr)
            : "memory");
        return (val);
}


#ifdef _KERNEL
#define ARM_HAVE_ATOMIC64

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
        __with_interrupts_disabled(*address |= setmask);
}

static __inline void
atomic_set_64(volatile uint64_t *address, uint64_t setmask)
{
        __with_interrupts_disabled(*address |= setmask);
}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t clearmask)
{
        __with_interrupts_disabled(*address &= ~clearmask);
}

static __inline void
atomic_clear_64(volatile uint64_t *address, uint64_t clearmask)
{
        __with_interrupts_disabled(*address &= ~clearmask);
}

static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
        int ret;

        __with_interrupts_disabled(
         {
                if (*p == cmpval) {
                        *p = newval;
                        ret = 1;
                } else {
                        ret = 0;
                }
        });
        return (ret);
}

static __inline u_int64_t
atomic_cmpset_64(volatile u_int64_t *p, volatile u_int64_t cmpval, volatile u_int64_t newval)
{
        int ret;

        __with_interrupts_disabled(
         {
                if (*p == cmpval) {
                        *p = newval;
                        ret = 1;
                } else {
                        ret = 0;
                }
        });
        return (ret);
}

static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
        __with_interrupts_disabled(*p += val);
}

static __inline void
atomic_add_64(volatile u_int64_t *p, u_int64_t val)
{
        __with_interrupts_disabled(*p += val);
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
        __with_interrupts_disabled(*p -= val);
}

static __inline void
atomic_subtract_64(volatile u_int64_t *p, u_int64_t val)
{
        __with_interrupts_disabled(*p -= val);
}

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t v)
{
        uint32_t value;

        __with_interrupts_disabled(
        {
                value = *p;
                *p += v;
        });
        return (value);
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t v)
{
        uint64_t value;

        __with_interrupts_disabled(
        {
                value = *p;
                *p += v;
        });
        return (value);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
        uint64_t value;

        __with_interrupts_disabled(value = *p);
        return (value);
}

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t value)
{
        __with_interrupts_disabled(*p = value);
}

#else /* !_KERNEL */

static __inline u_int32_t
atomic_cmpset_32(volatile u_int32_t *p, volatile u_int32_t cmpval, volatile u_int32_t newval)
{
        register int done, ras_start = ARM_RAS_START;

        __asm __volatile("1:\n"
            "adr        %1, 1b\n"
            "str        %1, [%0]\n"
            "adr        %1, 2f\n"
            "str        %1, [%0, #4]\n"
            "ldr        %1, [%2]\n"
            "cmp        %1, %3\n"
            "streq      %4, [%2]\n"
            "2:\n"
            "mov        %1, #0\n"
            "str        %1, [%0]\n"
            "mov        %1, #0xffffffff\n"
            "str        %1, [%0, #4]\n"
            "moveq      %1, #1\n"
            "movne      %1, #0\n"
            : "+r" (ras_start), "=r" (done)
            ,"+r" (p), "+r" (cmpval), "+r" (newval) : : "cc", "memory");
        return (done);
}

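/*
 * Illustrative sketch (not part of the original header): the userland variant
 * above relies on the kernel-assisted restartable atomic sequence window
 * registered at ARM_RAS_START, but callers use it like any other cmpset, e.g.
 * claiming a one-shot initialization flag.  The flag and function names are
 * hypothetical.
 */
#if 0
static volatile uint32_t example_initialized;

static __inline int
example_try_claim_init(void)
{

        /* Returns non-zero for exactly one caller, zero for all others. */
        return (atomic_cmpset_32(&example_initialized, 0, 1));
}
#endif
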
static __inline void
atomic_add_32(volatile u_int32_t *p, u_int32_t val)
{
        int start, ras_start = ARM_RAS_START;

        __asm __volatile("1:\n"
            "adr        %1, 1b\n"
            "str        %1, [%0]\n"
            "adr        %1, 2f\n"
            "str        %1, [%0, #4]\n"
            "ldr        %1, [%2]\n"
            "add        %1, %1, %3\n"
            "str        %1, [%2]\n"
            "2:\n"
            "mov        %1, #0\n"
            "str        %1, [%0]\n"
            "mov        %1, #0xffffffff\n"
            "str        %1, [%0, #4]\n"
            : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val)
            : : "memory");
}

static __inline void
atomic_subtract_32(volatile u_int32_t *p, u_int32_t val)
{
        int start, ras_start = ARM_RAS_START;

        __asm __volatile("1:\n"
            "adr        %1, 1b\n"
            "str        %1, [%0]\n"
            "adr        %1, 2f\n"
            "str        %1, [%0, #4]\n"
            "ldr        %1, [%2]\n"
            "sub        %1, %1, %3\n"
            "str        %1, [%2]\n"
            "2:\n"
            "mov        %1, #0\n"
            "str        %1, [%0]\n"
            "mov        %1, #0xffffffff\n"
            "str        %1, [%0, #4]\n"

            : "+r" (ras_start), "=r" (start), "+r" (p), "+r" (val)
            : : "memory");
}

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
        int start, ras_start = ARM_RAS_START;

        __asm __volatile("1:\n"
            "adr        %1, 1b\n"
            "str        %1, [%0]\n"
            "adr        %1, 2f\n"
            "str        %1, [%0, #4]\n"
            "ldr        %1, [%2]\n"
            "orr        %1, %1, %3\n"
            "str        %1, [%2]\n"
            "2:\n"
            "mov        %1, #0\n"
            "str        %1, [%0]\n"
            "mov        %1, #0xffffffff\n"
            "str        %1, [%0, #4]\n"

            : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (setmask)
            : : "memory");
}

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t clearmask)
{
        int start, ras_start = ARM_RAS_START;

        __asm __volatile("1:\n"
            "adr        %1, 1b\n"
            "str        %1, [%0]\n"
            "adr        %1, 2f\n"
            "str        %1, [%0, #4]\n"
            "ldr        %1, [%2]\n"
            "bic        %1, %1, %3\n"
            "str        %1, [%2]\n"
            "2:\n"
            "mov        %1, #0\n"
            "str        %1, [%0]\n"
            "mov        %1, #0xffffffff\n"
            "str        %1, [%0, #4]\n"
            : "+r" (ras_start), "=r" (start), "+r" (address), "+r" (clearmask)
            : : "memory");

}

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t v)
{
        uint32_t start, tmp, ras_start = ARM_RAS_START;

        __asm __volatile("1:\n"
            "adr        %1, 1b\n"
            "str        %1, [%0]\n"
            "adr        %1, 2f\n"
            "str        %1, [%0, #4]\n"
            "ldr        %1, [%3]\n"
            "mov        %2, %1\n"
            "add        %2, %2, %4\n"
            "str        %2, [%3]\n"
            "2:\n"
            "mov        %2, #0\n"
            "str        %2, [%0]\n"
            "mov        %2, #0xffffffff\n"
            "str        %2, [%0, #4]\n"
            : "+r" (ras_start), "=r" (start), "=r" (tmp), "+r" (p), "+r" (v)
            : : "memory");
        return (start);
}

#endif /* _KERNEL */


static __inline uint32_t
atomic_readandclear_32(volatile u_int32_t *p)
{

        return (__swp(0, p));
}

#define atomic_cmpset_rel_32    atomic_cmpset_32
#define atomic_cmpset_acq_32    atomic_cmpset_32
#define atomic_set_rel_32       atomic_set_32
#define atomic_set_acq_32       atomic_set_32
#define atomic_clear_rel_32     atomic_clear_32
#define atomic_clear_acq_32     atomic_clear_32
#define atomic_add_rel_32       atomic_add_32
#define atomic_add_acq_32       atomic_add_32
#define atomic_subtract_rel_32  atomic_subtract_32
#define atomic_subtract_acq_32  atomic_subtract_32
#define atomic_store_rel_32     atomic_store_32
#define atomic_store_rel_long   atomic_store_long
#define atomic_load_acq_32      atomic_load_32
#define atomic_load_acq_long    atomic_load_long
#define atomic_add_acq_long             atomic_add_long
#define atomic_add_rel_long             atomic_add_long
#define atomic_subtract_acq_long        atomic_subtract_long
#define atomic_subtract_rel_long        atomic_subtract_long
#define atomic_clear_acq_long           atomic_clear_long
#define atomic_clear_rel_long           atomic_clear_long
#define atomic_set_acq_long             atomic_set_long
#define atomic_set_rel_long             atomic_set_long
#define atomic_cmpset_acq_long          atomic_cmpset_long
#define atomic_cmpset_rel_long          atomic_cmpset_long
#undef __with_interrupts_disabled

static __inline void
atomic_add_long(volatile u_long *p, u_long v)
{

        atomic_add_32((volatile uint32_t *)p, v);
}

static __inline void
atomic_clear_long(volatile u_long *p, u_long v)
{

        atomic_clear_32((volatile uint32_t *)p, v);
}

static __inline int
atomic_cmpset_long(volatile u_long *dst, u_long old, u_long newe)
{

        return (atomic_cmpset_32((volatile uint32_t *)dst, old, newe));
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long v)
{

        return (atomic_fetchadd_32((volatile uint32_t *)p, v));
}

static __inline void
atomic_readandclear_long(volatile u_long *p)
{

        atomic_readandclear_32((volatile uint32_t *)p);
}

static __inline void
atomic_set_long(volatile u_long *p, u_long v)
{

        atomic_set_32((volatile uint32_t *)p, v);
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long v)
{

        atomic_subtract_32((volatile uint32_t *)p, v);
}


#endif /* Arch >= v6 */

static __inline int
atomic_load_32(volatile uint32_t *v)
{

        return (*v);
}

static __inline void
atomic_store_32(volatile uint32_t *dst, uint32_t src)
{
        *dst = src;
}

static __inline int
atomic_load_long(volatile u_long *v)
{

        return (*v);
}

static __inline void
atomic_store_long(volatile u_long *dst, u_long src)
{
        *dst = src;
}

#define atomic_clear_ptr                atomic_clear_32
#define atomic_set_ptr                  atomic_set_32
#define atomic_cmpset_ptr               atomic_cmpset_32
#define atomic_cmpset_rel_ptr           atomic_cmpset_rel_32
#define atomic_cmpset_acq_ptr           atomic_cmpset_acq_32
#define atomic_store_ptr                atomic_store_32
#define atomic_store_rel_ptr            atomic_store_rel_32

#define atomic_add_int                  atomic_add_32
#define atomic_add_acq_int              atomic_add_acq_32
#define atomic_add_rel_int              atomic_add_rel_32
#define atomic_subtract_int             atomic_subtract_32
#define atomic_subtract_acq_int         atomic_subtract_acq_32
#define atomic_subtract_rel_int         atomic_subtract_rel_32
#define atomic_clear_int                atomic_clear_32
#define atomic_clear_acq_int            atomic_clear_acq_32
#define atomic_clear_rel_int            atomic_clear_rel_32
#define atomic_set_int                  atomic_set_32
#define atomic_set_acq_int              atomic_set_acq_32
#define atomic_set_rel_int              atomic_set_rel_32
#define atomic_cmpset_int               atomic_cmpset_32
#define atomic_cmpset_acq_int           atomic_cmpset_acq_32
#define atomic_cmpset_rel_int           atomic_cmpset_rel_32
#define atomic_fetchadd_int             atomic_fetchadd_32
#define atomic_readandclear_int         atomic_readandclear_32
#define atomic_load_acq_int             atomic_load_acq_32
#define atomic_store_rel_int            atomic_store_rel_32

#endif /* _MACHINE_ATOMIC_H_ */