/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */

/*-
 * Copyright (C) 2003-2004 Olivier Houchard
 * Copyright (C) 1994-1997 Mark Brinicombe
 * Copyright (C) 1994 Brini
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Brini.
 * 4. The name of Brini may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _MACHINE_ATOMIC_V6_H_
#define _MACHINE_ATOMIC_V6_H_

#ifndef _MACHINE_ATOMIC_H_
#error Do not include this file directly, use <machine/atomic.h>
#endif

#if __ARM_ARCH >= 7
#define isb()  __asm __volatile("isb" : : : "memory")
#define dsb()  __asm __volatile("dsb" : : : "memory")
#define dmb()  __asm __volatile("dmb" : : : "memory")
#elif __ARM_ARCH >= 6
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
#else
#error Only use this file with ARMv6 and later
#endif

#define mb()   dmb()
#define wmb()  dmb()
#define rmb()  dmb()

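/*
 * mb(), wmb() and rmb() all expand to a full DMB on ARMv6/v7, so each of
 * them orders both loads and stores.  A minimal producer-side sketch, using
 * hypothetical shared variables 'data' and 'ready' for illustration only:
 *
 *	data = compute();	// publish the payload
 *	wmb();			// DMB: payload is visible before the flag
 *	ready = 1;		// a consumer pairs this with rmb() after
 *				// observing ready != 0
 */
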
#define ARM_HAVE_ATOMIC64

#define ATOMIC_ACQ_REL_LONG(NAME)                                       \
static __inline void                                                    \
atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)                \
{                                                                       \
        atomic_##NAME##_long(p, v);                                     \
        dmb();                                                          \
}                                                                       \
                                                                        \
static __inline  void                                                   \
atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)                \
{                                                                       \
        dmb();                                                          \
        atomic_##NAME##_long(p, v);                                     \
}

#define ATOMIC_ACQ_REL(NAME, WIDTH)                                     \
static __inline  void                                                   \
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{                                                                       \
        atomic_##NAME##_##WIDTH(p, v);                                  \
        dmb();                                                          \
}                                                                       \
                                                                        \
static __inline  void                                                   \
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{                                                                       \
        dmb();                                                          \
        atomic_##NAME##_##WIDTH(p, v);                                  \
}

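/*
 * The two macros above generate the _acq_/_rel_ variants of an operation by
 * wrapping the plain version with a DMB on the appropriate side.  As an
 * illustrative expansion only, ATOMIC_ACQ_REL(add, 32) produces roughly:
 *
 *	static __inline void
 *	atomic_add_acq_32(__volatile uint32_t *p, uint32_t v)
 *	{
 *		atomic_add_32(p, v);
 *		dmb();			// later accesses stay after the add
 *	}
 *
 *	static __inline void
 *	atomic_add_rel_32(__volatile uint32_t *p, uint32_t v)
 *	{
 *		dmb();			// earlier accesses complete first
 *		atomic_add_32(p, v);
 *	}
 */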

static __inline void
atomic_add_32(volatile uint32_t *p, uint32_t val)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   add     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2)
            ,"+r" (p), "+r" (val) : : "cc", "memory");
}

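/*
 * atomic_add_32() above is the canonical LDREX/STREX retry loop: LDREX loads
 * the word and arms the exclusive monitor, STREX writes back only while the
 * monitor is still exclusive and yields 0 on success, and the loop retries
 * otherwise.  A rough C-style equivalent, for illustration only (the
 * exclusive-monitor primitives cannot really be spelled this way in C):
 *
 *	do {
 *		old = load_exclusive(p);		// LDREX
 *		new = old + val;			// add
 *	} while (store_exclusive(p, new) != 0);		// STREX, 0 == success
 */
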
static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   adds    %Q[tmp], %Q[val]                        \n"
            "   adc     %R[tmp], %R[tmp], %R[val]               \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_add_long(volatile u_long *p, u_long val)
{

        atomic_add_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL_LONG(add)

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   bic     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
            : : "cc", "memory");
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   bic     %Q[tmp], %Q[val]                        \n"
            "   bic     %R[tmp], %R[val]                        \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_clear_long(volatile u_long *address, u_long setmask)
{

        atomic_clear_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL_LONG(clear)

#define ATOMIC_FCMPSET_CODE(RET, TYPE, SUF)                   \
    {                                                         \
        TYPE tmp;                                             \
                                                              \
        __asm __volatile(                                     \
            "1: ldrex" SUF "   %[tmp], [%[ptr]]          \n"  \
            "   ldr" SUF "     %[ret], [%[oldv]]         \n"  \
            "   teq            %[tmp], %[ret]            \n"  \
            "   ittee          ne                        \n"  \
            "   str" SUF "ne   %[tmp], [%[oldv]]         \n"  \
            "   movne          %[ret], #0                \n"  \
            "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n"  \
            "   eorseq         %[ret], #1                \n"  \
            "   beq            1b                        \n"  \
            : [ret] "=&r" (RET),                              \
              [tmp] "=&r" (tmp)                               \
            : [ptr] "r"   (_ptr),                             \
              [oldv] "r"  (_old),                             \
              [newv] "r"  (_new)                              \
            : "cc", "memory");                                \
    }

#define ATOMIC_FCMPSET_CODE64(RET)                                 \
    {                                                              \
        uint64_t cmp, tmp;                                         \
                                                                   \
        __asm __volatile(                                          \
            "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n"  \
            "   ldrd     %Q[cmp], %R[cmp], [%[oldv]]          \n"  \
            "   teq      %Q[tmp], %Q[cmp]                     \n"  \
            "   it       eq                                   \n"  \
            "   teqeq    %R[tmp], %R[cmp]                     \n"  \
            "   ittee    ne                                   \n"  \
            "   movne    %[ret], #0                           \n"  \
            "   strdne   %[cmp], [%[oldv]]                    \n"  \
            "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n"  \
            "   eorseq   %[ret], #1                           \n"  \
            "   beq      1b                                   \n"  \
            : [ret] "=&r" (RET),                                   \
              [cmp] "=&r" (cmp),                                   \
              [tmp] "=&r" (tmp)                                    \
            : [ptr] "r"   (_ptr),                                  \
              [oldv] "r"  (_old),                                  \
              [newv] "r"  (_new)                                   \
            : "cc", "memory");                                     \
    }

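/*
 * The fcmpset functions built from the macros above implement the
 * compare-and-swap variant that, on failure, writes the value actually
 * found into *_old and returns 0, so a retry loop does not have to reload
 * the target itself.  An illustrative caller, assuming a hypothetical
 * shared counter 'v':
 *
 *	uint32_t old, newv;
 *
 *	old = atomic_load_acq_32(&v);
 *	do {
 *		newv = old + 1;		// 'old' is refreshed by a failed fcmpset
 *	} while (atomic_fcmpset_32(&v, &old, newv) == 0);
 */
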
static __inline int
atomic_fcmpset_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
        return (ret);
}
#define atomic_fcmpset_8        atomic_fcmpset_8

static __inline int
atomic_fcmpset_acq_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
        return (ret);
}

static __inline int
atomic_fcmpset_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
        return (ret);
}
#define atomic_fcmpset_16       atomic_fcmpset_16

static __inline int
atomic_fcmpset_acq_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
        return (ret);
}

static __inline int
atomic_fcmpset_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
        return (ret);
}

static __inline int
atomic_fcmpset_acq_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
        return (ret);
}

static __inline int
atomic_fcmpset_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, u_long, "");
        return (ret);
}

static __inline int
atomic_fcmpset_acq_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, u_long, "");
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE(ret, u_long, "");
        return (ret);
}

static __inline int
atomic_fcmpset_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE64(ret);
        return (ret);
}

static __inline int
atomic_fcmpset_acq_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE64(ret);
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE64(ret);
        return (ret);
}

#define ATOMIC_CMPSET_CODE(RET, SUF)                         \
    {                                                        \
        __asm __volatile(                                    \
            "1: ldrex" SUF "   %[ret], [%[ptr]]          \n" \
            "   teq            %[ret], %[oldv]           \n" \
            "   itee           ne                        \n" \
            "   movne          %[ret], #0                \n" \
            "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n" \
            "   eorseq         %[ret], #1                \n" \
            "   beq            1b                        \n" \
            : [ret] "=&r" (RET)                              \
            : [ptr] "r"   (_ptr),                            \
              [oldv] "r"  (_old),                            \
              [newv] "r"  (_new)                             \
            : "cc", "memory");                               \
    }

#define ATOMIC_CMPSET_CODE64(RET)                                 \
    {                                                             \
        uint64_t tmp;                                             \
                                                                  \
        __asm __volatile(                                         \
            "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n" \
            "   teq      %Q[tmp], %Q[oldv]                    \n" \
            "   it       eq                                   \n" \
            "   teqeq    %R[tmp], %R[oldv]                    \n" \
            "   itee     ne                                   \n" \
            "   movne    %[ret], #0                           \n" \
            "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n" \
            "   eorseq   %[ret], #1                           \n" \
            "   beq      1b                                   \n" \
            : [ret] "=&r" (RET),                                  \
              [tmp] "=&r" (tmp)                                   \
            : [ptr] "r"   (_ptr),                                 \
              [oldv] "r"  (_old),                                 \
              [newv] "r"  (_new)                                  \
            : "cc", "memory");                                    \
    }

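/*
 * The cmpset functions built from the macros above are the classic
 * compare-and-swap: they return nonzero only if *_ptr still held _old and
 * was replaced by _new.  Unlike fcmpset, a failed attempt does not report
 * the value found, so a retry loop reloads it explicitly.  Illustrative
 * sketch only, with a hypothetical flags word 'f' and placeholder mask
 * SOME_FLAG:
 *
 *	uint32_t old;
 *
 *	do {
 *		old = f;			// re-read on every attempt
 *	} while (atomic_cmpset_32(&f, old, old | SOME_FLAG) == 0);
 */
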
static __inline int
atomic_cmpset_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "b");
        return (ret);
}
#define atomic_cmpset_8         atomic_cmpset_8

static __inline int
atomic_cmpset_acq_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "b");
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE(ret, "b");
        return (ret);
}

static __inline int
atomic_cmpset_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "h");
        return (ret);
}
#define atomic_cmpset_16        atomic_cmpset_16

static __inline int
atomic_cmpset_acq_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "h");
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE(ret, "h");
        return (ret);
}

static __inline int
atomic_cmpset_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "");
        return (ret);
}

static __inline int
atomic_cmpset_acq_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "");
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE(ret, "");
        return (ret);
}

static __inline int
atomic_cmpset_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "");
        return (ret);
}

static __inline int
atomic_cmpset_acq_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "");
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE(ret, "");
        return (ret);
}

static __inline int
atomic_cmpset_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE64(ret);
        return (ret);
}

static __inline int
atomic_cmpset_acq_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE64(ret);
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE64(ret);
        return (ret);
}

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
        uint32_t tmp = 0, tmp2 = 0, ret = 0;

        __asm __volatile(
            "1: ldrex   %0, [%3]        \n"
            "   add     %1, %0, %4      \n"
            "   strex   %2, %1, [%3]    \n"
            "   cmp     %2, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "+r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
            : : "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t ret, tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[ret], %R[ret], [%[ptr]]              \n"
            "   adds    %Q[tmp], %Q[ret], %Q[val]               \n"
            "   adc     %R[tmp], %R[ret], %R[val]               \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [ret] "=&r" (ret),
              [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
        return (ret);
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long val)
{

        return (atomic_fetchadd_32((volatile uint32_t *)p, val));
}

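/*
 * atomic_fetchadd_*() returns the value the target held before the
 * addition, which makes it a natural building block for handing out unique
 * indices.  Illustrative sketch only, with a hypothetical shared counter
 * 'next_idx':
 *
 *	uint32_t idx;
 *
 *	idx = atomic_fetchadd_32(&next_idx, 1);	// idx is unique per caller
 */
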
static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
        uint32_t v;

        v = *p;
        dmb();
        return (v);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
        uint64_t ret;

        /*
         * The only way to atomically load 64 bits is with LDREXD which puts the
         * exclusive monitor into the exclusive state, so reset it to open state
         * with CLREX because we don't actually need to store anything.
         */
        __asm __volatile(
            "ldrexd     %Q[ret], %R[ret], [%[ptr]]      \n"
            "clrex                                      \n"
            : [ret] "=&r" (ret)
            : [ptr] "r"   (p)
            : "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
        uint64_t ret;

        ret = atomic_load_64(p);
        dmb();
        return (ret);
}

static __inline u_long
atomic_load_acq_long(volatile u_long *p)
{
        u_long v;

        v = *p;
        dmb();
        return (v);
}

static __inline uint32_t
atomic_readandclear_32(volatile uint32_t *p)
{
        uint32_t ret, tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%3]        \n"
            "   mov     %1, #0          \n"
            "   strex   %2, %1, [%3]    \n"
            "   cmp     %2, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p)
            : : "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
        uint64_t ret, tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[ret], %R[ret], [%[ptr]]              \n"
            "   mov     %Q[tmp], #0                             \n"
            "   mov     %R[tmp], #0                             \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [ret] "=&r" (ret),
              [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p)
            : "cc", "memory");
        return (ret);
}

static __inline u_long
atomic_readandclear_long(volatile u_long *p)
{

        return (atomic_readandclear_32((volatile uint32_t *)p));
}

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   orr     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
            : : "cc", "memory");
}

static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   orr     %Q[tmp], %Q[val]                        \n"
            "   orr     %R[tmp], %R[val]                        \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_set_long(volatile u_long *address, u_long setmask)
{

        atomic_set_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL_LONG(set)

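/*
 * atomic_set_*() and atomic_clear_*() above OR in and BIC out a mask without
 * disturbing the other bits, so concurrent updates of different bits in the
 * same word cannot lose each other.  Illustrative sketch, with a
 * hypothetical shared word 'flags':
 *
 *	atomic_set_32(&flags, 0x01);		// turn a bit on
 *	atomic_clear_32(&flags, 0x01);		// turn the same bit off
 */
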
static __inline void
atomic_subtract_32(volatile uint32_t *p, uint32_t val)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   sub     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
            : : "cc", "memory");
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   subs    %Q[tmp], %Q[val]                        \n"
            "   sbc     %R[tmp], %R[tmp], %R[val]               \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long val)
{

        atomic_subtract_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(subtract, 32)
ATOMIC_ACQ_REL(subtract, 64)
ATOMIC_ACQ_REL_LONG(subtract)

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        /*
         * The only way to atomically store 64 bits is with STREXD, which will
         * succeed only if paired up with a preceding LDREXD using the same
         * address, so we read and discard the existing value before storing.
         */
        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   strexd  %[exf], %Q[val], %R[val], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [tmp] "=&r" (tmp),
              [exf] "=&r" (exflag)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
{

        dmb();
        *p = v;
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

        dmb();
        atomic_store_64(p, val);
}

static __inline void
atomic_store_rel_long(volatile u_long *p, u_long v)
{

        dmb();
        *p = v;
}

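/*
 * The store_rel/load_acq pairs above are meant to be used together: the
 * release store keeps earlier writes visible before the flag, and the
 * acquire load keeps later reads after it.  A minimal hand-off sketch,
 * using hypothetical variables 'payload' and 'flag' for illustration only:
 *
 *	// producer
 *	payload = value;
 *	atomic_store_rel_32(&flag, 1);
 *
 *	// consumer
 *	while (atomic_load_acq_32(&flag) == 0)
 *		;
 *	// the producer's payload is now guaranteed to be visible
 */
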
static __inline int
atomic_testandclear_32(volatile uint32_t *ptr, u_int bit)
{
        int newv, oldv, result;

        __asm __volatile(
            "   mov     ip, #1                                  \n"
            "   lsl     ip, ip, %[bit]                          \n"
            /*  Done with %[bit] as input, reuse below as output. */
            "1:                                                 \n"
            "   ldrex   %[oldv], [%[ptr]]                       \n"
            "   bic     %[newv], %[oldv], ip                    \n"
            "   strex   %[bit], %[newv], [%[ptr]]               \n"
            "   teq     %[bit], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            "   ands    %[bit], %[oldv], ip                     \n"
            "   it      ne                                      \n"
            "   movne   %[bit], #1                              \n"
            : [bit]  "=&r"   (result),
              [oldv] "=&r"   (oldv),
              [newv] "=&r"   (newv)
            : [ptr]  "r"     (ptr),
                     "[bit]" (bit)
            : "cc", "ip", "memory");

        return (result);
}

static __inline int
atomic_testandclear_int(volatile u_int *p, u_int v)
{

        return (atomic_testandclear_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandclear_long(volatile u_long *p, u_int v)
{

        return (atomic_testandclear_32((volatile uint32_t *)p, v));
}
#define atomic_testandclear_long        atomic_testandclear_long

static __inline int
atomic_testandset_32(volatile uint32_t *ptr, u_int bit)
{
        int newv, oldv, result;

        __asm __volatile(
            "   mov     ip, #1                                  \n"
            "   lsl     ip, ip, %[bit]                          \n"
            /*  Done with %[bit] as input, reuse below as output. */
            "1:                                                 \n"
            "   ldrex   %[oldv], [%[ptr]]                       \n"
            "   orr     %[newv], %[oldv], ip                    \n"
            "   strex   %[bit], %[newv], [%[ptr]]               \n"
            "   teq     %[bit], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            "   ands    %[bit], %[oldv], ip                     \n"
            "   it      ne                                      \n"
            "   movne   %[bit], #1                              \n"
            : [bit]  "=&r"   (result),
              [oldv] "=&r"   (oldv),
              [newv] "=&r"   (newv)
            : [ptr]  "r"     (ptr),
                     "[bit]" (bit)
            : "cc", "ip", "memory");

        return (result);
}

static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{

        return (atomic_testandset_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandset_long(volatile u_long *p, u_int v)
{

        return (atomic_testandset_32((volatile uint32_t *)p, v));
}
#define atomic_testandset_long  atomic_testandset_long

static __inline int
atomic_testandset_64(volatile uint64_t *p, u_int v)
{
        volatile uint32_t *p32;

        p32 = (volatile uint32_t *)p;
        /* Assume little-endian */
        if (v >= 32) {
                v &= 0x1f;
                p32++;
        }
        return (atomic_testandset_32(p32, v));
}

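/*
 * atomic_testandset_*() and atomic_testandclear_*() atomically flip one bit
 * (given as a bit index, not a mask) and report its previous state, nonzero
 * if it was set.  That makes testandset usable as a tiny try-lock over a
 * bit, sketched here with a hypothetical bitmap 'busy' for illustration
 * only; a real lock would also need acquire/release barriers around the
 * critical section:
 *
 *	if (atomic_testandset_32(&busy, slot) == 0) {
 *		// the bit was clear and is now ours
 *		atomic_clear_32(&busy, 1u << slot);	// release the slot
 *	}
 */
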
static __inline uint32_t
atomic_swap_32(volatile uint32_t *p, uint32_t v)
{
        uint32_t ret, exflag;

        __asm __volatile(
            "1: ldrex   %[ret], [%[ptr]]                \n"
            "   strex   %[exf], %[val], [%[ptr]]        \n"
            "   teq     %[exf], #0                      \n"
            "   it      ne                              \n"
            "   bne     1b                              \n"
            : [ret] "=&r"  (ret),
              [exf] "=&r" (exflag)
            : [val] "r"  (v),
              [ptr] "r"  (p)
            : "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_swap_64(volatile uint64_t *p, uint64_t v)
{
        uint64_t ret;
        uint32_t exflag;

        __asm __volatile(
            "1: ldrexd  %Q[ret], %R[ret], [%[ptr]]              \n"
            "   strexd  %[exf], %Q[val], %R[val], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [ret] "=&r" (ret),
              [exf] "=&r" (exflag)
            : [val] "r"   (v),
              [ptr] "r"   (p)
            : "cc", "memory");
        return (ret);
}

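/*
 * atomic_swap_*() unconditionally stores the new value and returns the old
 * one.  Illustrative sketch only, with a hypothetical shared slot 'head'
 * being drained by a single consumer:
 *
 *	uint32_t old;
 *
 *	old = atomic_swap_32(&head, 0);	// take whatever was there, leave 0
 */
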
#undef ATOMIC_ACQ_REL
#undef ATOMIC_ACQ_REL_LONG

static __inline void
atomic_thread_fence_acq(void)
{

        dmb();
}

static __inline void
atomic_thread_fence_rel(void)
{

        dmb();
}

static __inline void
atomic_thread_fence_acq_rel(void)
{

        dmb();
}

static __inline void
atomic_thread_fence_seq_cst(void)
{

        dmb();
}
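
/*
 * All four thread fences above collapse to a full DMB on ARMv6/v7; the
 * distinct names exist so callers can state the ordering they actually rely
 * on.  Illustrative sketch, with hypothetical shared variables 'a' and 'b':
 *
 *	a = 1;
 *	atomic_thread_fence_rel();	// 'a' becomes visible before 'b'
 *	b = 1;
 */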

#endif /* _MACHINE_ATOMIC_V6_H_ */