/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */

/*-
 * Copyright (C) 2003-2004 Olivier Houchard
 * Copyright (C) 1994-1997 Mark Brinicombe
 * Copyright (C) 1994 Brini
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Brini.
 * 4. The name of Brini may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#ifndef _MACHINE_ATOMIC_V6_H_
#define _MACHINE_ATOMIC_V6_H_

#ifndef _MACHINE_ATOMIC_H_
#error Do not include this file directly, use <machine/atomic.h>
#endif

#if __ARM_ARCH >= 7
#define isb()  __asm __volatile("isb" : : : "memory")
#define dsb()  __asm __volatile("dsb" : : : "memory")
#define dmb()  __asm __volatile("dmb" : : : "memory")
#elif __ARM_ARCH >= 6
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
#else
#error Only use this file with ARMv6 and later
#endif

#define mb()   dmb()
#define wmb()  dmb()
#define rmb()  dmb()

#define ARM_HAVE_ATOMIC64

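/*
 * The two macros below stamp out acquire/release variants of an existing
 * plain operation: the _acq_ form performs the operation and then issues
 * dmb(), the _rel_ form issues dmb() first.  For example (illustrative
 * expansion), ATOMIC_ACQ_REL(add, 32) generates atomic_add_acq_32() and
 * atomic_add_rel_32(), both wrappers around atomic_add_32().
 */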
#define ATOMIC_ACQ_REL_LONG(NAME)                                       \
static __inline void                                                    \
atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)                \
{                                                                       \
        atomic_##NAME##_long(p, v);                                     \
        dmb();                                                          \
}                                                                       \
                                                                        \
static __inline  void                                                   \
atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)                \
{                                                                       \
        dmb();                                                          \
        atomic_##NAME##_long(p, v);                                     \
}

#define ATOMIC_ACQ_REL(NAME, WIDTH)                                     \
static __inline  void                                                   \
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{                                                                       \
        atomic_##NAME##_##WIDTH(p, v);                                  \
        dmb();                                                          \
}                                                                       \
                                                                        \
static __inline  void                                                   \
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{                                                                       \
        dmb();                                                          \
        atomic_##NAME##_##WIDTH(p, v);                                  \
}

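/*
 * The read-modify-write routines below all follow the same pattern: a
 * load-exclusive (ldrex/ldrexd) reads the old value, the new value is
 * computed, and a store-exclusive (strex/strexd) attempts to write it
 * back; if another observer touched the location in between, the store
 * fails and the loop retries.  None of the plain forms imply a barrier;
 * use the _acq_/_rel_ variants when ordering is required.
 */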
static __inline void
atomic_add_32(volatile uint32_t *p, uint32_t val)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   add     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2)
            ,"+r" (p), "+r" (val) : : "cc", "memory");
}

static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   adds    %Q[tmp], %Q[val]                        \n"
            "   adc     %R[tmp], %R[tmp], %R[val]               \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_add_long(volatile u_long *p, u_long val)
{

        atomic_add_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL_LONG(add)

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   bic     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
            : : "cc", "memory");
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   bic     %Q[tmp], %Q[val]                        \n"
            "   bic     %R[tmp], %R[val]                        \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_clear_long(volatile u_long *address, u_long setmask)
{

        atomic_clear_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL_LONG(clear)

static __inline int
atomic_fcmpset_32(volatile uint32_t *p, uint32_t *cmpval, uint32_t newval)
{
        uint32_t tmp;
        uint32_t _cmpval = *cmpval;
        int ret;

        __asm __volatile(
            "   mov     %0, #1          \n"
            "   ldrex   %1, [%2]        \n"
            "   cmp     %1, %3          \n"
            "   it      eq              \n"
            "   strexeq %0, %4, [%2]    \n"
            : "=&r" (ret), "=&r" (tmp), "+r" (p), "+r" (_cmpval), "+r" (newval)
            : : "cc", "memory");
        *cmpval = tmp;
        return (!ret);
}
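
/*
 * Example use of the fcmpset family (illustrative only; "counter" is a
 * hypothetical variable).  On failure *cmpval is rewritten with the value
 * that was actually found, so a retry loop need not reload it by hand:
 *
 *	uint32_t old = counter;
 *	while (!atomic_fcmpset_32(&counter, &old, old + 1))
 *		continue;
 */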

static __inline int
atomic_fcmpset_64(volatile uint64_t *p, uint64_t *cmpval, uint64_t newval)
{
        uint64_t tmp;
        uint64_t _cmpval = *cmpval;
        int ret;

        __asm __volatile(
            "1: mov     %[ret], #1                              \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   teq     %Q[tmp], %Q[_cmpval]                    \n"
            "   ite     eq                                      \n"
            "   teqeq   %R[tmp], %R[_cmpval]                    \n"
            "   bne     2f                                      \n"
            "   strexd  %[ret], %Q[newval], %R[newval], [%[ptr]]\n"
            "2:                                                 \n"
            : [ret]    "=&r" (ret),
              [tmp]    "=&r" (tmp)
            : [ptr]    "r"   (p),
              [_cmpval] "r"   (_cmpval),
              [newval] "r"   (newval)
            : "cc", "memory");
        *cmpval = tmp;
        return (!ret);
}

static __inline int
atomic_fcmpset_long(volatile u_long *p, u_long *cmpval, u_long newval)
{

        return (atomic_fcmpset_32((volatile uint32_t *)p,
            (uint32_t *)cmpval, newval));
}

static __inline int
atomic_fcmpset_acq_64(volatile uint64_t *p, uint64_t *cmpval, uint64_t newval)
{
        int ret;

        ret = atomic_fcmpset_64(p, cmpval, newval);
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_acq_long(volatile u_long *p, u_long *cmpval, u_long newval)
{
        int ret;

        ret = atomic_fcmpset_long(p, cmpval, newval);
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_acq_32(volatile uint32_t *p, uint32_t *cmpval, uint32_t newval)
{
        int ret;

        ret = atomic_fcmpset_32(p, cmpval, newval);
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_32(volatile uint32_t *p, uint32_t *cmpval, uint32_t newval)
{

        dmb();
        return (atomic_fcmpset_32(p, cmpval, newval));
}

static __inline int
atomic_fcmpset_rel_64(volatile uint64_t *p, uint64_t *cmpval, uint64_t newval)
{

        dmb();
        return (atomic_fcmpset_64(p, cmpval, newval));
}

static __inline int
atomic_fcmpset_rel_long(volatile u_long *p, u_long *cmpval, u_long newval)
{

        dmb();
        return (atomic_fcmpset_long(p, cmpval, newval));
}

static __inline int
atomic_cmpset_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
{
        int ret;

        __asm __volatile(
            "1: ldrex   %0, [%1]        \n"
            "   cmp     %0, %2          \n"
            "   itt     ne              \n"
            "   movne   %0, #0          \n"
            "   bne     2f              \n"
            "   strex   %0, %3, [%1]    \n"
            "   cmp     %0, #0          \n"
            "   ite     eq              \n"
            "   moveq   %0, #1          \n"
            "   bne     1b              \n"
            "2:"
            : "=&r" (ret), "+r" (p), "+r" (cmpval), "+r" (newval)
            : : "cc", "memory");
        return (ret);
}

static __inline int
atomic_cmpset_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
        uint64_t tmp;
        uint32_t ret;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   teq     %Q[tmp], %Q[cmpval]                     \n"
            "   itee    eq                                      \n"
            "   teqeq   %R[tmp], %R[cmpval]                     \n"
            "   movne   %[ret], #0                              \n"
            "   bne     2f                                      \n"
            "   strexd  %[ret], %Q[newval], %R[newval], [%[ptr]]\n"
            "   teq     %[ret], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            "   mov     %[ret], #1                              \n"
            "2:                                                 \n"
            : [ret]    "=&r" (ret),
              [tmp]    "=&r" (tmp)
            : [ptr]    "r"   (p),
              [cmpval] "r"   (cmpval),
              [newval] "r"   (newval)
            : "cc", "memory");
        return (ret);
}

static __inline int
atomic_cmpset_long(volatile u_long *p, u_long cmpval, u_long newval)
{

        return (atomic_cmpset_32((volatile uint32_t *)p, cmpval, newval));
}
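
/*
 * Unlike fcmpset, cmpset does not report the value it observed; it simply
 * returns non-zero when the swap happened and zero otherwise.  A typical
 * (illustrative) one-shot claim of a word that starts out as zero, with
 * "owner" and "my_id" as hypothetical names:
 *
 *	if (atomic_cmpset_acq_32(&owner, 0, my_id)) {
 *		... this CPU now owns the resource ...
 *	}
 */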

static __inline int
atomic_cmpset_acq_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
{
        int ret;

        ret = atomic_cmpset_32(p, cmpval, newval);
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_acq_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{
        int ret;

        ret = atomic_cmpset_64(p, cmpval, newval);
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_acq_long(volatile u_long *p, u_long cmpval, u_long newval)
{
        int ret;

        ret = atomic_cmpset_long(p, cmpval, newval);
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_32(volatile uint32_t *p, uint32_t cmpval, uint32_t newval)
{

        dmb();
        return (atomic_cmpset_32(p, cmpval, newval));
}

static __inline int
atomic_cmpset_rel_64(volatile uint64_t *p, uint64_t cmpval, uint64_t newval)
{

        dmb();
        return (atomic_cmpset_64(p, cmpval, newval));
}

static __inline int
atomic_cmpset_rel_long(volatile u_long *p, u_long cmpval, u_long newval)
{

        dmb();
        return (atomic_cmpset_long(p, cmpval, newval));
}

static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
        uint32_t tmp = 0, tmp2 = 0, ret = 0;

        __asm __volatile(
            "1: ldrex   %0, [%3]        \n"
            "   add     %1, %0, %4      \n"
            "   strex   %2, %1, [%3]    \n"
            "   cmp     %2, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "+r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
            : : "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t ret, tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[ret], %R[ret], [%[ptr]]              \n"
            "   adds    %Q[tmp], %Q[ret], %Q[val]               \n"
            "   adc     %R[tmp], %R[ret], %R[val]               \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [ret] "=&r" (ret),
              [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
        return (ret);
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long val)
{

        return (atomic_fetchadd_32((volatile uint32_t *)p, val));
}
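
/*
 * fetchadd returns the value the target held before the addition, which
 * makes it handy for handing out unique sequence numbers.  Illustrative
 * use, with "next_ticket" as a hypothetical counter:
 *
 *	uint32_t ticket = atomic_fetchadd_32(&next_ticket, 1);
 */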

static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
        uint32_t v;

        v = *p;
        dmb();
        return (v);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
        uint64_t ret;

        /*
         * The only way to atomically load 64 bits is with LDREXD which puts the
         * exclusive monitor into the exclusive state, so reset it to open state
         * with CLREX because we don't actually need to store anything.
         */
        __asm __volatile(
            "ldrexd     %Q[ret], %R[ret], [%[ptr]]      \n"
            "clrex                                      \n"
            : [ret] "=&r" (ret)
            : [ptr] "r"   (p)
            : "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
        uint64_t ret;

        ret = atomic_load_64(p);
        dmb();
        return (ret);
}

static __inline u_long
atomic_load_acq_long(volatile u_long *p)
{
        u_long v;

        v = *p;
        dmb();
        return (v);
}

static __inline uint32_t
atomic_readandclear_32(volatile uint32_t *p)
{
        uint32_t ret, tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%3]        \n"
            "   mov     %1, #0          \n"
            "   strex   %2, %1, [%3]    \n"
            "   cmp     %2, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p)
            : : "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
        uint64_t ret, tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[ret], %R[ret], [%[ptr]]              \n"
            "   mov     %Q[tmp], #0                             \n"
            "   mov     %R[tmp], #0                             \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [ret] "=&r" (ret),
              [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p)
            : "cc", "memory");
        return (ret);
}

static __inline u_long
atomic_readandclear_long(volatile u_long *p)
{

        return (atomic_readandclear_32((volatile uint32_t *)p));
}

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   orr     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
            : : "cc", "memory");
}

static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   orr     %Q[tmp], %Q[val]                        \n"
            "   orr     %R[tmp], %R[val]                        \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_set_long(volatile u_long *address, u_long setmask)
{

        atomic_set_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL_LONG(set)

static __inline void
atomic_subtract_32(volatile uint32_t *p, uint32_t val)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   sub     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
            : : "cc", "memory");
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   subs    %Q[tmp], %Q[val]                        \n"
            "   sbc     %R[tmp], %R[tmp], %R[val]               \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long val)
{

        atomic_subtract_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(subtract, 32)
ATOMIC_ACQ_REL(subtract, 64)
ATOMIC_ACQ_REL_LONG(subtract)

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        /*
         * The only way to atomically store 64 bits is with STREXD, which will
         * succeed only if paired up with a preceding LDREXD using the same
         * address, so we read and discard the existing value before storing.
         */
        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   strexd  %[exf], %Q[val], %R[val], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [tmp] "=&r" (tmp),
              [exf] "=&r" (exflag)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
{

        dmb();
        *p = v;
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

        dmb();
        atomic_store_64(p, val);
}

static __inline void
atomic_store_rel_long(volatile u_long *p, u_long v)
{

        dmb();
        *p = v;
}
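
/*
 * The release stores above pair with the acquire loads earlier in this
 * file.  Illustrative hand-off, with "data" and "ready" as hypothetical
 * variables: the dmb() in atomic_store_rel_32() orders the data writes
 * before the flag becomes visible, and the dmb() in atomic_load_acq_32()
 * orders the flag read before the data reads.
 *
 *	producer:  data = compute(); atomic_store_rel_32(&ready, 1);
 *	consumer:  while (atomic_load_acq_32(&ready) == 0) ; use(data);
 */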

static __inline int
atomic_testandset_32(volatile uint32_t *p, u_int v)
{
        uint32_t tmp, tmp2, res, mask;

        mask = 1u << (v & 0x1f);
        tmp = tmp2 = 0;
        __asm __volatile(
        "1:     ldrex   %0, [%4]        \n"
        "       orr     %1, %0, %3      \n"
        "       strex   %2, %1, [%4]    \n"
        "       cmp     %2, #0          \n"
        "       it      ne              \n"
        "       bne     1b              \n"
        : "=&r" (res), "=&r" (tmp), "=&r" (tmp2)
        : "r" (mask), "r" (p)
        : "cc", "memory");
        return ((res & mask) != 0);
}

static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{

        return (atomic_testandset_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandset_long(volatile u_long *p, u_int v)
{

        return (atomic_testandset_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandset_64(volatile uint64_t *p, u_int v)
{
        volatile uint32_t *p32;

        p32 = (volatile uint32_t *)p;
        /* Assume little-endian */
        if (v >= 32) {
                v &= 0x1f;
                p32++;
        }
        return (atomic_testandset_32(p32, v));
}
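
/*
 * The bit index is taken modulo the word size, so (illustratively)
 * atomic_testandset_32(p, 37) operates on bit 5 of *p, while
 * atomic_testandset_64(p, 37) first steps to the upper 32-bit half
 * (little-endian layout assumed, as noted above) and then operates on
 * bit 5 of that half, i.e. bit 37 of the 64-bit value.
 */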

static __inline uint32_t
atomic_swap_32(volatile uint32_t *p, uint32_t v)
{
        uint32_t ret, exflag;

        __asm __volatile(
            "1: ldrex   %[ret], [%[ptr]]                \n"
            "   strex   %[exf], %[val], [%[ptr]]        \n"
            "   teq     %[exf], #0                      \n"
            "   it      ne                              \n"
            "   bne     1b                              \n"
            : [ret] "=&r"  (ret),
              [exf] "=&r" (exflag)
            : [val] "r"  (v),
              [ptr] "r"  (p)
            : "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_swap_64(volatile uint64_t *p, uint64_t v)
{
        uint64_t ret;
        uint32_t exflag;

        __asm __volatile(
            "1: ldrexd  %Q[ret], %R[ret], [%[ptr]]              \n"
            "   strexd  %[exf], %Q[val], %R[val], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [ret] "=&r" (ret),
              [exf] "=&r" (exflag)
            : [val] "r"   (v),
              [ptr] "r"   (p)
            : "cc", "memory");
        return (ret);
}

#undef ATOMIC_ACQ_REL
#undef ATOMIC_ACQ_REL_LONG

static __inline void
atomic_thread_fence_acq(void)
{

        dmb();
}

static __inline void
atomic_thread_fence_rel(void)
{

        dmb();
}

static __inline void
atomic_thread_fence_acq_rel(void)
{

        dmb();
}

static __inline void
atomic_thread_fence_seq_cst(void)
{

        dmb();
}

#endif /* _MACHINE_ATOMIC_V6_H_ */