/* $NetBSD: atomic.h,v 1.1 2002/10/19 12:22:34 bsh Exp $ */

/*-
 * SPDX-License-Identifier: BSD-4-Clause
 *
 * Copyright (C) 2003-2004 Olivier Houchard
 * Copyright (C) 1994-1997 Mark Brinicombe
 * Copyright (C) 1994 Brini
 * All rights reserved.
 *
 * This code is derived from software written for Brini by Mark Brinicombe
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by Brini.
 * 4. The name of Brini may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY BRINI ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL BRINI BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef _MACHINE_ATOMIC_H_
#define _MACHINE_ATOMIC_H_

#include <sys/atomic_common.h>

#if __ARM_ARCH >= 7
#define isb()  __asm __volatile("isb" : : : "memory")
#define dsb()  __asm __volatile("dsb" : : : "memory")
#define dmb()  __asm __volatile("dmb" : : : "memory")
#else
#define isb()  __asm __volatile("mcr p15, 0, %0, c7, c5, 4" : : "r" (0) : "memory")
#define dsb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 4" : : "r" (0) : "memory")
#define dmb()  __asm __volatile("mcr p15, 0, %0, c7, c10, 5" : : "r" (0) : "memory")
#endif

#define mb()   dmb()
#define wmb()  dmb()
#define rmb()  dmb()

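/*
 * This header maps mb(), rmb(), and wmb() all to a full data memory
 * barrier rather than to weaker read- or write-only fences.  A hedged
 * producer-side sketch of their use (the variable names are illustrative,
 * not part of this header):
 *
 *      shared_data = value;    // payload store
 *      wmb();                  // order the payload before the flag
 *      shared_flag = 1;        // publish
 */
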
#define ARM_HAVE_ATOMIC64

#define ATOMIC_ACQ_REL_LONG(NAME)                                       \
static __inline void                                                    \
atomic_##NAME##_acq_long(__volatile u_long *p, u_long v)                \
{                                                                       \
        atomic_##NAME##_long(p, v);                                     \
        dmb();                                                          \
}                                                                       \
                                                                        \
static __inline void                                                    \
atomic_##NAME##_rel_long(__volatile u_long *p, u_long v)                \
{                                                                       \
        dmb();                                                          \
        atomic_##NAME##_long(p, v);                                     \
}

#define ATOMIC_ACQ_REL(NAME, WIDTH)                                     \
static __inline void                                                    \
atomic_##NAME##_acq_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{                                                                       \
        atomic_##NAME##_##WIDTH(p, v);                                  \
        dmb();                                                          \
}                                                                       \
                                                                        \
static __inline void                                                    \
atomic_##NAME##_rel_##WIDTH(__volatile uint##WIDTH##_t *p, uint##WIDTH##_t v)\
{                                                                       \
        dmb();                                                          \
        atomic_##NAME##_##WIDTH(p, v);                                  \
}

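/*
 * For instance, ATOMIC_ACQ_REL(add, 32) generates atomic_add_acq_32() and
 * atomic_add_rel_32(): the acquire form performs the operation and then
 * issues a dmb so later accesses cannot be reordered before it, while the
 * release form issues the dmb first so earlier accesses complete before
 * the operation becomes visible.
 */
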
static __inline void
atomic_add_32(volatile uint32_t *p, uint32_t val)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   add     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
            : : "cc", "memory");
}

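/*
 * The LDREX/STREX sequence above is ARM's load-linked/store-conditional
 * pattern: LDREX marks the address for exclusive access, STREX succeeds
 * (writing 0 to its status register) only if nothing else touched the
 * location in between, and the loop retries otherwise.  Roughly
 * equivalent C, using hypothetical helpers:
 *
 *      do {
 *              tmp = load_exclusive(p);                // LDREX
 *      } while (store_exclusive(p, tmp + val));        // STREX, 0 on success
 */
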
static __inline void
atomic_add_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   adds    %Q[tmp], %Q[val]                        \n"
            "   adc     %R[tmp], %R[tmp], %R[val]               \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_add_long(volatile u_long *p, u_long val)
{

        atomic_add_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(add, 32)
ATOMIC_ACQ_REL(add, 64)
ATOMIC_ACQ_REL_LONG(add)

static __inline void
atomic_clear_32(volatile uint32_t *address, uint32_t setmask)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   bic     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
            : : "cc", "memory");
}

static __inline void
atomic_clear_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   bic     %Q[tmp], %Q[val]                        \n"
            "   bic     %R[tmp], %R[val]                        \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_clear_long(volatile u_long *address, u_long setmask)
{

        atomic_clear_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(clear, 32)
ATOMIC_ACQ_REL(clear, 64)
ATOMIC_ACQ_REL_LONG(clear)

#define ATOMIC_FCMPSET_CODE(RET, TYPE, SUF)                   \
    {                                                         \
        TYPE tmp;                                             \
                                                              \
        __asm __volatile(                                     \
            "1: ldrex" SUF "   %[tmp], [%[ptr]]          \n"  \
            "   ldr" SUF "     %[ret], [%[oldv]]         \n"  \
            "   teq            %[tmp], %[ret]            \n"  \
            "   ittee          ne                        \n"  \
            "   str" SUF "ne   %[tmp], [%[oldv]]         \n"  \
            "   movne          %[ret], #0                \n"  \
            "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n"  \
            "   eorseq         %[ret], #1                \n"  \
            "   beq            1b                        \n"  \
            : [ret] "=&r" (RET),                              \
              [tmp] "=&r" (tmp)                               \
            : [ptr] "r"   (_ptr),                             \
              [oldv] "r"  (_old),                             \
              [newv] "r"  (_new)                              \
            : "cc", "memory");                                \
    }

#define ATOMIC_FCMPSET_CODE64(RET)                                 \
    {                                                              \
        uint64_t cmp, tmp;                                         \
                                                                   \
        __asm __volatile(                                          \
            "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n"  \
            "   ldrd     %Q[cmp], %R[cmp], [%[oldv]]          \n"  \
            "   teq      %Q[tmp], %Q[cmp]                     \n"  \
            "   it       eq                                   \n"  \
            "   teqeq    %R[tmp], %R[cmp]                     \n"  \
            "   ittee    ne                                   \n"  \
            "   movne    %[ret], #0                           \n"  \
            "   strdne   %[cmp], [%[oldv]]                    \n"  \
            "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n"  \
            "   eorseq   %[ret], #1                           \n"  \
            "   beq      1b                                   \n"  \
            : [ret] "=&r" (RET),                                   \
              [cmp] "=&r" (cmp),                                   \
              [tmp] "=&r" (tmp)                                    \
            : [ptr] "r"   (_ptr),                                  \
              [oldv] "r"  (_old),                                  \
              [newv] "r"  (_new)                                   \
            : "cc", "memory");                                     \
    }

static __inline int
atomic_fcmpset_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
        return (ret);
}
#define atomic_fcmpset_8        atomic_fcmpset_8

static __inline int
atomic_fcmpset_acq_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_8(volatile uint8_t *_ptr, uint8_t *_old, uint8_t _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE(ret, uint8_t, "b");
        return (ret);
}

static __inline int
atomic_fcmpset_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
        return (ret);
}
#define atomic_fcmpset_16       atomic_fcmpset_16

static __inline int
atomic_fcmpset_acq_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_16(volatile uint16_t *_ptr, uint16_t *_old, uint16_t _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE(ret, uint16_t, "h");
        return (ret);
}

static __inline int
atomic_fcmpset_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
        return (ret);
}

static __inline int
atomic_fcmpset_acq_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_32(volatile uint32_t *_ptr, uint32_t *_old, uint32_t _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE(ret, uint32_t, "");
        return (ret);
}

static __inline int
atomic_fcmpset_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, u_long, "");
        return (ret);
}

static __inline int
atomic_fcmpset_acq_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE(ret, u_long, "");
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_long(volatile u_long *_ptr, u_long *_old, u_long _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE(ret, u_long, "");
        return (ret);
}

static __inline int
atomic_fcmpset_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE64(ret);
        return (ret);
}

static __inline int
atomic_fcmpset_acq_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
        int ret;

        ATOMIC_FCMPSET_CODE64(ret);
        dmb();
        return (ret);
}

static __inline int
atomic_fcmpset_rel_64(volatile uint64_t *_ptr, uint64_t *_old, uint64_t _new)
{
        int ret;

        dmb();
        ATOMIC_FCMPSET_CODE64(ret);
        return (ret);
}

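/*
 * The fcmpset family returns non-zero on success; on failure it stores the
 * value actually observed through the _old pointer, so callers can retry
 * without reloading.  A hedged sketch of the usual loop (FLAG_BIT is
 * illustrative, not defined here):
 *
 *      uint32_t newv, old = atomic_load_acq_32(&word);
 *      do {
 *              newv = old | FLAG_BIT;
 *      } while (!atomic_fcmpset_32(&word, &old, newv));
 */
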
#define ATOMIC_CMPSET_CODE(RET, SUF)                         \
    {                                                        \
        __asm __volatile(                                    \
            "1: ldrex" SUF "   %[ret], [%[ptr]]          \n" \
            "   teq            %[ret], %[oldv]           \n" \
            "   itee           ne                        \n" \
            "   movne          %[ret], #0                \n" \
            "   strex" SUF "eq %[ret], %[newv], [%[ptr]] \n" \
            "   eorseq         %[ret], #1                \n" \
            "   beq            1b                        \n" \
            : [ret] "=&r" (RET)                              \
            : [ptr] "r"   (_ptr),                            \
              [oldv] "r"  (_old),                            \
              [newv] "r"  (_new)                             \
            : "cc", "memory");                               \
    }

#define ATOMIC_CMPSET_CODE64(RET)                                 \
    {                                                             \
        uint64_t tmp;                                             \
                                                                  \
        __asm __volatile(                                         \
            "1: ldrexd   %Q[tmp], %R[tmp], [%[ptr]]           \n" \
            "   teq      %Q[tmp], %Q[oldv]                    \n" \
            "   it       eq                                   \n" \
            "   teqeq    %R[tmp], %R[oldv]                    \n" \
            "   itee     ne                                   \n" \
            "   movne    %[ret], #0                           \n" \
            "   strexdeq %[ret], %Q[newv], %R[newv], [%[ptr]] \n" \
            "   eorseq   %[ret], #1                           \n" \
            "   beq      1b                                   \n" \
            : [ret] "=&r" (RET),                                  \
              [tmp] "=&r" (tmp)                                   \
            : [ptr] "r"   (_ptr),                                 \
              [oldv] "r"  (_old),                                 \
              [newv] "r"  (_new)                                  \
            : "cc", "memory");                                    \
    }

static __inline int
atomic_cmpset_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "b");
        return (ret);
}
#define atomic_cmpset_8         atomic_cmpset_8

static __inline int
atomic_cmpset_acq_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "b");
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_8(volatile uint8_t *_ptr, uint8_t _old, uint8_t _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE(ret, "b");
        return (ret);
}

static __inline int
atomic_cmpset_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "h");
        return (ret);
}
#define atomic_cmpset_16        atomic_cmpset_16

static __inline int
atomic_cmpset_acq_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "h");
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_16(volatile uint16_t *_ptr, uint16_t _old, uint16_t _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE(ret, "h");
        return (ret);
}

static __inline int
atomic_cmpset_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "");
        return (ret);
}

static __inline int
atomic_cmpset_acq_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "");
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_32(volatile uint32_t *_ptr, uint32_t _old, uint32_t _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE(ret, "");
        return (ret);
}

static __inline int
atomic_cmpset_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "");
        return (ret);
}

static __inline int
atomic_cmpset_acq_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
        int ret;

        ATOMIC_CMPSET_CODE(ret, "");
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_long(volatile u_long *_ptr, u_long _old, u_long _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE(ret, "");
        return (ret);
}

static __inline int
atomic_cmpset_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE64(ret);
        return (ret);
}

static __inline int
atomic_cmpset_acq_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
        int ret;

        ATOMIC_CMPSET_CODE64(ret);
        dmb();
        return (ret);
}

static __inline int
atomic_cmpset_rel_64(volatile uint64_t *_ptr, uint64_t _old, uint64_t _new)
{
        int ret;

        dmb();
        ATOMIC_CMPSET_CODE64(ret);
        return (ret);
}

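/*
 * The cmpset family is the by-value variant: it returns non-zero when *_ptr
 * matched _old and was replaced with _new, and zero otherwise.  Unlike
 * fcmpset it does not report the observed value, so a retry loop reloads
 * explicitly, e.g.:
 *
 *      do {
 *              old = atomic_load_acq_32(&word);
 *      } while (!atomic_cmpset_32(&word, old, old + 1));
 */
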
static __inline uint32_t
atomic_fetchadd_32(volatile uint32_t *p, uint32_t val)
{
        uint32_t tmp = 0, tmp2 = 0, ret = 0;

        __asm __volatile(
            "1: ldrex   %0, [%3]        \n"
            "   add     %1, %0, %4      \n"
            "   strex   %2, %1, [%3]    \n"
            "   cmp     %2, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "+r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
            : : "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_fetchadd_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t ret, tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[ret], %R[ret], [%[ptr]]              \n"
            "   adds    %Q[tmp], %Q[ret], %Q[val]               \n"
            "   adc     %R[tmp], %R[ret], %R[val]               \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [ret] "=&r" (ret),
              [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
        return (ret);
}

static __inline u_long
atomic_fetchadd_long(volatile u_long *p, u_long val)
{

        return (atomic_fetchadd_32((volatile uint32_t *)p, val));
}

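/*
 * fetchadd returns the value the target held before the addition, making
 * it suitable for ticket-style sequencing.  A sketch (next_ticket is an
 * arbitrary counter, not defined here):
 *
 *      my_ticket = atomic_fetchadd_32(&next_ticket, 1);
 */
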
static __inline uint32_t
atomic_load_acq_32(volatile uint32_t *p)
{
        uint32_t v;

        v = *p;
        dmb();
        return (v);
}

static __inline uint64_t
atomic_load_64(volatile uint64_t *p)
{
        uint64_t ret;

        /*
         * The only way to atomically load 64 bits is with LDREXD which puts the
         * exclusive monitor into the exclusive state, so reset it to open state
         * with CLREX because we don't actually need to store anything.
         */
        __asm __volatile(
            "ldrexd     %Q[ret], %R[ret], [%[ptr]]      \n"
            "clrex                                      \n"
            : [ret] "=&r" (ret)
            : [ptr] "r"   (p)
            : "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_load_acq_64(volatile uint64_t *p)
{
        uint64_t ret;

        ret = atomic_load_64(p);
        dmb();
        return (ret);
}

static __inline u_long
atomic_load_acq_long(volatile u_long *p)
{
        u_long v;

        v = *p;
        dmb();
        return (v);
}

static __inline uint32_t
atomic_readandclear_32(volatile uint32_t *p)
{
        uint32_t ret, tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%3]        \n"
            "   mov     %1, #0          \n"
            "   strex   %2, %1, [%3]    \n"
            "   cmp     %2, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=r" (ret), "=&r" (tmp), "+r" (tmp2), "+r" (p)
            : : "cc", "memory");
        return (ret);
}

static __inline uint64_t
atomic_readandclear_64(volatile uint64_t *p)
{
        uint64_t ret, tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[ret], %R[ret], [%[ptr]]              \n"
            "   mov     %Q[tmp], #0                             \n"
            "   mov     %R[tmp], #0                             \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [ret] "=&r" (ret),
              [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p)
            : "cc", "memory");
        return (ret);
}

static __inline u_long
atomic_readandclear_long(volatile u_long *p)
{

        return (atomic_readandclear_32((volatile uint32_t *)p));
}

static __inline void
atomic_set_32(volatile uint32_t *address, uint32_t setmask)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   orr     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2), "+r" (address), "+r" (setmask)
            : : "cc", "memory");
}

static __inline void
atomic_set_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   orr     %Q[tmp], %Q[val]                        \n"
            "   orr     %R[tmp], %R[val]                        \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_set_long(volatile u_long *address, u_long setmask)
{

        atomic_set_32((volatile uint32_t *)address, setmask);
}

ATOMIC_ACQ_REL(set, 32)
ATOMIC_ACQ_REL(set, 64)
ATOMIC_ACQ_REL_LONG(set)

static __inline void
atomic_subtract_32(volatile uint32_t *p, uint32_t val)
{
        uint32_t tmp = 0, tmp2 = 0;

        __asm __volatile(
            "1: ldrex   %0, [%2]        \n"
            "   sub     %0, %0, %3      \n"
            "   strex   %1, %0, [%2]    \n"
            "   cmp     %1, #0          \n"
            "   it      ne              \n"
            "   bne     1b              \n"
            : "=&r" (tmp), "+r" (tmp2), "+r" (p), "+r" (val)
            : : "cc", "memory");
}

static __inline void
atomic_subtract_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   subs    %Q[tmp], %Q[val]                        \n"
            "   sbc     %R[tmp], %R[tmp], %R[val]               \n"
            "   strexd  %[exf], %Q[tmp], %R[tmp], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [exf] "=&r" (exflag),
              [tmp] "=&r" (tmp)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_subtract_long(volatile u_long *p, u_long val)
{

        atomic_subtract_32((volatile uint32_t *)p, val);
}

ATOMIC_ACQ_REL(subtract, 32)
ATOMIC_ACQ_REL(subtract, 64)
ATOMIC_ACQ_REL_LONG(subtract)

static __inline void
atomic_store_64(volatile uint64_t *p, uint64_t val)
{
        uint64_t tmp;
        uint32_t exflag;

        /*
         * The only way to atomically store 64 bits is with STREXD, which will
         * succeed only if paired up with a preceding LDREXD using the same
         * address, so we read and discard the existing value before storing.
         */
        __asm __volatile(
            "1:                                                 \n"
            "   ldrexd  %Q[tmp], %R[tmp], [%[ptr]]              \n"
            "   strexd  %[exf], %Q[val], %R[val], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [tmp] "=&r" (tmp),
              [exf] "=&r" (exflag)
            : [ptr] "r"   (p),
              [val] "r"   (val)
            : "cc", "memory");
}

static __inline void
atomic_store_rel_32(volatile uint32_t *p, uint32_t v)
{

        dmb();
        *p = v;
}

static __inline void
atomic_store_rel_64(volatile uint64_t *p, uint64_t val)
{

        dmb();
        atomic_store_64(p, val);
}

static __inline void
atomic_store_rel_long(volatile u_long *p, u_long v)
{

        dmb();
        *p = v;
}

static __inline int
atomic_testandclear_32(volatile uint32_t *ptr, u_int bit)
{
        int newv, oldv, result;

        __asm __volatile(
            "   mov     ip, #1                                  \n"
            "   lsl     ip, ip, %[bit]                          \n"
            /*  Done with %[bit] as input, reuse below as output. */
            "1:                                                 \n"
            "   ldrex   %[oldv], [%[ptr]]                       \n"
            "   bic     %[newv], %[oldv], ip                    \n"
            "   strex   %[bit], %[newv], [%[ptr]]               \n"
            "   teq     %[bit], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            "   ands    %[bit], %[oldv], ip                     \n"
            "   it      ne                                      \n"
            "   movne   %[bit], #1                              \n"
            : [bit]  "=&r"   (result),
              [oldv] "=&r"   (oldv),
              [newv] "=&r"   (newv)
            : [ptr]  "r"     (ptr),
                     "[bit]" (bit & 0x1f)
            : "cc", "ip", "memory");

        return (result);
}

static __inline int
atomic_testandclear_int(volatile u_int *p, u_int v)
{

        return (atomic_testandclear_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandclear_long(volatile u_long *p, u_int v)
{

        return (atomic_testandclear_32((volatile uint32_t *)p, v));
}
#define atomic_testandclear_long        atomic_testandclear_long

static __inline int
atomic_testandclear_64(volatile uint64_t *p, u_int v)
{
        volatile uint32_t *p32;

        p32 = (volatile uint32_t *)p;
        /*
         * Assume little-endian; atomic_testandclear_32() uses only the
         * low 5 bits of v.
         */
        if ((v & 0x20) != 0)
                p32++;
        return (atomic_testandclear_32(p32, v));
}

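/*
 * In atomic_testandclear_64() above (and atomic_testandset_64() below),
 * bit 5 of v selects which 32-bit half of the 64-bit word is touched and
 * the low 5 bits select the bit within that half; e.g. bit 37 becomes
 * bit 5 of the higher-addressed (on little-endian, more significant) word.
 */
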
static __inline int
atomic_testandset_32(volatile uint32_t *ptr, u_int bit)
{
        int newv, oldv, result;

        __asm __volatile(
            "   mov     ip, #1                                  \n"
            "   lsl     ip, ip, %[bit]                          \n"
            /*  Done with %[bit] as input, reuse below as output. */
            "1:                                                 \n"
            "   ldrex   %[oldv], [%[ptr]]                       \n"
            "   orr     %[newv], %[oldv], ip                    \n"
            "   strex   %[bit], %[newv], [%[ptr]]               \n"
            "   teq     %[bit], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            "   ands    %[bit], %[oldv], ip                     \n"
            "   it      ne                                      \n"
            "   movne   %[bit], #1                              \n"
            : [bit]  "=&r"   (result),
              [oldv] "=&r"   (oldv),
              [newv] "=&r"   (newv)
            : [ptr]  "r"     (ptr),
                     "[bit]" (bit & 0x1f)
            : "cc", "ip", "memory");

        return (result);
}

static __inline int
atomic_testandset_int(volatile u_int *p, u_int v)
{

        return (atomic_testandset_32((volatile uint32_t *)p, v));
}

static __inline int
atomic_testandset_long(volatile u_long *p, u_int v)
{

        return (atomic_testandset_32((volatile uint32_t *)p, v));
}
#define atomic_testandset_long  atomic_testandset_long

static __inline int
atomic_testandset_64(volatile uint64_t *p, u_int v)
{
        volatile uint32_t *p32;

        p32 = (volatile uint32_t *)p;
        /*
         * Assume little-endian; atomic_testandset_32() uses only the
         * low 5 bits of v.
         */
        if ((v & 0x20) != 0)
                p32++;
        return (atomic_testandset_32(p32, v));
}

static __inline uint32_t
atomic_swap_32(volatile uint32_t *p, uint32_t v)
{
        uint32_t ret, exflag;

        __asm __volatile(
            "1: ldrex   %[ret], [%[ptr]]                \n"
            "   strex   %[exf], %[val], [%[ptr]]        \n"
            "   teq     %[exf], #0                      \n"
            "   it      ne                              \n"
            "   bne     1b                              \n"
            : [ret] "=&r" (ret),
              [exf] "=&r" (exflag)
            : [val] "r"  (v),
              [ptr] "r"  (p)
            : "cc", "memory");
        return (ret);
}

static __inline u_long
atomic_swap_long(volatile u_long *p, u_long v)
{

        return (atomic_swap_32((volatile uint32_t *)p, v));
}

static __inline uint64_t
atomic_swap_64(volatile uint64_t *p, uint64_t v)
{
        uint64_t ret;
        uint32_t exflag;

        __asm __volatile(
            "1: ldrexd  %Q[ret], %R[ret], [%[ptr]]              \n"
            "   strexd  %[exf], %Q[val], %R[val], [%[ptr]]      \n"
            "   teq     %[exf], #0                              \n"
            "   it      ne                                      \n"
            "   bne     1b                                      \n"
            : [ret] "=&r" (ret),
              [exf] "=&r" (exflag)
            : [val] "r"   (v),
              [ptr] "r"   (p)
            : "cc", "memory");
        return (ret);
}

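/*
 * swap is an unconditional exchange: the new value always lands and the
 * previous contents are returned.  No acquire or release variants are
 * provided here, so callers needing ordering can pair a swap with the
 * fences defined below.
 */
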
#undef ATOMIC_ACQ_REL
#undef ATOMIC_ACQ_REL_LONG

static __inline void
atomic_thread_fence_acq(void)
{

        dmb();
}

static __inline void
atomic_thread_fence_rel(void)
{

        dmb();
}

static __inline void
atomic_thread_fence_acq_rel(void)
{

        dmb();
}

static __inline void
atomic_thread_fence_seq_cst(void)
{

        dmb();
}

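/*
 * All four fence strengths are implemented as a full DMB, so on this
 * architecture level the acquire, release, acq_rel, and seq_cst fences
 * are indistinguishable in cost.
 */
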
#define atomic_clear_ptr                atomic_clear_32
#define atomic_clear_acq_ptr            atomic_clear_acq_32
#define atomic_clear_rel_ptr            atomic_clear_rel_32
#define atomic_set_ptr                  atomic_set_32
#define atomic_set_acq_ptr              atomic_set_acq_32
#define atomic_set_rel_ptr              atomic_set_rel_32
#define atomic_fcmpset_ptr              atomic_fcmpset_32
#define atomic_fcmpset_rel_ptr          atomic_fcmpset_rel_32
#define atomic_fcmpset_acq_ptr          atomic_fcmpset_acq_32
#define atomic_cmpset_ptr               atomic_cmpset_32
#define atomic_cmpset_acq_ptr           atomic_cmpset_acq_32
#define atomic_cmpset_rel_ptr           atomic_cmpset_rel_32
#define atomic_load_acq_ptr             atomic_load_acq_32
#define atomic_store_rel_ptr            atomic_store_rel_32
#define atomic_swap_ptr                 atomic_swap_32
#define atomic_readandclear_ptr         atomic_readandclear_32

#define atomic_add_int                  atomic_add_32
#define atomic_add_acq_int              atomic_add_acq_32
#define atomic_add_rel_int              atomic_add_rel_32
#define atomic_subtract_int             atomic_subtract_32
#define atomic_subtract_acq_int         atomic_subtract_acq_32
#define atomic_subtract_rel_int         atomic_subtract_rel_32
#define atomic_clear_int                atomic_clear_32
#define atomic_clear_acq_int            atomic_clear_acq_32
#define atomic_clear_rel_int            atomic_clear_rel_32
#define atomic_set_int                  atomic_set_32
#define atomic_set_acq_int              atomic_set_acq_32
#define atomic_set_rel_int              atomic_set_rel_32
#define atomic_fcmpset_int              atomic_fcmpset_32
#define atomic_fcmpset_acq_int          atomic_fcmpset_acq_32
#define atomic_fcmpset_rel_int          atomic_fcmpset_rel_32
#define atomic_cmpset_int               atomic_cmpset_32
#define atomic_cmpset_acq_int           atomic_cmpset_acq_32
#define atomic_cmpset_rel_int           atomic_cmpset_rel_32
#define atomic_fetchadd_int             atomic_fetchadd_32
#define atomic_readandclear_int         atomic_readandclear_32
#define atomic_load_acq_int             atomic_load_acq_32
#define atomic_store_rel_int            atomic_store_rel_32
#define atomic_swap_int                 atomic_swap_32

/*
 * For:
 *  - atomic_load_acq_8
 *  - atomic_load_acq_16
 *  - atomic_testandset_acq_long
 */
#include <sys/_atomic_subword.h>

#endif /* _MACHINE_ATOMIC_H_ */