/*
 * kmp_atomic.cpp -- ATOMIC implementation routines
 */

//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "kmp_atomic.h"
#include "kmp.h" // TRUE, asm routines prototypes

typedef unsigned char uchar;
typedef unsigned short ushort;
/*!
@defgroup ATOMIC_OPS Atomic Operations
These functions are used for implementing the many different varieties of
atomic operations.

The compiler is at liberty to inline atomic operations that are naturally
supported by the target architecture. For instance on IA-32 architecture an
atomic like this can be inlined
@code
static int s = 0;
#pragma omp atomic
    s++;
@endcode
using the single instruction: `lock; incl s`

However the runtime does provide entrypoints for these operations to support
compilers that choose not to inline them. (For instance,
`__kmpc_atomic_fixed4_add` could be used to perform the increment above.)
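
As a sketch (the exact calling sequence is chosen by the compiler), the
non-inlined form of the increment above is a call such as
@code
__kmpc_atomic_fixed4_add(&loc, gtid, &s, 1);
@endcode
where `loc` is the `ident_t` source location and `gtid` the caller's global
thread id, both supplied by the compiler.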

The names of the functions are encoded by using the data type name and the
operation name, as in these tables.

Data Type  | Data type encoding
-----------|---------------
int8_t     | `fixed1`
uint8_t    | `fixed1u`
int16_t    | `fixed2`
uint16_t   | `fixed2u`
int32_t    | `fixed4`
uint32_t   | `fixed4u`
int64_t    | `fixed8`
uint64_t   | `fixed8u`
float      | `float4`
double     | `float8`
float 10 (8087 eighty bit float)  | `float10`
complex<float>   | `cmplx4`
complex<double>  | `cmplx8`
complex<float10> | `cmplx10`
complex<float16> | `cmplx16`
<br>

Operation | Operation encoding
----------|-------------------
+ | add
- | sub
\* | mul
/ | div
& | andb
<< | shl
\>\> | shr
\| | orb
^  | xor
&& | andl
\|\| | orl
maximum | max
minimum | min
.eqv.   | eqv
.neqv.  | neqv

<br>
For non-commutative operations, `_rev` can also be added for the reversed
operation. For the functions that capture the result, the suffix `_cpt` is
added.

Update Functions
================
The general form of an atomic function that just performs an update (without a
`capture`)
@code
void __kmpc_atomic_<datatype>_<operation>( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs );
@endcode
@param ident_t  a pointer to source location
@param gtid  the global thread id
@param lhs   a pointer to the left operand
@param rhs   the right operand

`capture` functions
===================
The capture functions perform an atomic update and return a result, which is
either the value before the capture, or that after. They take an additional
argument to determine which result is returned.
Their general form is therefore
@code
TYPE __kmpc_atomic_<datatype>_<operation>_cpt( ident_t *id_ref, int gtid, TYPE *
lhs, TYPE rhs, int flag );
@endcode
@param ident_t  a pointer to source location
@param gtid  the global thread id
@param lhs   a pointer to the left operand
@param rhs   the right operand
@param flag  one if the result is to be captured *after* the operation, zero if
captured *before*.
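
For example (a sketch, with `loc` and `gtid` as above), capturing the value of
a 32-bit integer `s` after an atomic add of 5:
@code
kmp_int32 v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &s, 5, 1);
@endcode
Here `flag == 1`, so `v` is the value of `s` after the addition; with
`flag == 0` the same call would return the value held before it.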

The one set of exceptions to this is the `complex<float>` type where the value
is not returned, rather an extra argument pointer is passed.

They look like
@code
void __kmpc_atomic_cmplx4_<op>_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 *
lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag );
@endcode

Read and Write Operations
=========================
The OpenMP<sup>*</sup> standard now supports atomic operations that simply
ensure that the value is read or written atomically, with no modification
performed. In many cases on IA-32 architecture these operations can be inlined
since the architecture guarantees that no tearing occurs on aligned objects
accessed with a single memory operation of up to 64 bits in size.

The general form of the read operations is
@code
TYPE __kmpc_atomic_<type>_rd ( ident_t *id_ref, int gtid, TYPE * loc );
@endcode

For the write operations the form is
@code
void __kmpc_atomic_<type>_wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs
);
@endcode
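
For example (a sketch), an atomic read and an atomic write of a 64-bit value:
@code
kmp_int64 v = __kmpc_atomic_fixed8_rd(&loc, gtid, &counter); // atomic read
__kmpc_atomic_fixed8_wr(&loc, gtid, &counter, 0); // atomic write
@endcode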

Full list of functions
======================
This leads to the generation of 376 atomic functions, as follows.

Functions for integers
----------------------
There are versions here for integers of size 1, 2, 4 and 8 bytes both signed
and unsigned (where that matters).
@code
__kmpc_atomic_fixed1_add
__kmpc_atomic_fixed1_add_cpt
__kmpc_atomic_fixed1_add_fp
__kmpc_atomic_fixed1_andb
__kmpc_atomic_fixed1_andb_cpt
__kmpc_atomic_fixed1_andl
__kmpc_atomic_fixed1_andl_cpt
__kmpc_atomic_fixed1_div
__kmpc_atomic_fixed1_div_cpt
__kmpc_atomic_fixed1_div_cpt_rev
__kmpc_atomic_fixed1_div_float8
__kmpc_atomic_fixed1_div_fp
__kmpc_atomic_fixed1_div_rev
__kmpc_atomic_fixed1_eqv
__kmpc_atomic_fixed1_eqv_cpt
__kmpc_atomic_fixed1_max
__kmpc_atomic_fixed1_max_cpt
__kmpc_atomic_fixed1_min
__kmpc_atomic_fixed1_min_cpt
__kmpc_atomic_fixed1_mul
__kmpc_atomic_fixed1_mul_cpt
__kmpc_atomic_fixed1_mul_float8
__kmpc_atomic_fixed1_mul_fp
__kmpc_atomic_fixed1_neqv
__kmpc_atomic_fixed1_neqv_cpt
__kmpc_atomic_fixed1_orb
__kmpc_atomic_fixed1_orb_cpt
__kmpc_atomic_fixed1_orl
__kmpc_atomic_fixed1_orl_cpt
__kmpc_atomic_fixed1_rd
__kmpc_atomic_fixed1_shl
__kmpc_atomic_fixed1_shl_cpt
__kmpc_atomic_fixed1_shl_cpt_rev
__kmpc_atomic_fixed1_shl_rev
__kmpc_atomic_fixed1_shr
__kmpc_atomic_fixed1_shr_cpt
__kmpc_atomic_fixed1_shr_cpt_rev
__kmpc_atomic_fixed1_shr_rev
__kmpc_atomic_fixed1_sub
__kmpc_atomic_fixed1_sub_cpt
__kmpc_atomic_fixed1_sub_cpt_rev
__kmpc_atomic_fixed1_sub_fp
__kmpc_atomic_fixed1_sub_rev
__kmpc_atomic_fixed1_swp
__kmpc_atomic_fixed1_wr
__kmpc_atomic_fixed1_xor
__kmpc_atomic_fixed1_xor_cpt
__kmpc_atomic_fixed1u_add_fp
__kmpc_atomic_fixed1u_sub_fp
__kmpc_atomic_fixed1u_mul_fp
__kmpc_atomic_fixed1u_div
__kmpc_atomic_fixed1u_div_cpt
__kmpc_atomic_fixed1u_div_cpt_rev
__kmpc_atomic_fixed1u_div_fp
__kmpc_atomic_fixed1u_div_rev
__kmpc_atomic_fixed1u_shr
__kmpc_atomic_fixed1u_shr_cpt
__kmpc_atomic_fixed1u_shr_cpt_rev
__kmpc_atomic_fixed1u_shr_rev
__kmpc_atomic_fixed2_add
__kmpc_atomic_fixed2_add_cpt
__kmpc_atomic_fixed2_add_fp
__kmpc_atomic_fixed2_andb
__kmpc_atomic_fixed2_andb_cpt
__kmpc_atomic_fixed2_andl
__kmpc_atomic_fixed2_andl_cpt
__kmpc_atomic_fixed2_div
__kmpc_atomic_fixed2_div_cpt
__kmpc_atomic_fixed2_div_cpt_rev
__kmpc_atomic_fixed2_div_float8
__kmpc_atomic_fixed2_div_fp
__kmpc_atomic_fixed2_div_rev
__kmpc_atomic_fixed2_eqv
__kmpc_atomic_fixed2_eqv_cpt
__kmpc_atomic_fixed2_max
__kmpc_atomic_fixed2_max_cpt
__kmpc_atomic_fixed2_min
__kmpc_atomic_fixed2_min_cpt
__kmpc_atomic_fixed2_mul
__kmpc_atomic_fixed2_mul_cpt
__kmpc_atomic_fixed2_mul_float8
__kmpc_atomic_fixed2_mul_fp
__kmpc_atomic_fixed2_neqv
__kmpc_atomic_fixed2_neqv_cpt
__kmpc_atomic_fixed2_orb
__kmpc_atomic_fixed2_orb_cpt
__kmpc_atomic_fixed2_orl
__kmpc_atomic_fixed2_orl_cpt
__kmpc_atomic_fixed2_rd
__kmpc_atomic_fixed2_shl
__kmpc_atomic_fixed2_shl_cpt
__kmpc_atomic_fixed2_shl_cpt_rev
__kmpc_atomic_fixed2_shl_rev
__kmpc_atomic_fixed2_shr
__kmpc_atomic_fixed2_shr_cpt
__kmpc_atomic_fixed2_shr_cpt_rev
__kmpc_atomic_fixed2_shr_rev
__kmpc_atomic_fixed2_sub
__kmpc_atomic_fixed2_sub_cpt
__kmpc_atomic_fixed2_sub_cpt_rev
__kmpc_atomic_fixed2_sub_fp
__kmpc_atomic_fixed2_sub_rev
__kmpc_atomic_fixed2_swp
__kmpc_atomic_fixed2_wr
__kmpc_atomic_fixed2_xor
__kmpc_atomic_fixed2_xor_cpt
__kmpc_atomic_fixed2u_add_fp
__kmpc_atomic_fixed2u_sub_fp
__kmpc_atomic_fixed2u_mul_fp
__kmpc_atomic_fixed2u_div
__kmpc_atomic_fixed2u_div_cpt
__kmpc_atomic_fixed2u_div_cpt_rev
__kmpc_atomic_fixed2u_div_fp
__kmpc_atomic_fixed2u_div_rev
__kmpc_atomic_fixed2u_shr
__kmpc_atomic_fixed2u_shr_cpt
__kmpc_atomic_fixed2u_shr_cpt_rev
__kmpc_atomic_fixed2u_shr_rev
__kmpc_atomic_fixed4_add
__kmpc_atomic_fixed4_add_cpt
__kmpc_atomic_fixed4_add_fp
__kmpc_atomic_fixed4_andb
__kmpc_atomic_fixed4_andb_cpt
__kmpc_atomic_fixed4_andl
__kmpc_atomic_fixed4_andl_cpt
__kmpc_atomic_fixed4_div
__kmpc_atomic_fixed4_div_cpt
__kmpc_atomic_fixed4_div_cpt_rev
__kmpc_atomic_fixed4_div_float8
__kmpc_atomic_fixed4_div_fp
__kmpc_atomic_fixed4_div_rev
__kmpc_atomic_fixed4_eqv
__kmpc_atomic_fixed4_eqv_cpt
__kmpc_atomic_fixed4_max
__kmpc_atomic_fixed4_max_cpt
__kmpc_atomic_fixed4_min
__kmpc_atomic_fixed4_min_cpt
__kmpc_atomic_fixed4_mul
__kmpc_atomic_fixed4_mul_cpt
__kmpc_atomic_fixed4_mul_float8
__kmpc_atomic_fixed4_mul_fp
__kmpc_atomic_fixed4_neqv
__kmpc_atomic_fixed4_neqv_cpt
__kmpc_atomic_fixed4_orb
__kmpc_atomic_fixed4_orb_cpt
__kmpc_atomic_fixed4_orl
__kmpc_atomic_fixed4_orl_cpt
__kmpc_atomic_fixed4_rd
__kmpc_atomic_fixed4_shl
__kmpc_atomic_fixed4_shl_cpt
__kmpc_atomic_fixed4_shl_cpt_rev
__kmpc_atomic_fixed4_shl_rev
__kmpc_atomic_fixed4_shr
__kmpc_atomic_fixed4_shr_cpt
__kmpc_atomic_fixed4_shr_cpt_rev
__kmpc_atomic_fixed4_shr_rev
__kmpc_atomic_fixed4_sub
__kmpc_atomic_fixed4_sub_cpt
__kmpc_atomic_fixed4_sub_cpt_rev
__kmpc_atomic_fixed4_sub_fp
__kmpc_atomic_fixed4_sub_rev
__kmpc_atomic_fixed4_swp
__kmpc_atomic_fixed4_wr
__kmpc_atomic_fixed4_xor
__kmpc_atomic_fixed4_xor_cpt
__kmpc_atomic_fixed4u_add_fp
__kmpc_atomic_fixed4u_sub_fp
__kmpc_atomic_fixed4u_mul_fp
__kmpc_atomic_fixed4u_div
__kmpc_atomic_fixed4u_div_cpt
__kmpc_atomic_fixed4u_div_cpt_rev
__kmpc_atomic_fixed4u_div_fp
__kmpc_atomic_fixed4u_div_rev
__kmpc_atomic_fixed4u_shr
__kmpc_atomic_fixed4u_shr_cpt
__kmpc_atomic_fixed4u_shr_cpt_rev
__kmpc_atomic_fixed4u_shr_rev
__kmpc_atomic_fixed8_add
__kmpc_atomic_fixed8_add_cpt
__kmpc_atomic_fixed8_add_fp
__kmpc_atomic_fixed8_andb
__kmpc_atomic_fixed8_andb_cpt
__kmpc_atomic_fixed8_andl
__kmpc_atomic_fixed8_andl_cpt
__kmpc_atomic_fixed8_div
__kmpc_atomic_fixed8_div_cpt
__kmpc_atomic_fixed8_div_cpt_rev
__kmpc_atomic_fixed8_div_float8
__kmpc_atomic_fixed8_div_fp
__kmpc_atomic_fixed8_div_rev
__kmpc_atomic_fixed8_eqv
__kmpc_atomic_fixed8_eqv_cpt
__kmpc_atomic_fixed8_max
__kmpc_atomic_fixed8_max_cpt
__kmpc_atomic_fixed8_min
__kmpc_atomic_fixed8_min_cpt
__kmpc_atomic_fixed8_mul
__kmpc_atomic_fixed8_mul_cpt
__kmpc_atomic_fixed8_mul_float8
__kmpc_atomic_fixed8_mul_fp
__kmpc_atomic_fixed8_neqv
__kmpc_atomic_fixed8_neqv_cpt
__kmpc_atomic_fixed8_orb
__kmpc_atomic_fixed8_orb_cpt
__kmpc_atomic_fixed8_orl
__kmpc_atomic_fixed8_orl_cpt
__kmpc_atomic_fixed8_rd
__kmpc_atomic_fixed8_shl
__kmpc_atomic_fixed8_shl_cpt
__kmpc_atomic_fixed8_shl_cpt_rev
__kmpc_atomic_fixed8_shl_rev
__kmpc_atomic_fixed8_shr
__kmpc_atomic_fixed8_shr_cpt
__kmpc_atomic_fixed8_shr_cpt_rev
__kmpc_atomic_fixed8_shr_rev
__kmpc_atomic_fixed8_sub
__kmpc_atomic_fixed8_sub_cpt
__kmpc_atomic_fixed8_sub_cpt_rev
__kmpc_atomic_fixed8_sub_fp
__kmpc_atomic_fixed8_sub_rev
__kmpc_atomic_fixed8_swp
__kmpc_atomic_fixed8_wr
__kmpc_atomic_fixed8_xor
__kmpc_atomic_fixed8_xor_cpt
__kmpc_atomic_fixed8u_add_fp
__kmpc_atomic_fixed8u_sub_fp
__kmpc_atomic_fixed8u_mul_fp
__kmpc_atomic_fixed8u_div
__kmpc_atomic_fixed8u_div_cpt
__kmpc_atomic_fixed8u_div_cpt_rev
__kmpc_atomic_fixed8u_div_fp
__kmpc_atomic_fixed8u_div_rev
__kmpc_atomic_fixed8u_shr
__kmpc_atomic_fixed8u_shr_cpt
__kmpc_atomic_fixed8u_shr_cpt_rev
__kmpc_atomic_fixed8u_shr_rev
@endcode

Functions for floating point
----------------------------
There are versions here for floating point numbers of size 4, 8, 10 and 16
bytes. (Ten byte floats are used by X87, but are now rare.)
@code
__kmpc_atomic_float4_add
__kmpc_atomic_float4_add_cpt
__kmpc_atomic_float4_add_float8
__kmpc_atomic_float4_add_fp
__kmpc_atomic_float4_div
__kmpc_atomic_float4_div_cpt
__kmpc_atomic_float4_div_cpt_rev
__kmpc_atomic_float4_div_float8
__kmpc_atomic_float4_div_fp
__kmpc_atomic_float4_div_rev
__kmpc_atomic_float4_max
__kmpc_atomic_float4_max_cpt
__kmpc_atomic_float4_min
__kmpc_atomic_float4_min_cpt
__kmpc_atomic_float4_mul
__kmpc_atomic_float4_mul_cpt
__kmpc_atomic_float4_mul_float8
__kmpc_atomic_float4_mul_fp
__kmpc_atomic_float4_rd
__kmpc_atomic_float4_sub
__kmpc_atomic_float4_sub_cpt
__kmpc_atomic_float4_sub_cpt_rev
__kmpc_atomic_float4_sub_float8
__kmpc_atomic_float4_sub_fp
__kmpc_atomic_float4_sub_rev
__kmpc_atomic_float4_swp
__kmpc_atomic_float4_wr
__kmpc_atomic_float8_add
__kmpc_atomic_float8_add_cpt
__kmpc_atomic_float8_add_fp
__kmpc_atomic_float8_div
__kmpc_atomic_float8_div_cpt
__kmpc_atomic_float8_div_cpt_rev
__kmpc_atomic_float8_div_fp
__kmpc_atomic_float8_div_rev
__kmpc_atomic_float8_max
__kmpc_atomic_float8_max_cpt
__kmpc_atomic_float8_min
__kmpc_atomic_float8_min_cpt
__kmpc_atomic_float8_mul
__kmpc_atomic_float8_mul_cpt
__kmpc_atomic_float8_mul_fp
__kmpc_atomic_float8_rd
__kmpc_atomic_float8_sub
__kmpc_atomic_float8_sub_cpt
__kmpc_atomic_float8_sub_cpt_rev
__kmpc_atomic_float8_sub_fp
__kmpc_atomic_float8_sub_rev
__kmpc_atomic_float8_swp
__kmpc_atomic_float8_wr
__kmpc_atomic_float10_add
__kmpc_atomic_float10_add_cpt
__kmpc_atomic_float10_add_fp
__kmpc_atomic_float10_div
__kmpc_atomic_float10_div_cpt
__kmpc_atomic_float10_div_cpt_rev
__kmpc_atomic_float10_div_fp
__kmpc_atomic_float10_div_rev
__kmpc_atomic_float10_mul
__kmpc_atomic_float10_mul_cpt
__kmpc_atomic_float10_mul_fp
__kmpc_atomic_float10_rd
__kmpc_atomic_float10_sub
__kmpc_atomic_float10_sub_cpt
__kmpc_atomic_float10_sub_cpt_rev
__kmpc_atomic_float10_sub_fp
__kmpc_atomic_float10_sub_rev
__kmpc_atomic_float10_swp
__kmpc_atomic_float10_wr
__kmpc_atomic_float16_add
__kmpc_atomic_float16_add_cpt
__kmpc_atomic_float16_div
__kmpc_atomic_float16_div_cpt
__kmpc_atomic_float16_div_cpt_rev
__kmpc_atomic_float16_div_rev
__kmpc_atomic_float16_max
__kmpc_atomic_float16_max_cpt
__kmpc_atomic_float16_min
__kmpc_atomic_float16_min_cpt
__kmpc_atomic_float16_mul
__kmpc_atomic_float16_mul_cpt
__kmpc_atomic_float16_rd
__kmpc_atomic_float16_sub
__kmpc_atomic_float16_sub_cpt
__kmpc_atomic_float16_sub_cpt_rev
__kmpc_atomic_float16_sub_rev
__kmpc_atomic_float16_swp
__kmpc_atomic_float16_wr
@endcode

Functions for Complex types
---------------------------
Functions for complex types whose component floating point variables are of
size 4, 8, 10 or 16 bytes. The names here are based on the size of the
component float, *not* the size of the complex type. So
`__kmpc_atomic_cmplx8_add` is an operation on a `complex<double>` or
`complex(kind=8)`, *not* `complex<float>`.

@code
__kmpc_atomic_cmplx4_add
__kmpc_atomic_cmplx4_add_cmplx8
__kmpc_atomic_cmplx4_add_cpt
__kmpc_atomic_cmplx4_div
__kmpc_atomic_cmplx4_div_cmplx8
__kmpc_atomic_cmplx4_div_cpt
__kmpc_atomic_cmplx4_div_cpt_rev
__kmpc_atomic_cmplx4_div_rev
__kmpc_atomic_cmplx4_mul
__kmpc_atomic_cmplx4_mul_cmplx8
__kmpc_atomic_cmplx4_mul_cpt
__kmpc_atomic_cmplx4_rd
__kmpc_atomic_cmplx4_sub
__kmpc_atomic_cmplx4_sub_cmplx8
__kmpc_atomic_cmplx4_sub_cpt
__kmpc_atomic_cmplx4_sub_cpt_rev
__kmpc_atomic_cmplx4_sub_rev
__kmpc_atomic_cmplx4_swp
__kmpc_atomic_cmplx4_wr
__kmpc_atomic_cmplx8_add
__kmpc_atomic_cmplx8_add_cpt
__kmpc_atomic_cmplx8_div
__kmpc_atomic_cmplx8_div_cpt
__kmpc_atomic_cmplx8_div_cpt_rev
__kmpc_atomic_cmplx8_div_rev
__kmpc_atomic_cmplx8_mul
__kmpc_atomic_cmplx8_mul_cpt
__kmpc_atomic_cmplx8_rd
__kmpc_atomic_cmplx8_sub
__kmpc_atomic_cmplx8_sub_cpt
__kmpc_atomic_cmplx8_sub_cpt_rev
__kmpc_atomic_cmplx8_sub_rev
__kmpc_atomic_cmplx8_swp
__kmpc_atomic_cmplx8_wr
__kmpc_atomic_cmplx10_add
__kmpc_atomic_cmplx10_add_cpt
__kmpc_atomic_cmplx10_div
__kmpc_atomic_cmplx10_div_cpt
__kmpc_atomic_cmplx10_div_cpt_rev
__kmpc_atomic_cmplx10_div_rev
__kmpc_atomic_cmplx10_mul
__kmpc_atomic_cmplx10_mul_cpt
__kmpc_atomic_cmplx10_rd
__kmpc_atomic_cmplx10_sub
__kmpc_atomic_cmplx10_sub_cpt
__kmpc_atomic_cmplx10_sub_cpt_rev
__kmpc_atomic_cmplx10_sub_rev
__kmpc_atomic_cmplx10_swp
__kmpc_atomic_cmplx10_wr
__kmpc_atomic_cmplx16_add
__kmpc_atomic_cmplx16_add_cpt
__kmpc_atomic_cmplx16_div
__kmpc_atomic_cmplx16_div_cpt
__kmpc_atomic_cmplx16_div_cpt_rev
__kmpc_atomic_cmplx16_div_rev
__kmpc_atomic_cmplx16_mul
__kmpc_atomic_cmplx16_mul_cpt
__kmpc_atomic_cmplx16_rd
__kmpc_atomic_cmplx16_sub
__kmpc_atomic_cmplx16_sub_cpt
__kmpc_atomic_cmplx16_sub_cpt_rev
__kmpc_atomic_cmplx16_swp
__kmpc_atomic_cmplx16_wr
@endcode
*/

/*!
@ingroup ATOMIC_OPS
@{
*/

/*
 * Global vars
 */

#ifndef KMP_GOMP_COMPAT
int __kmp_atomic_mode = 1; // Intel perf
#else
int __kmp_atomic_mode = 2; // GOMP compatibility
#endif /* KMP_GOMP_COMPAT */

// Control access to all user coded atomics in Gnu compat mode
kmp_atomic_lock_t __kmp_atomic_lock;
// Control access to all user coded atomics for 1-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_1i;
// Control access to all user coded atomics for 2-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_2i;
// Control access to all user coded atomics for 4-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_4i;
// Control access to all user coded atomics for kmp_real32 data type
kmp_atomic_lock_t __kmp_atomic_lock_4r;
// Control access to all user coded atomics for 8-byte fixed data types
kmp_atomic_lock_t __kmp_atomic_lock_8i;
// Control access to all user coded atomics for kmp_real64 data type
kmp_atomic_lock_t __kmp_atomic_lock_8r;
// Control access to all user coded atomics for complex byte data type
kmp_atomic_lock_t __kmp_atomic_lock_8c;
// Control access to all user coded atomics for long double data type
kmp_atomic_lock_t __kmp_atomic_lock_10r;
// Control access to all user coded atomics for _Quad data type
kmp_atomic_lock_t __kmp_atomic_lock_16r;
// Control access to all user coded atomics for double complex data type
kmp_atomic_lock_t __kmp_atomic_lock_16c;
// Control access to all user coded atomics for long double complex type
kmp_atomic_lock_t __kmp_atomic_lock_20c;
// Control access to all user coded atomics for _Quad complex data type
kmp_atomic_lock_t __kmp_atomic_lock_32c;

/* Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug
   on *_32 and *_32e. This is just a temporary workaround for the problem. It
   seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines
   in assembler language. */
#define KMP_ATOMIC_VOLATILE volatile

#if (KMP_ARCH_X86) && KMP_HAVE_QUAD

static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  lhs.q /= rhs.q;
}
static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) {
  return lhs.q > rhs.q;
}

static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  lhs.q /= rhs.q;
}
static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q < rhs.q;
}
static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) {
  return lhs.q > rhs.q;
}

static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) {
  lhs.q /= rhs.q;
}

static inline void operator+=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q += rhs.q;
}
static inline void operator-=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q -= rhs.q;
}
static inline void operator*=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q *= rhs.q;
}
static inline void operator/=(kmp_cmplx128_a16_t &lhs,
                              kmp_cmplx128_a16_t &rhs) {
  lhs.q /= rhs.q;
}

#endif // (KMP_ARCH_X86) && KMP_HAVE_QUAD

// ATOMIC implementation routines -----------------------------------------
// One routine for each operation and operand type.
// All routine declarations look like
// void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs );

#define KMP_CHECK_GTID \
  if (gtid == KMP_GTID_UNKNOWN) { \
    gtid = __kmp_entry_gtid(); \
  } // check and get gtid when needed

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//           fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *lhs, TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
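
// As an illustrative sketch (not generated text), ATOMIC_BEGIN(fixed4, add,
// kmp_int32, void) expands to the opening of
//   void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid,
//                                 kmp_int32 *lhs, kmp_int32 rhs) {
// The body and the closing brace are supplied by the macro that invokes it.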

// ------------------------------------------------------------------------
// Lock variables used for critical sections for various size operands
#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat
#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char
#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short
#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int
#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float
#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int
#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double
#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex
#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double
#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad
#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex
#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex
#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  (*lhs) OP(rhs); \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
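
// For example (a sketch), OP_CRITICAL(+=, 8r) serializes *lhs += rhs as
//   __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8r, gtid);
//   (*lhs) += (rhs);
//   __kmp_release_atomic_lock(&__kmp_atomic_lock_8r, gtid);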

// ------------------------------------------------------------------------
// For GNU compatibility, we may need to use a critical section,
// even though it is not required by the ISA.
//
// On IA-32 architecture, all atomic operations except for fixed 4 byte add,
// sub, and bitwise logical ops, and 1 & 2 byte logical ops use a common
// critical section. On Intel(R) 64, all atomic operations are done with fetch
// and add or compare and exchange. Therefore, the FLAG parameter to this
// macro is either KMP_ARCH_X86 or 0 (or 1, for Intel-specific extensions which
// require a critical section, where we predict that they will be implemented
// in the Gnu codegen by calling GOMP_atomic_start() / GOMP_atomic_end()).
//
// When the OP_GOMP_CRITICAL macro is used in a *CRITICAL* macro construct,
// the FLAG parameter should always be 1. If we know that we will be using
// a critical section, then we want to make certain that we use the generic
// lock __kmp_atomic_lock to protect the atomic update, and not one of the
// locks that are specialized based upon the size or type of the data.
//
// If FLAG is 0, then we are relying on dead code elimination by the build
// compiler to get rid of the useless block of code, and save a needless
// branch at runtime.

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP, 0); \
    return; \
  }
#else
#define OP_GOMP_CRITICAL(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

#if KMP_MIC
#define KMP_DO_PAUSE _mm_delay_32(1)
#else
#define KMP_DO_PAUSE KMP_CPU_PAUSE()
#endif /* KMP_MIC */

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
#define OP_CMPXCHG(TYPE, BITS, OP) \
  { \
    TYPE old_value, new_value; \
    old_value = *(TYPE volatile *)lhs; \
    new_value = old_value OP rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_DO_PAUSE; \
      old_value = *(TYPE volatile *)lhs; \
      new_value = old_value OP rhs; \
    } \
  }
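
// As a sketch, OP_CMPXCHG(kmp_real32, 32, +) implements *lhs += rhs as an
// optimistic retry loop: read *lhs, compute old + rhs, then attempt a 32-bit
// compare-and-store; if another thread changed *lhs between the read and the
// store, the CAS fails and the loop re-reads and recomputes.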

#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32
// and win_32e are affected (I verified the asm). Compiler ignores the volatile
// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the
// compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of
// the workaround.
#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  { \
    struct _sss { \
      TYPE cmp; \
      kmp_int##BITS *vvv; \
    }; \
    struct _sss old_value, new_value; \
    old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \
    new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \
    *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
    new_value.cmp = old_value.cmp OP rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \
        *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \
      KMP_DO_PAUSE; \
      *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \
      new_value.cmp = old_value.cmp OP rhs; \
    } \
  }
// end of the first part of the workaround for C78287
#endif // USE_CMPXCHG_FIX

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
  KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                       GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
                                  MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX
#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
    KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                       GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#if USE_CMPXCHG_FIX
// -------------------------------------------------------------------------
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \
                                  MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
// end of the second part of the workaround for C78287
#endif // USE_CMPXCHG_FIX
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

// Routines for ATOMIC 4-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3,
                 0) // __kmpc_atomic_fixed4_add
ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3,
                 0) // __kmpc_atomic_fixed4_sub

ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_add
ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_sub

// Routines for ATOMIC 8-byte operands addition and subtraction
ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_add
ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub

ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_add
ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_sub
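
// As a sketch, the ATOMIC_FIXED_ADD(fixed4, add, ...) instantiation above
// produces __kmpc_atomic_fixed4_add, which performs *lhs += rhs with a single
// fetch-and-add (KMP_TEST_THEN_ADD32), typically one locked add instruction
// on IA-32 / Intel(R) 64.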

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable
// MASK - used for alignment check
//
// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,MASK,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_add
ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0,
               0) // __kmpc_atomic_fixed1_andb
ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_div
ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div
ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul
ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0,
               0) // __kmpc_atomic_fixed1_orb
ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl
ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr
ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr
ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub
ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0,
               0) // __kmpc_atomic_fixed1_xor
ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_add
ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1,
               0) // __kmpc_atomic_fixed2_andb
ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_div
ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div
ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul
ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1,
               0) // __kmpc_atomic_fixed2_orb
ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl
ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr
ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr
ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub
ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1,
               0) // __kmpc_atomic_fixed2_xor
ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3,
               0) // __kmpc_atomic_fixed4_andb
ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_div
ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div
ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul
ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3,
               0) // __kmpc_atomic_fixed4_orb
ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl
ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr
ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr
ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3,
               0) // __kmpc_atomic_fixed4_xor
ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb
ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_div
ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div
ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul
ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb
ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl
ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr
ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr
ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor
ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_div
ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3,
               KMP_ARCH_X86) // __kmpc_atomic_float4_mul
ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_div
ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7,
               KMP_ARCH_X86) // __kmpc_atomic_float8_mul
// TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG

/* ------------------------------------------------------------------------ */
/* Routines for C/C++ Reduction operators && and || */

// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
//   TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used
#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  OP_CRITICAL(= *lhs OP, LCK_ID) \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl
ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0,
              KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl
ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl
ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1,
              KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl
ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3,
              0) // __kmpc_atomic_fixed4_andl
ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3,
              0) // __kmpc_atomic_fixed4_orl
ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl
ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7,
              KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl

/* ------------------------------------------------------------------------- */
/* Routines for Fortran operators that matched no one in C:                   */
/* MAX, MIN, .EQV., .NEQV.                                                    */
/* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}            */
/* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}   */

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
// OP - operator used to check whether any action is still needed
#define MIN_MAX_CRITSECT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  if (*lhs OP rhs) { /* still need actions? */ \
    *lhs = rhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);

// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    MIN_MAX_CRITSECT(OP, 0); \
    return; \
  }
#else
#define GOMP_MIN_MAX_CRITSECT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    while (old_value OP rhs && /* still need actions? */ \
           !KMP_COMPARE_AND_STORE_ACQ##BITS( \
               (kmp_int##BITS *)lhs, \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
      KMP_CPU_PAUSE(); \
      temp_val = *lhs; \
      old_value = temp_val; \
    } \
  }
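
// Sketch of the logic for max (OP is <): the loop keeps trying to install
// rhs while *lhs OP rhs still holds; if a competing thread installs a value
// that makes the test fail (e.g. something even larger), we are done without
// writing at all.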

// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { /* need actions? */ \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    MIN_MAX_CRITSECT(OP, LCK_ID) \
  } \
  }

#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    MIN_MAX_CMPXCHG(TYPE, BITS, OP) \
  } \
  }
#else
// -------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                         GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \
    if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
      MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
    } else { \
      KMP_CHECK_GTID; \
      MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \
    } \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_max
MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed1_min
MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_max
MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed2_min
MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3,
                 0) // __kmpc_atomic_fixed4_max
MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3,
                 0) // __kmpc_atomic_fixed4_min
MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_max
MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_fixed8_min
MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_max
MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3,
                 KMP_ARCH_X86) // __kmpc_atomic_float4_min
MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_max
MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7,
                 KMP_ARCH_X86) // __kmpc_atomic_float8_min
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r,
                 1) // __kmpc_atomic_float16_max
MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r,
                 1) // __kmpc_atomic_float16_min
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r,
                 1) // __kmpc_atomic_float16_max_a16
MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r,
                 1) // __kmpc_atomic_float16_min_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
// Need separate macros for .EQV. because of the need of complement (~)
// OP ignored for critical sections, ^=~ used instead
#define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */ \
  }

// ------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ===================================
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                        GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
// ------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \
                        GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv
ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv
ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv
ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv
ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0,
                KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv
ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1,
                KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv
ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3,
                KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv
ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7,
                KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP - operator
//     LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL(float10, add, long double, +, 10r,
                1) // __kmpc_atomic_float10_add
ATOMIC_CRITICAL(float10, sub, long double, -, 10r,
                1) // __kmpc_atomic_float10_sub
ATOMIC_CRITICAL(float10, mul, long double, *, 10r,
                1) // __kmpc_atomic_float10_mul
ATOMIC_CRITICAL(float10, div, long double, /, 10r,
                1) // __kmpc_atomic_float10_div
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r,
                1) // __kmpc_atomic_float16_add
ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r,
                1) // __kmpc_atomic_float16_sub
ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r,
                1) // __kmpc_atomic_float16_mul
ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r,
                1) // __kmpc_atomic_float16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r,
                1) // __kmpc_atomic_float16_add_a16
ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r,
                1) // __kmpc_atomic_float16_sub_a16
ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r,
                1) // __kmpc_atomic_float16_mul_a16
ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r,
                1) // __kmpc_atomic_float16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// routines for complex types

#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_add
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7,
                          1) // __kmpc_atomic_cmplx4_div
// end of the workaround for C78287
#else
ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add
ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub
ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul
ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div
#endif // USE_CMPXCHG_FIX

ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add
ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub
ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul
ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div
ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c,
                1) // __kmpc_atomic_cmplx10_add
ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c,
                1) // __kmpc_atomic_cmplx10_sub
ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c,
                1) // __kmpc_atomic_cmplx10_mul
ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c,
                1) // __kmpc_atomic_cmplx10_div
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c,
                1) // __kmpc_atomic_cmplx16_add
ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub
ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul
ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c,
                1) // __kmpc_atomic_cmplx16_div
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c,
                1) // __kmpc_atomic_cmplx16_add_a16
ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                1) // __kmpc_atomic_cmplx16_sub_a16
ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c,
                1) // __kmpc_atomic_cmplx16_mul_a16
ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                1) // __kmpc_atomic_cmplx16_div_a16
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD

// OpenMP 4.0: x = expr binop x for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
// OP - operator (it's supposed to contain an assignment)
// LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_REV(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  (*lhs) = (rhs)OP(*lhs); \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
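
// For example (a sketch), OP_CRITICAL_REV(/, 10r) performs the reversed
// update *lhs = rhs / *lhs under __kmp_atomic_lock_10r, implementing the
// OpenMP 4.0 form x = expr binop x for a long double operand.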

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_REV(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_REV(OP, 0); \
    return; \
  }
#else
#define OP_GOMP_CRITICAL_REV(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// Beginning of a definition (provides name, parameters, debug trace)
// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//           fixed)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operands' type
#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \
                                                   TYPE *lhs, TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid));

// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
// TYPE - operands' type
// BITS - size in bits, used to distinguish low level calls
// OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_REV(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs OP old_value; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_DO_PAUSE; \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs OP old_value; \
    } \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
  OP_CMPXCHG_REV(TYPE, BITS, OP) \
  }

// ------------------------------------------------------------------------
// Entries definition for integer operands
// TYPE_ID - operands type and size (fixed4, float4)
// OP_ID - operation identifier (add, sub, mul, ...)
// TYPE - operand type
// BITS - size in bits, used to distinguish low level calls
// OP - operator (used in critical section)
// LCK_ID - lock identifier, used to possibly distinguish lock variable
//
// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
// TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG
ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev
ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev
ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev
ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev
ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev
ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev

ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev
ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev
ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev
ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev
ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev
ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev

ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev
ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev
ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev
ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev
ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev
ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev

ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev
ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev
ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev
ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev
ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev
ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev

ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev
ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev

ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev
ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev
// TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG

// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP - operator
//     LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
  OP_CRITICAL_REV(OP, LCK_ID) \
  }

/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r,
                    1) // __kmpc_atomic_float10_sub_rev
ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r,
                    1) // __kmpc_atomic_float10_div_rev
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r,
                    1) // __kmpc_atomic_float16_sub_rev
ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r,
                    1) // __kmpc_atomic_float16_div_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r,
                    1) // __kmpc_atomic_float16_sub_a16_rev
ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r,
                    1) // __kmpc_atomic_float16_div_a16_rev
#endif // KMP_ARCH_X86
#endif // KMP_HAVE_QUAD
// routines for complex types
ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c,
                    1) // __kmpc_atomic_cmplx4_sub_rev
ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c,
                    1) // __kmpc_atomic_cmplx4_div_rev
ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c,
                    1) // __kmpc_atomic_cmplx8_sub_rev
ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c,
                    1) // __kmpc_atomic_cmplx8_div_rev
ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 20c,
                    1) // __kmpc_atomic_cmplx10_sub_rev
ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c,
                    1) // __kmpc_atomic_cmplx10_div_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_rev
ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_a16_rev
ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_a16_rev
#endif // KMP_ARCH_X86
#endif // KMP_HAVE_QUAD

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
// End of OpenMP 4.0: x = expr binop x for non-commutative operations.
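
// Illustration only (an assumed compiler lowering, not part of the tables
// above; `loc` and `gtid` stand for the source location and global thread
// id the compiler already has at hand): a reversed, non-commutative update
// such as
//
//   double x;
//   // #pragma omp atomic
//   // x = 2.0 / x;
//
// is expected to call the matching _rev entry point:
//
//   __kmpc_atomic_float8_div_rev(&loc, gtid, &x, 2.0); // *lhs = rhs / *lhs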
/* ------------------------------------------------------------------------ */
/* Routines for mixed types of LHS and RHS, when the RHS is "larger".        */
/* Note: in order to reduce the total number of type combinations, it is     */
/* assumed that the compiler converts the RHS to the longest floating type,  */
/* that is _Quad, before calling any of these routines.                      */
/* The conversion to _Quad is done by the compiler during the calculation,   */
/* and the conversion back to TYPE happens before the assignment, like:      */
/*    *lhs = (TYPE)( (_Quad)(*lhs) OP rhs )                                   */
/* A performance penalty is expected because of the SW emulation involved.   */
/* ------------------------------------------------------------------------ */
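
// Illustration only (assumed lowering; `loc`, `gtid` and `q` are
// illustrative names): for a float LHS updated by a long double RHS, the
// compiler is expected to widen the RHS to _Quad and call the _fp entry:
//
//   float x;
//   long double q = 3.0L;
//   // #pragma omp atomic
//   // x *= q;
//   __kmpc_atomic_float4_mul_fp(&loc, gtid, &x, (_Quad)q);
//
// The conversion of the widened result back to float happens inside the
// routine, exactly as the comment block above describes.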
#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
              gtid));
// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \
                           GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \
  }
// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                           LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
// -------------------------------------------------------------------------
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                           LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
// -------------------------------------------------------------------------
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
  OP_CMPXCHG_REV(TYPE, BITS, OP) \
  }
#define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                               LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \
  OP_CRITICAL_REV(OP, LCK_ID) \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
// RHS=float8
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3,
                   0) // __kmpc_atomic_fixed4_mul_float8
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3,
                   0) // __kmpc_atomic_fixed4_div_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8
// RHS=float16 (deprecated, to be removed when we are sure the compiler does
// not use them)
#if KMP_HAVE_QUAD
ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp
ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp

ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp
ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp

ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_add_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_add_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4_div_fp
ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3,
                   0) // __kmpc_atomic_fixed4u_div_fp

ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp
ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp
ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp

ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp
ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp

ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp
ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp

ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_add_fp
ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_sub_fp
ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_mul_fp
ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r,
                   1) // __kmpc_atomic_float10_div_fp
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// Reverse operations
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp
ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp

ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp
ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp

ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r,
                       1) // __kmpc_atomic_float10_sub_rev_fp
ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r,
                       1) // __kmpc_atomic_float10_div_rev_fp
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

#endif // KMP_HAVE_QUAD
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
// ------------------------------------------------------------------------
// X86 or X86_64: no alignment problems ====================================
#if USE_CMPXCHG_FIX
// workaround for C78287 (complex(kind=4) data type)
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \
  }
// end of the second part of the workaround for C78287
#else
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  OP_CMPXCHG(TYPE, BITS, OP) \
  }
#endif // USE_CMPXCHG_FIX
#else
// ------------------------------------------------------------------------
// Code for other architectures that don't handle unaligned accesses.
#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \
                             LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \
  OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \
  if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \
    OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \
  } else { \
    KMP_CHECK_GTID; \
    OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \
  } \
  }
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8
ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c,
                     7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8
// READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

// ------------------------------------------------------------------------
// Atomic READ routines

// ------------------------------------------------------------------------
// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
//     OP_ID - operation identifier (add, sub, mul, ...)
//     TYPE - operands' type
#define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *loc) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store_ret" routine
//     TYPE - operands' type
//     BITS - size in bits, used to distinguish low level calls
//     OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
// TODO: check if it is still necessary
// Return old value regardless of the result of the "compare & swap" operation
#define OP_CMPXCHG_READ(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    union f_i_union { \
      TYPE f_val; \
      kmp_int##BITS i_val; \
    }; \
    union f_i_union old_value; \
    temp_val = *loc; \
    old_value.f_val = temp_val; \
    old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \
        (kmp_int##BITS *)loc, \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \
        *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \
    new_value = old_value.f_val; \
    return new_value; \
  }
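
// A sketch of the idea behind OP_CMPXCHG_READ in plain code (illustration
// only, not compiled; the 64-bit case is shown, with the VOLATILE_CAST
// plumbing omitted): comparing a location against its own current value
// with compare&swap returns the contents atomically, and the union moves
// the floating payload through the integer-typed primitive unconverted.
//
//   union f_i_union {
//     kmp_real64 f_val;
//     kmp_int64 i_val;
//   } old_value;
//   old_value.f_val = *loc; // initial guess, may be torn
//   old_value.i_val = KMP_COMPARE_AND_STORE_RET64(
//       (kmp_int64 *)loc, old_value.i_val, old_value.i_val); // no-op swap
//   return old_value.f_val; // atomically observed bit pattern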
// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_READ(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  new_value = (*loc); \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_READ(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_READ(OP, 0); \
    return new_value; \
  }
#else
#define OP_GOMP_CRITICAL_READ(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// -------------------------------------------------------------------------
#define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
  new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \
  return new_value; \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \
  OP_CMPXCHG_READ(TYPE, BITS, OP) \
  }
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP - operator
//     LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \
  return new_value; \
  }
// ------------------------------------------------------------------------
// Fix for cmplx4 read (CQ220361) on Windows* OS. A regular routine with a
// return value doesn't work.
// Let's return the read value through the additional parameter.
#if (KMP_OS_WINDOWS)

#define OP_CRITICAL_READ_WRK(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  (*out) = (*loc); \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_READ_WRK(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \
                                         TYPE *loc) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));

// ------------------------------------------------------------------------
#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \
  OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \
  }

#endif // KMP_OS_WINDOWS
// ------------------------------------------------------------------------
//                  TYPE_ID,OP_ID, TYPE,      OP, GOMP_FLAG
ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd
ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd
ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +,
                    KMP_ARCH_X86) // __kmpc_atomic_float4_rd
ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +,
                    KMP_ARCH_X86) // __kmpc_atomic_float8_rd
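
// Illustration only (assumed lowering; `loc` and `gtid` are illustrative):
//
//   double x, v;
//   // #pragma omp atomic read
//   // v = x;
//   v = __kmpc_atomic_float8_rd(&loc, gtid, &x);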
// !!! TODO: Remove lock operations for "char" since it can't be non-atomic
ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd
ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd

ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r,
                     1) // __kmpc_atomic_float10_rd
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r,
                     1) // __kmpc_atomic_float16_rd
#endif // KMP_HAVE_QUAD
// Fix for CQ220361 on Windows* OS
#if (KMP_OS_WINDOWS)
ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c,
                         1) // __kmpc_atomic_cmplx4_rd
#else
ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c,
                     1) // __kmpc_atomic_cmplx4_rd
#endif // (KMP_OS_WINDOWS)
ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c,
                     1) // __kmpc_atomic_cmplx8_rd
ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c,
                     1) // __kmpc_atomic_cmplx10_rd
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c,
                     1) // __kmpc_atomic_cmplx16_rd
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r,
                     1) // __kmpc_atomic_float16_a16_rd
ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c,
                     1) // __kmpc_atomic_cmplx16_a16_rd
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// ------------------------------------------------------------------------
// Atomic WRITE routines

#define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
  KMP_XCHG_FIXED##BITS(lhs, rhs); \
  }

// ------------------------------------------------------------------------
#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
  KMP_XCHG_REAL##BITS(lhs, rhs); \
  }
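
// Illustration only (assumed lowering; `loc`, `gtid` illustrative): an
// atomic write is a plain exchange whose old value is discarded, e.g.
//
//   float x;
//   // #pragma omp atomic write
//   // x = 1.0f;
//   __kmpc_atomic_float4_wr(&loc, gtid, &x, 1.0f); // KMP_XCHG_REAL32 inside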
// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
//     TYPE - operands' type
//     BITS - size in bits, used to distinguish low level calls
//     OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_WR(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_CPU_PAUSE(); \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs; \
    } \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) \
  OP_CMPXCHG_WR(TYPE, BITS, OP) \
  }
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP - operator
//     LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \
  OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL(OP, LCK_ID) /* send assignment */ \
  }
// -------------------------------------------------------------------------
//                  TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr
ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr
ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#else
ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =,
               KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr
#endif // (KMP_ARCH_X86)

ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_wr
#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =,
                  KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#else
ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_wr
#endif // (KMP_ARCH_X86)
ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r,
                   1) // __kmpc_atomic_float10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r,
                   1) // __kmpc_atomic_float16_wr
#endif // KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr
ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c,
                   1) // __kmpc_atomic_cmplx8_wr
ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c,
                   1) // __kmpc_atomic_cmplx10_wr
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c,
                   1) // __kmpc_atomic_cmplx16_wr
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r,
                   1) // __kmpc_atomic_float16_a16_wr
ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c,
                   1) // __kmpc_atomic_cmplx16_a16_wr
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// ------------------------------------------------------------------------
// Atomic CAPTURE routines

// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
//     OP_ID - operation identifier (add, sub, mul, ...)
//     TYPE - operands' type
#define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \
  RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \
                                             TYPE *lhs, TYPE rhs, int flag) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
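
// Illustration only (assumed lowering; `loc`, `gtid` illustrative): the
// trailing flag selects which value a capture entry returns, e.g.
//
//   kmp_int32 x, v;
//   // { v = x; x = x + 1; }  -- capture the value before the update
//   v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &x, 1, 0);
//   // { x = x + 1; v = x; }  -- capture the value after the update
//   v = __kmpc_atomic_fixed4_add_cpt(&loc, gtid, &x, 1, 1);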
// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  if (flag) { \
    (*lhs) OP rhs; \
    new_value = (*lhs); \
  } else { \
    new_value = (*lhs); \
    (*lhs) OP rhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;
// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT(OP## =, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
//     TYPE - operands' type
//     BITS - size in bits, used to distinguish low level calls
//     OP - operator
// Note: temp_val introduced in order to force the compiler to read
//       *lhs only once (w/o it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = old_value OP rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_CPU_PAUSE(); \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = old_value OP rhs; \
    } \
    if (flag) { \
      return new_value; \
    } else \
      return old_value; \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }
// -------------------------------------------------------------------------
#define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE old_value, new_value; \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
  /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \
  old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \
  if (flag) { \
    return old_value OP rhs; \
  } else \
    return old_value; \
  }

// -------------------------------------------------------------------------
ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +,
                     0) // __kmpc_atomic_fixed4_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -,
                     0) // __kmpc_atomic_fixed4_sub_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt
ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt

ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt
ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt
ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt
ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt
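
// Note (sketch, not compiled): for the fixed4/fixed8 add and sub captures
// above, a single hardware fetch-and-add suffices; the "after" value is
// recomputed from the returned old value instead of re-reading memory:
//
//   kmp_int32 old_value = KMP_TEST_THEN_ADD32(lhs, +rhs);
//   return flag ? old_value + rhs : old_value; // after vs. before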
// ------------------------------------------------------------------------
// Entries definition for integer operands
//     TYPE_ID - operands type and size (fixed4, float4)
//     OP_ID - operation identifier (add, sub, mul, ...)
//     TYPE - operand type
//     BITS - size in bits, used to distinguish low level calls
//     OP - operator (used in critical section)
//                  TYPE_ID,OP_ID, TYPE, BITS,OP,GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for ATOMIC integer operands, other operators
// ------------------------------------------------------------------------
//              TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG
ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt
ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &,
                   0) // __kmpc_atomic_fixed1_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |,
                   0) // __kmpc_atomic_fixed1_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^,
                   0) // __kmpc_atomic_fixed1_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt
ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &,
                   0) // __kmpc_atomic_fixed2_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |,
                   0) // __kmpc_atomic_fixed2_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt
ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^,
                   0) // __kmpc_atomic_fixed2_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &,
                   0) // __kmpc_atomic_fixed4_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |,
                   0) // __kmpc_atomic_fixed4_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^,
                   0) // __kmpc_atomic_fixed4_xor_cpt
ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt
ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt
ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt
ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt
ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt
ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt
ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt
ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt
ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt
//              TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG

// CAPTURE routines for mixed types RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned
//     fixed)
//     OP_ID - operation identifier (add, sub, mul, ...)
//     TYPE - operands' type
#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \
      ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, \
             ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \
              gtid));
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                               RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }

// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \
  }
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3,
                       0) // __kmpc_atomic_fixed4u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp

ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp
ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp

ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_add_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_sub_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_mul_cpt_fp
ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r,
                        1) // __kmpc_atomic_float10_div_cpt_fp
#endif // KMP_HAVE_QUAD

// ------------------------------------------------------------------------
// Routines for C/C++ Reduction operators && and ||

// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP - operator (it's supposed to contain an assignment)
//     LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1, 2-byte - expect valid parameter, other - check before this macro
#define OP_CRITICAL_L_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  if (flag) { \
    new_value OP rhs; \
  } else \
    new_value = (*lhs); \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid);
// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_L_CPT(OP, 0); \
    return new_value; \
  }
#else
#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */

// ------------------------------------------------------------------------
// Need separate macros for &&, || because there is no combined assignment
#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }
ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt
ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt
ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt
ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt
ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&,
                  0) // __kmpc_atomic_fixed4_andl_cpt
ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||,
                  0) // __kmpc_atomic_fixed4_orl_cpt
ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt
ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||,
                  KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt
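
// Illustration only (assumed lowering): a captured logical update such as
//
//   // { x = x && expr; v = x; }
//
// maps onto these entries as
//
//   v = __kmpc_atomic_fixed4_andl_cpt(&loc, gtid, &x, expr, 1);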
// -------------------------------------------------------------------------
// Routines for Fortran operators that have no exact C counterpart:
// MAX, MIN, .EQV., .NEQV.
// Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt
// Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt

// -------------------------------------------------------------------------
// MIN and MAX need separate macros
//     OP - operator used to check whether any action is needed
#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  if (*lhs OP rhs) { /* still need actions? */ \
    old_value = *lhs; \
    *lhs = rhs; \
    if (flag) \
      new_value = rhs; \
    else \
      new_value = old_value; \
  } else { \
    new_value = *lhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;
// -------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    MIN_MAX_CRITSECT_CPT(OP, 0); \
  }
#else
#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// -------------------------------------------------------------------------
#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    /*TYPE old_value; */ \
    temp_val = *lhs; \
    old_value = temp_val; \
    while (old_value OP rhs && /* still need actions? */ \
           !KMP_COMPARE_AND_STORE_ACQ##BITS( \
               (kmp_int##BITS *)lhs, \
               *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
               *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \
      KMP_CPU_PAUSE(); \
      temp_val = *lhs; \
      old_value = temp_val; \
    } \
    if (flag) \
      return rhs; \
    else \
      return old_value; \
  }
// -------------------------------------------------------------------------
// 1-byte, 2-byte operands - use critical section
#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value, old_value; \
  if (*lhs OP rhs) { /* need actions? */ \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
    MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \
  } \
  return *lhs; \
  }

#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value, old_value; \
  if (*lhs OP rhs) { \
    GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \
    MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \
  } \
  return *lhs; \
  }
MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <,
                     0) // __kmpc_atomic_fixed4_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >,
                     0) // __kmpc_atomic_fixed4_min_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt
MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt
MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt
MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt
MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt
MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >,
                     KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt
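
// Illustration only (assumed lowering): a captured MAX such as
//
//   // { x = x < e ? e : x; v = x; }
//
// maps to, e.g.,
//
//   v = __kmpc_atomic_fixed4_max_cpt(&loc, gtid, &x, e, 1);
//
// The "<" passed as OP above encodes "update only while *lhs < rhs".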
#if KMP_HAVE_QUAD
MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r,
                     1) // __kmpc_atomic_float16_max_cpt
MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r,
                     1) // __kmpc_atomic_float16_min_cpt
#if (KMP_ARCH_X86)
MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r,
                     1) // __kmpc_atomic_float16_max_a16_cpt
MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r,
                     1) // __kmpc_atomic_float16_min_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */ \
  OP_CMPXCHG_CPT(TYPE, BITS, OP) \
  }
// ------------------------------------------------------------------------

ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt
ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt
ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~,
                    KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt
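
// Note (sketch, assuming Fortran logical values are stored as all-zero /
// all-one bit patterns): .EQV. is the bitwise complement of .NEQV. (xor),
// which is why the eqv entries above pass the "^~" operator:
//
//   kmp_int8 eqv = old_value ^ ~rhs; // old .EQV. rhs, computed bitwise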
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP - operator
//     LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \
  }
// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with a return value don't work
// on Win_32e. Let's return the captured values through the additional
// parameter.
#define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  if (flag) { \
    (*lhs) OP rhs; \
    (*out) = (*lhs); \
  } else { \
    (*out) = (*lhs); \
    (*lhs) OP rhs; \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------

#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_WRK(OP## =, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------

#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \
                                         TYPE rhs, TYPE *out, int flag) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid));
// ------------------------------------------------------------------------

#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \
  }
// The end of workaround for cmplx4
/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r,
                    1) // __kmpc_atomic_float10_add_cpt
ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r,
                    1) // __kmpc_atomic_float10_sub_cpt
ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r,
                    1) // __kmpc_atomic_float10_mul_cpt
ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r,
                    1) // __kmpc_atomic_float10_div_cpt
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r,
                    1) // __kmpc_atomic_float16_add_cpt
ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r,
                    1) // __kmpc_atomic_float16_sub_cpt
ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r,
                    1) // __kmpc_atomic_float16_mul_cpt
ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r,
                    1) // __kmpc_atomic_float16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r,
                    1) // __kmpc_atomic_float16_add_a16_cpt
ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r,
                    1) // __kmpc_atomic_float16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r,
                    1) // __kmpc_atomic_float16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r,
                    1) // __kmpc_atomic_float16_div_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// routines for complex types

// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c,
                        1) // __kmpc_atomic_cmplx4_add_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c,
                        1) // __kmpc_atomic_cmplx4_sub_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c,
                        1) // __kmpc_atomic_cmplx4_mul_cpt
ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c,
                        1) // __kmpc_atomic_cmplx4_div_cpt
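
// Illustration only (assumed lowering): the cmplx4 captures return through
// the extra "out" parameter instead of a return value (see the workaround
// above), e.g.
//
//   kmp_cmplx32 x, v, rhs;
//   __kmpc_atomic_cmplx4_add_cpt(&loc, gtid, &x, rhs, &v, 1); // v = (x += rhs)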
ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c,
                    1) // __kmpc_atomic_cmplx8_add_cpt
ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c,
                    1) // __kmpc_atomic_cmplx8_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c,
                    1) // __kmpc_atomic_cmplx8_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c,
                    1) // __kmpc_atomic_cmplx8_div_cpt
ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c,
                    1) // __kmpc_atomic_cmplx10_add_cpt
ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c,
                    1) // __kmpc_atomic_cmplx10_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c,
                    1) // __kmpc_atomic_cmplx10_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c,
                    1) // __kmpc_atomic_cmplx10_div_cpt
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_cpt
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c,
                    1) // __kmpc_atomic_cmplx16_add_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c,
                    1) // __kmpc_atomic_cmplx16_sub_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c,
                    1) // __kmpc_atomic_cmplx16_mul_a16_cpt
ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c,
                    1) // __kmpc_atomic_cmplx16_div_a16_cpt
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr
// binop x; v = x; } for non-commutative operations.
// Supported only on IA-32 architecture and Intel(R) 64
// -------------------------------------------------------------------------
// Operation on *lhs, rhs bound by critical section
//     OP     - operator (the macro builds the reversed assignment from it)
//     LCK_ID - lock identifier
// Note: don't check gtid as it should always be valid
// 1- and 2-byte operands are expected to be valid here; larger operands must
// be checked before this macro is used
#define OP_CRITICAL_CPT_REV(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  if (flag) { \
    /*temp_val = (*lhs);*/ \
    (*lhs) = (rhs)OP(*lhs); \
    new_value = (*lhs); \
  } else { \
    new_value = (*lhs); \
    (*lhs) = (rhs)OP(*lhs); \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return new_value;
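// Flag semantics in concrete terms (illustration only): for a reversed
// divide on double with *lhs == 4.0 and rhs == 20.0, the update is
// *lhs = 20.0 / 4.0 == 5.0 in both cases; the routine returns 5.0 (the new
// value) when flag != 0 and 4.0 (the old value) when flag == 0.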
// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_REV(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
// Operation on *lhs, rhs using "compare_and_store" routine
//     TYPE    - operands' type
//     BITS    - size in bits, used to distinguish low level calls
//     OP      - operator
// Note: temp_val is introduced in order to force the compiler to read
//       *lhs only once (without it the compiler reads *lhs twice)
#define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs OP old_value; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_CPU_PAUSE(); \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs OP old_value; \
    } \
    if (flag) { \
      return new_value; \
    } else \
      return old_value; \
  }
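// The loop above is the classic CAS retry idiom. A rough single-type
// equivalent in portable C++11, for illustration only (the runtime itself
// uses the KMP_COMPARE_AND_STORE_* macros, which map to platform
// intrinsics; the function name below is hypothetical):
//
//   #include <atomic>
//   double div_cpt_rev_sketch(std::atomic<double> *x, double rhs, int flag) {
//     double old_value = x->load();
//     double new_value = rhs / old_value;
//     // compare_exchange_weak refreshes old_value on failure
//     while (!x->compare_exchange_weak(old_value, new_value))
//       new_value = rhs / old_value;
//     return flag ? new_value : old_value;
//   }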
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  }
ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev
ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -,
                       KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev
//     TYPE_ID, OP_ID, TYPE, OP, GOMP_FLAG
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
//     TYPE_ID, OP_ID, TYPE - detailed above
//     OP - operator
//     LCK_ID - lock identifier, used to possibly distinguish lock variable
#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \
  TYPE new_value; \
  /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_REV(OP, LCK_ID) \
  }
/* ------------------------------------------------------------------------- */
// routines for long double type
ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r,
                        1) // __kmpc_atomic_float10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r,
                        1) // __kmpc_atomic_float10_div_cpt_rev
#if KMP_HAVE_QUAD
// routines for _Quad type
ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r,
                        1) // __kmpc_atomic_float16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r,
                        1) // __kmpc_atomic_float16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r,
                        1) // __kmpc_atomic_float16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, /, 16r,
                        1) // __kmpc_atomic_float16_div_a16_cpt_rev
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// routines for complex types

// ------------------------------------------------------------------------
// Workaround for cmplx4. Regular routines with a return value don't work
// on Win_32e, so the captured value is returned through an additional
// output parameter instead.
#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  if (flag) { \
    (*lhs) = (rhs)OP(*lhs); \
    (*out) = (*lhs); \
  } else { \
    (*out) = (*lhs); \
    (*lhs) = (rhs)OP(*lhs); \
  } \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    OP_CRITICAL_CPT_REV_WRK(OP, 0); \
  }
#else
#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \
                                    GOMP_FLAG) \
  ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \
  OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \
  OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \
  }
// The end of workaround for cmplx4
// !!! TODO: check if we need to return void for cmplx4 routines
// cmplx4 routines to return void
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c,
                            1) // __kmpc_atomic_cmplx4_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c,
                            1) // __kmpc_atomic_cmplx4_div_cpt_rev

ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c,
                        1) // __kmpc_atomic_cmplx8_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c,
                        1) // __kmpc_atomic_cmplx8_div_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c,
                        1) // __kmpc_atomic_cmplx10_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c,
                        1) // __kmpc_atomic_cmplx10_div_cpt_rev
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_cpt_rev
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c,
                        1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev
ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c,
                        1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// Capture reverse for mixed type: RHS=float16
#if KMP_HAVE_QUAD

// Beginning of a definition (provides name, parameters, debug trace)
//     TYPE_ID - operand type and size (fixed*, fixed*u for signed, unsigned
//               fixed)
//     OP_ID   - operation identifier (add, sub, mul, ...)
//     TYPE    - operands' type
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \
                                   RTYPE, LCK_ID, MASK, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \
  OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \
                                    LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \
  TYPE new_value; \
  OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* send assignment */ \
  OP_CRITICAL_CPT_REV(OP, LCK_ID) /* send assignment */ \
  }
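// The mixed-type instantiations below take the right-hand operand as _Quad
// while the left-hand operand keeps its own type, e.g. (restating the
// ATOMIC_BEGIN_CPT_MIX expansion, not introducing a new interface):
//
//   char __kmpc_atomic_fixed1_sub_cpt_rev_fp(ident_t *id_ref, int gtid,
//                                            char *lhs, _Quad rhs, int flag);
//
// which atomically performs *lhs = (char)(rhs - *lhs), the arithmetic being
// done in _Quad precision, and returns the old or new value according to
// flag.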
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i,
                           1,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i,
                           3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad,
                           4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i,
                           7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad,
                           8i, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad,
                           4r, 3,
                           KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp

ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp
ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad,
                           8r, 7,
                           KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp

ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp
ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad,
                            10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp

#endif // KMP_HAVE_QUAD
// OpenMP 4.0 Capture-write (swap): {v = x; x = expr;}

#define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
                                     TYPE rhs) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));
#define CRITICAL_SWP(LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  old_value = (*lhs); \
  (*lhs) = rhs; \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return old_value;
// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP(FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    CRITICAL_SWP(0); \
  }
#else
#define GOMP_CRITICAL_SWP(FLAG)
#endif /* KMP_GOMP_COMPAT */
#define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \
  return old_value; \
  }
// ------------------------------------------------------------------------
#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \
  return old_value; \
  }
// ------------------------------------------------------------------------
#define CMPXCHG_SWP(TYPE, BITS) \
  { \
    TYPE KMP_ATOMIC_VOLATILE temp_val; \
    TYPE old_value, new_value; \
    temp_val = *lhs; \
    old_value = temp_val; \
    new_value = rhs; \
    while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \
        (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \
        *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \
      KMP_CPU_PAUSE(); \
      temp_val = *lhs; \
      old_value = temp_val; \
      new_value = rhs; \
    } \
    return old_value; \
  }
// -------------------------------------------------------------------------
#define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  CMPXCHG_SWP(TYPE, BITS) \
  }
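// For illustration: the swap operation {v = x; x = rhs;} is just an atomic
// exchange. A portable C++11 sketch of the same semantics (the runtime uses
// the KMP_XCHG_*/compare-and-store macros instead; the function name below
// is hypothetical):
//
//   #include <atomic>
//   long swp_sketch(std::atomic<long> *x, long rhs) {
//     return x->exchange(rhs); // returns the previous value of *x
//   }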
ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp
ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp
ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp

ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32,
                      KMP_ARCH_X86) // __kmpc_atomic_float4_swp

#if (KMP_ARCH_X86)
ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64,
                   KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#else
ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp
ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64,
                      KMP_ARCH_X86) // __kmpc_atomic_float8_swp
#endif // (KMP_ARCH_X86)
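// Note (added for clarity): 32-bit IA-32 has no single-instruction 8-byte
// exchange, so the 8-byte swaps above fall back to the CMPXCHG-based retry
// loop there; on 64-bit targets a plain atomic exchange suffices.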
// ------------------------------------------------------------------------
// Routines for Extended types: long double, _Quad, complex flavours (use
// critical section)
#define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \
  TYPE old_value; \
  GOMP_CRITICAL_SWP(GOMP_FLAG) \
  CRITICAL_SWP(LCK_ID) \
  }
// ------------------------------------------------------------------------
// !!! TODO: check if we need to return void for cmplx4 routines
// Workaround for cmplx4. Regular routines with a return value don't work
// on Win_32e, so the captured value is returned through an additional
// output parameter instead.

#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
  void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \
                                     TYPE rhs, TYPE *out) { \
    KMP_DEBUG_ASSERT(__kmp_init_serial); \
    KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid));

#define CRITICAL_SWP_WRK(LCK_ID) \
  __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  tmp = (*lhs); \
  (*lhs) = (rhs); \
  (*out) = tmp; \
  __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \
  return;
// ------------------------------------------------------------------------
#ifdef KMP_GOMP_COMPAT
#define GOMP_CRITICAL_SWP_WRK(FLAG) \
  if ((FLAG) && (__kmp_atomic_mode == 2)) { \
    KMP_CHECK_GTID; \
    CRITICAL_SWP_WRK(0); \
  }
#else
#define GOMP_CRITICAL_SWP_WRK(FLAG)
#endif /* KMP_GOMP_COMPAT */
// ------------------------------------------------------------------------
#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \
  ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \
  TYPE tmp; \
  GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \
  CRITICAL_SWP_WRK(LCK_ID) \
  }
// The end of workaround for cmplx4
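// As with the capture routines, the cmplx4 swap returns its result through
// `out` (this restates the ATOMIC_BEGIN_SWP_WRK expansion):
//
//   void __kmpc_atomic_cmplx4_swp(ident_t *id_ref, int gtid,
//                                 kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                 kmp_cmplx32 *out);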
ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp
#endif // KMP_HAVE_QUAD
// cmplx4 routine to return void
ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp

// ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) //
// __kmpc_atomic_cmplx4_swp

ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp
ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) // __kmpc_atomic_cmplx10_swp
#if KMP_HAVE_QUAD
ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp
#if (KMP_ARCH_X86)
ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r,
                    1) // __kmpc_atomic_float16_a16_swp
ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c,
                    1) // __kmpc_atomic_cmplx16_a16_swp
#endif // (KMP_ARCH_X86)
#endif // KMP_HAVE_QUAD
// End of OpenMP 4.0 Capture

#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
/* ------------------------------------------------------------------------ */
/* Generic atomic routines                                                   */

void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#else
      TRUE
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int8 old_value, new_value;

    old_value = *(kmp_int8 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value,
                                       *(kmp_int8 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int8 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 1-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid);
  }
}
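// Usage sketch (illustration only; `op_xor8` is a hypothetical callback a
// compiler might generate). The callback computes *result = *a OP *b; the
// runtime either runs it inside a CAS retry loop or under a size-specific
// lock, depending on the target:
//
//   static void op_xor8(void *result, void *a, void *b) {
//     *(char *)result = (char)(*(char *)a ^ *(char *)b);
//   }
//   ...
//   __kmpc_atomic_1(&loc, gtid, &x, &y, op_xor8); // atomically x ^= y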
void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int16 old_value, new_value;

    old_value = *(kmp_int16 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ16(
        (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int16 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // All 2-byte data is of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid);
  }
}
void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
// FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints.
// Gomp compatibility is broken if this routine is called for floats.
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */
#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64
  ) {
    kmp_int32 old_value, new_value;

    old_value = *(kmp_int32 *)lhs;
    (*f)(&new_value, &old_value, rhs);

    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ32(
        (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int32 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_4i for all 4-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid);
  }
}
void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                     void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (
#if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
      FALSE /* must use lock */
#elif KMP_ARCH_X86 || KMP_ARCH_X86_64
      TRUE /* no alignment problems */
#else
      !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */
#endif // KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT)
  ) {
    kmp_int64 old_value, new_value;

    old_value = *(kmp_int64 *)lhs;
    (*f)(&new_value, &old_value, rhs);
    /* TODO: Should this be acquire or release? */
    while (!KMP_COMPARE_AND_STORE_ACQ64(
        (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) {
      KMP_CPU_PAUSE();

      old_value = *(kmp_int64 *)lhs;
      (*f)(&new_value, &old_value, rhs);
    }

    return;
  } else {
    // Use __kmp_atomic_lock_8i for all 8-byte data,
    // even if it isn't of integer data type.

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid);

    (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
    if (__kmp_atomic_mode == 2) {
      __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
    } else
#endif /* KMP_GOMP_COMPAT */
      __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid);
  }
}
void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid);
}
void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid);
}
void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid);
}
void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs,
                      void (*f)(void *, void *, void *)) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid);

  (*f)(lhs, lhs, rhs);

#ifdef KMP_GOMP_COMPAT
  if (__kmp_atomic_mode == 2) {
    __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
  } else
#endif /* KMP_GOMP_COMPAT */
    __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid);
}
// AC: same two routines as GOMP_atomic_start/end, but will be called by our
// compiler; duplicated in order to not use third-party names in pure Intel
// code
// TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin.
void __kmpc_atomic_start(void) {
  int gtid = __kmp_entry_gtid();
  KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid));
  __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid);
}

void __kmpc_atomic_end(void) {
  int gtid = __kmp_get_gtid();
  KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid));
  __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid);
}
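// Usage sketch (illustration only): a compiler can bracket an atomic update
// that has no specialized entrypoint with these calls; everything between
// them is serialized on the global __kmp_atomic_lock:
//
//   __kmpc_atomic_start();
//   x = user_defined_combine(x, rhs); // hypothetical update
//   __kmpc_atomic_end();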