3 * kmp_itt.inl -- Inline functions of ITT Notify.
6 //===----------------------------------------------------------------------===//
8 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
9 // See https://llvm.org/LICENSE.txt for license information.
10 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
12 //===----------------------------------------------------------------------===//
14 // Inline function definitions. This file should be included into kmp_itt.h file
15 // for production build (to let the compiler inline functions) or into kmp_itt.c
16 // file for debug build (to reduce the number of files to recompile and save
// Debug-tracing helpers: when KMP_ITT_DEBUG is enabled, each ITT call site
// prints a gtid-tagged line to stderr under a global bootstrap lock; otherwise
// both macros compile away to nothing.
// NOTE(review): this extraction is missing physical lines (embedded numbering
// jumps), so some #if/#endif guards and macro continuation lines are not
// visible here — confirm against the full file before editing.
23 extern kmp_bootstrap_lock_t __kmp_itt_debug_lock;
24 #define KMP_ITT_DEBUG_LOCK() \
25 { __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock); }
26 #define KMP_ITT_DEBUG_PRINT(...) \
28 fprintf(stderr, "#%02d: ", __kmp_get_gtid()); \
29 fprintf(stderr, __VA_ARGS__); \
31 __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock); \
34 #define KMP_ITT_DEBUG_LOCK()
35 #define KMP_ITT_DEBUG_PRINT(...)
36 #endif // KMP_ITT_DEBUG
38 // Ensure that the functions are static if they're supposed to be being inlined.
39 // Otherwise they cannot be used in more than one file, since there will be
40 // multiple definitions.
44 #define LINKAGE static inline
47 // ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses
48 // this API to support user-defined synchronization primitives, but does not use
49 // ZCA; it would be safe to turn this off until wider support becomes available.
51 #ifdef __INTEL_COMPILER
52 #if __INTEL_COMPILER >= 1200
53 #undef __itt_sync_acquired
54 #undef __itt_sync_releasing
55 #define __itt_sync_acquired(addr) \
56 __notify_zc_intrinsic((char *)"sync_acquired", addr)
57 #define __itt_sync_releasing(addr) \
58 __notify_intrinsic((char *)"sync_releasing", addr)
// Guards lazy one-time creation of the ITT metadata domain and string handles
// used by the __kmp_itt_metadata_* functions below.
63 static kmp_bootstrap_lock_t metadata_lock =
64 KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock);
66 /* Parallel region reporting.
67 * __kmp_itt_region_forking should be called by master thread of a team.
68 Exact moment of call does not matter, but it should be completed before any
69 thread of this team calls __kmp_itt_region_starting.
70 * __kmp_itt_region_starting should be called by each thread of a team just
71 before entering parallel region body.
72 * __kmp_itt_region_finished should be called by each thread of a team right
73 after returning from parallel region body.
74 * __kmp_itt_region_joined should be called by master thread of a team, after
75 all threads called __kmp_itt_region_finished.
77 Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can
78 execute some more user code -- such a thread can execute tasks.
80 Note: The overhead of logging region_starting and region_finished in each
81 thread is too large, so these calls are not used. */
// Report the start of an outermost parallel-region frame to ITT. Called by
// the master thread at fork time; pairs with __kmp_itt_region_joined.
// The region-domain index is cached in loc->reserved_2 (low 16 bits region,
// high 16 bits barrier domain), so the domain is created once per source
// location (and re-created if the team size changes).
// NOTE(review): lines are missing from this extraction (declarations such as
// "char *buff;"/"int frm;", closing braces, and USE_ITT_NOTIFY guards) —
// the visible text is not the complete function body.
83 LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) {
85 kmp_team_t *team = __kmp_team_from_gtid(gtid);
86 if (team->t.t_active_level > 1) {
87 // The frame notifications are only supported for the outermost teams.
90 ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
92 // Use the reserved_2 field to store the index to the region domain.
93 // Assume that reserved_2 contains zero initially. Since zero is special
94 // value here, store the index into domain array increased by 1.
95 if (loc->reserved_2 == 0) {
96 if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
98 KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
99 if (frm >= KMP_MAX_FRAME_DOMAINS) {
100 KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
101 return; // loc->reserved_2 is still 0
103 // if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) {
104 // frm = loc->reserved_2 - 1; // get value saved by other thread
106 //} // AC: this block is to replace next unsynchronized line
108 // We need to save indexes for both region and barrier frames. We'll use
109 // loc->reserved_2 field but put region index to the low two bytes and
110 // barrier indexes to the high two bytes. It is OK because
111 // KMP_MAX_FRAME_DOMAINS = 512.
112 loc->reserved_2 |= (frm + 1); // save "new" value
114 // Transform compiler-generated region location into the format
115 // that the tools more or less standardized on:
116 // "<func>$omp$parallel@[file:]<line>[:<col>]"
118 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
119 buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
120 team_size, str_loc.file, str_loc.line,
123 __itt_suppress_push(__itt_suppress_memory_errors);
124 __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
125 __itt_suppress_pop();
127 __kmp_str_free(&buff);
// Optionally create a matching barrier domain for this location too.
129 if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
130 int frm = KMP_TEST_THEN_INC32(
131 &__kmp_barrier_domain_count); // get "old" value
132 if (frm >= KMP_MAX_FRAME_DOMAINS) {
134 &__kmp_barrier_domain_count); // revert the count
135 return; // loc->reserved_2 is still 0
138 buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
139 str_loc.file, str_loc.col);
140 __itt_suppress_push(__itt_suppress_memory_errors);
141 __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
142 __itt_suppress_pop();
143 __kmp_str_free(&buff);
144 // Save the barrier frame index to the high two bytes.
145 loc->reserved_2 |= (frm + 1) << 16;
148 __kmp_str_loc_free(&str_loc);
149 __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
151 } else { // Region domain exists for this location
152 // Check if team size was changed. Then create new region domain for this
154 unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
155 if ((frm < KMP_MAX_FRAME_DOMAINS) &&
156 (__kmp_itt_region_team_size[frm] != team_size)) {
158 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
159 buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
160 team_size, str_loc.file, str_loc.line,
163 __itt_suppress_push(__itt_suppress_memory_errors);
164 __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
165 __itt_suppress_pop();
167 __kmp_str_free(&buff);
168 __kmp_str_loc_free(&str_loc);
169 __kmp_itt_region_team_size[frm] = team_size;
170 __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
171 } else { // Team size was not changed. Use existing domain.
172 __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL);
175 KMP_ITT_DEBUG_LOCK();
176 KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, idx=%x, loc:%p\n", gtid,
177 loc->reserved_2, loc);
180 } // __kmp_itt_region_forking
182 // -----------------------------------------------------------------------------
// Submit a completed [begin, end] frame to ITT for either a parallel region
// (region != 0; region == 2 means the region was serialized) or a barrier
// (region == 0). Domain indices are cached in loc->reserved_2 exactly as in
// __kmp_itt_region_forking: low 16 bits = region domain, high 16 bits =
// barrier/imbalance domain.
// NOTE(review): this extraction is missing physical lines (declarations,
// closing braces, #if guards around the imbalance branch) — not a complete
// view of the function.
183 LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin,
184 __itt_timestamp end, int imbalance,
185 ident_t *loc, int team_size, int region) {
188 kmp_team_t *team = __kmp_team_from_gtid(gtid);
189 int serialized = (region == 2 ? 1 : 0);
190 if (team->t.t_active_level + serialized > 1) {
191 // The frame notifications are only supported for the outermost teams.
194 // Check region domain has not been created before. It's index is saved in
195 // the low two bytes.
196 if ((loc->reserved_2 & 0x0000FFFF) == 0) {
197 if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) {
199 KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value
200 if (frm >= KMP_MAX_FRAME_DOMAINS) {
201 KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count
202 return; // loc->reserved_2 is still 0
205 // We need to save indexes for both region and barrier frames. We'll use
206 // loc->reserved_2 field but put region index to the low two bytes and
207 // barrier indexes to the high two bytes. It is OK because
208 // KMP_MAX_FRAME_DOMAINS = 512.
209 loc->reserved_2 |= (frm + 1); // save "new" value
211 // Transform compiler-generated region location into the format
212 // that the tools more or less standardized on:
213 // "<func>$omp$parallel:team_size@[file:]<line>[:<col>]"
215 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
216 buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
217 team_size, str_loc.file, str_loc.line,
220 __itt_suppress_push(__itt_suppress_memory_errors);
221 __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
222 __itt_suppress_pop();
224 __kmp_str_free(&buff);
225 __kmp_str_loc_free(&str_loc);
226 __kmp_itt_region_team_size[frm] = team_size;
227 __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
229 } else { // Region domain exists for this location
230 // Check if team size was changed. Then create new region domain for this
232 unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
233 if ((frm < KMP_MAX_FRAME_DOMAINS) &&
234 (__kmp_itt_region_team_size[frm] != team_size)) {
236 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
237 buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func,
238 team_size, str_loc.file, str_loc.line,
241 __itt_suppress_push(__itt_suppress_memory_errors);
242 __kmp_itt_region_domains[frm] = __itt_domain_create(buff);
243 __itt_suppress_pop();
245 __kmp_str_free(&buff);
246 __kmp_str_loc_free(&str_loc);
247 __kmp_itt_region_team_size[frm] = team_size;
248 __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
249 } else { // Team size was not changed. Use existing domain.
250 __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end);
253 KMP_ITT_DEBUG_LOCK();
255 "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n",
256 gtid, loc->reserved_2, region, loc, begin, end);
258 } else { // called for barrier reporting
260 if ((loc->reserved_2 & 0xFFFF0000) == 0) {
261 if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) {
262 int frm = KMP_TEST_THEN_INC32(
263 &__kmp_barrier_domain_count); // get "old" value
264 if (frm >= KMP_MAX_FRAME_DOMAINS) {
266 &__kmp_barrier_domain_count); // revert the count
267 return; // loc->reserved_2 is still 0
269 // Save the barrier frame index to the high two bytes.
270 loc->reserved_2 |= (frm + 1) << 16; // save "new" value
272 // Transform compiler-generated region location into the format
273 // that the tools more or less standardized on:
274 // "<func>$omp$frame@[file:]<line>[:<col>]"
275 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
// Imbalance frames get their own domain; plain barrier frames another.
277 char *buff_imb = NULL;
278 buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d",
279 str_loc.func, team_size, str_loc.file,
281 __itt_suppress_push(__itt_suppress_memory_errors);
282 __kmp_itt_imbalance_domains[frm] = __itt_domain_create(buff_imb);
283 __itt_suppress_pop();
284 __itt_frame_submit_v3(__kmp_itt_imbalance_domains[frm], NULL, begin,
286 __kmp_str_free(&buff_imb);
289 buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func,
290 str_loc.file, str_loc.col);
291 __itt_suppress_push(__itt_suppress_memory_errors);
292 __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff);
293 __itt_suppress_pop();
294 __itt_frame_submit_v3(__kmp_itt_barrier_domains[frm], NULL, begin,
296 __kmp_str_free(&buff);
298 __kmp_str_loc_free(&str_loc);
300 } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS
302 __itt_frame_submit_v3(
303 __kmp_itt_imbalance_domains[(loc->reserved_2 >> 16) - 1], NULL,
306 __itt_frame_submit_v3(
307 __kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL,
311 KMP_ITT_DEBUG_LOCK();
313 "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", gtid,
314 loc->reserved_2, loc, begin, end);
318 } // __kmp_itt_frame_submit
320 // -----------------------------------------------------------------------------
// Attach barrier-imbalance metadata (begin, end, imbalance, reduction) to the
// lazily-created "OMP Metadata" ITT domain. Uses double-checked locking on
// metadata_domain under metadata_lock for one-time initialization.
// NOTE(review): extraction is missing lines (e.g. the #if USE_ITT_NOTIFY
// guard and some closing braces).
321 LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin,
322 kmp_uint64 end, kmp_uint64 imbalance,
323 kmp_uint64 reduction) {
325 if (metadata_domain == NULL) {
326 __kmp_acquire_bootstrap_lock(&metadata_lock);
327 if (metadata_domain == NULL) {
328 __itt_suppress_push(__itt_suppress_memory_errors);
329 metadata_domain = __itt_domain_create("OMP Metadata");
330 string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
331 string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
332 string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
333 __itt_suppress_pop();
335 __kmp_release_bootstrap_lock(&metadata_lock);
338 kmp_uint64 imbalance_data[4];
339 imbalance_data[0] = begin;
340 imbalance_data[1] = end;
341 imbalance_data[2] = imbalance;
342 imbalance_data[3] = reduction;
344 __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl,
345 __itt_metadata_u64, 4, imbalance_data);
347 } // __kmp_itt_metadata_imbalance
349 // -----------------------------------------------------------------------------
// Attach loop metadata (line, column, schedule, iteration count, chunk) to
// the "OMP Metadata" domain. Line/column are parsed out of the
// compiler-generated psource string ";file;func;line;col;;".
// NOTE(review): extraction is missing lines (s_line/s_col declarations and
// the #if/#else around the CCAST variant are only partially visible).
350 LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type,
351 kmp_uint64 iterations, kmp_uint64 chunk) {
353 if (metadata_domain == NULL) {
354 __kmp_acquire_bootstrap_lock(&metadata_lock);
355 if (metadata_domain == NULL) {
356 __itt_suppress_push(__itt_suppress_memory_errors);
357 metadata_domain = __itt_domain_create("OMP Metadata");
358 string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
359 string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
360 string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
361 __itt_suppress_pop();
363 __kmp_release_bootstrap_lock(&metadata_lock);
366 // Parse line and column from psource string: ";file;func;line;col;;"
369 KMP_DEBUG_ASSERT(loc->psource);
371 s_line = strchr(CCAST(char *, loc->psource), ';');
373 s_line = strchr(loc->psource, ';');
375 KMP_DEBUG_ASSERT(s_line);
376 s_line = strchr(s_line + 1, ';'); // 2-nd semicolon
377 KMP_DEBUG_ASSERT(s_line);
378 s_line = strchr(s_line + 1, ';'); // 3-rd semicolon
379 KMP_DEBUG_ASSERT(s_line);
380 s_col = strchr(s_line + 1, ';'); // 4-th semicolon
381 KMP_DEBUG_ASSERT(s_col);
383 kmp_uint64 loop_data[5];
384 loop_data[0] = atoi(s_line + 1); // read line
385 loop_data[1] = atoi(s_col + 1); // read column
386 loop_data[2] = sched_type;
387 loop_data[3] = iterations;
388 loop_data[4] = chunk;
390 __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop,
391 __itt_metadata_u64, 5, loop_data);
393 } // __kmp_itt_metadata_loop
395 // -----------------------------------------------------------------------------
// Attach single-construct metadata (source line and column) to the
// "OMP Metadata" domain; same lazy double-checked init as the other
// __kmp_itt_metadata_* functions.
396 LINKAGE void __kmp_itt_metadata_single(ident_t *loc) {
398 if (metadata_domain == NULL) {
399 __kmp_acquire_bootstrap_lock(&metadata_lock);
400 if (metadata_domain == NULL) {
401 __itt_suppress_push(__itt_suppress_memory_errors);
402 metadata_domain = __itt_domain_create("OMP Metadata");
403 string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance");
404 string_handle_loop = __itt_string_handle_create("omp_metadata_loop");
405 string_handle_sngl = __itt_string_handle_create("omp_metadata_single");
406 __itt_suppress_pop();
408 __kmp_release_bootstrap_lock(&metadata_lock);
411 kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1);
412 kmp_uint64 single_data[2];
413 single_data[0] = str_loc.line;
414 single_data[1] = str_loc.col;
416 __kmp_str_loc_free(&str_loc);
418 __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl,
419 __itt_metadata_u64, 2, single_data);
421 } // __kmp_itt_metadata_single
423 // -----------------------------------------------------------------------------
// Per-thread region start/finish hooks are intentionally empty (see the
// note above: logging them per thread was too expensive, so only
// forking/joined are reported).
424 LINKAGE void __kmp_itt_region_starting(int gtid) {
427 } // __kmp_itt_region_starting
429 // -----------------------------------------------------------------------------
430 LINKAGE void __kmp_itt_region_finished(int gtid) {
433 } // __kmp_itt_region_finished
// Close the region frame opened by __kmp_itt_region_forking. Called by the
// master thread after all team threads finished the region body.
435 // ----------------------------------------------------------------------------
436 LINKAGE void __kmp_itt_region_joined(int gtid) {
438 kmp_team_t *team = __kmp_team_from_gtid(gtid);
439 if (team->t.t_active_level > 1) {
440 // The frame notifications are only supported for the outermost teams.
443 ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident;
444 if (loc && loc->reserved_2) {
445 unsigned int frm = (loc->reserved_2 & 0x0000FFFF) - 1;
446 if (frm < KMP_MAX_FRAME_DOMAINS) {
447 KMP_ITT_DEBUG_LOCK();
448 __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL);
449 KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, idx=%x, loc:%p\n", gtid,
450 loc->reserved_2, loc);
454 } // __kmp_itt_region_joined
456 /* Barriers reporting.
458 A barrier consists of two phases:
459 1. Gather -- master waits for arriving of all the worker threads; each
460 worker thread registers arrival and goes further.
461 2. Release -- each worker threads waits until master lets it go; master lets
464 Function should be called by each thread:
465 * __kmp_itt_barrier_starting() -- before arriving to the gather phase.
466 * __kmp_itt_barrier_middle() -- between gather and release phases.
467 * __kmp_itt_barrier_finished() -- after release phase.
469 Note: Call __kmp_itt_barrier_object() before call to
470 __kmp_itt_barrier_starting() and save result in local variable.
471 __kmp_itt_barrier_object(), being called too late (e. g. after gather phase)
472 would return itt sync object for the next barrier!
474 ITT need an address (void *) to be specified as a sync object. OpenMP RTL
475 does not have barrier object or barrier data structure. Barrier is just a
476 counter in team and thread structures. We could use an address of team
477 structure as a barrier sync object, but ITT wants different objects for
478 different barriers (even within the same team). So let us use team address
479 as barrier sync object for the first barrier, then increase it by one for the
480 next barrier, and so on (but wrap it not to use addresses outside of team
// Construct a synthetic sync-object address for the current (or, with
// delta=-1, previous) barrier of type `bt`, and optionally register a
// human-readable name for it with ITT (set_name != 0). The address is
// derived from the team pointer plus the barrier counter, wrapped so it
// stays inside the team structure.
// NOTE(review): extraction is missing lines (the `object`/`counter`
// declarations, several braces, and #if guards are not visible).
483 void *__kmp_itt_barrier_object(int gtid, int bt, int set_name,
484 int delta // 0 (current barrier) is default
485 // value; specify -1 to get previous
490 kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
491 kmp_team_t *team = thr->th.th_team;
493 // NOTE: If the function is called from __kmp_fork_barrier, team pointer can
494 // be NULL. This "if" helps to avoid crash. However, this is not complete
495 // solution, and reporting fork/join barriers to ITT should be revisited.
498 // Master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time.
499 // Divide b_arrived by KMP_BARRIER_STATE_BUMP to get plain barrier counter.
501 team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta;
502 // Now form the barrier id. Encode barrier type (bt) in barrier id too, so
503 // barriers of different types do not have the same ids.
504 KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier);
505 // This condition is a must (we would have zero divide otherwise).
506 KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier);
507 // Stronger condition: make sure we have room at least for two
508 // different ids (for each barrier type).
509 object = reinterpret_cast<void *>(
510 kmp_uintptr_t(team) +
511 counter % (sizeof(kmp_team_t) / bs_last_barrier) * bs_last_barrier +
513 KMP_ITT_DEBUG_LOCK();
514 KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt,
// When naming is requested, derive a descriptive type string from the
// compiler-provided barrier flags / location.
518 ident_t const *loc = NULL;
519 char const *src = NULL;
520 char const *type = "OMP Barrier";
522 case bs_plain_barrier: {
523 // For plain barrier compiler calls __kmpc_barrier() function, which
524 // saves location in thr->th.th_ident.
525 loc = thr->th.th_ident;
526 // Get the barrier type from flags provided by compiler.
531 expl = (loc->flags & KMP_IDENT_BARRIER_EXPL) != 0;
532 impl = (loc->flags & KMP_IDENT_BARRIER_IMPL) != 0;
535 switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) {
536 case KMP_IDENT_BARRIER_IMPL_FOR: {
537 type = "OMP For Barrier";
539 case KMP_IDENT_BARRIER_IMPL_SECTIONS: {
540 type = "OMP Sections Barrier";
542 case KMP_IDENT_BARRIER_IMPL_SINGLE: {
543 type = "OMP Single Barrier";
545 case KMP_IDENT_BARRIER_IMPL_WORKSHARE: {
546 type = "OMP Workshare Barrier";
549 type = "OMP Implicit Barrier";
554 type = "OMP Explicit Barrier";
557 case bs_forkjoin_barrier: {
558 // In case of fork/join barrier we can read thr->th.th_ident, because it
559 // contains location of last passed construct (while join barrier is not
560 // such one). Use th_ident of master thread instead -- __kmp_join_call()
561 // called by the master thread saves location.
563 // AC: cannot read from master because __kmp_join_call may be not called
564 // yet, so we read the location from team. This is the same location.
565 // And team is valid at the enter to join barrier where this happens.
566 loc = team->t.t_ident;
570 type = "OMP Join Barrier";
573 KMP_ITT_DEBUG_LOCK();
574 __itt_sync_create(object, type, src, __itt_attr_barrier);
576 "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object,
582 } // __kmp_itt_barrier_object
584 // -----------------------------------------------------------------------------
// Barrier phase hooks: starting (before gather), middle (between gather and
// release), finished (after release). Workers announce "releasing" on entry;
// everyone announces "prepare"; the master announces acquired/releasing at
// the middle and workers announce acquired at the end.
585 void __kmp_itt_barrier_starting(int gtid, void *object) {
587 if (!KMP_MASTER_GTID(gtid)) {
588 KMP_ITT_DEBUG_LOCK();
589 __itt_sync_releasing(object);
590 KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object);
592 KMP_ITT_DEBUG_LOCK();
593 __itt_sync_prepare(object);
594 KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object);
596 } // __kmp_itt_barrier_starting
598 // -----------------------------------------------------------------------------
599 void __kmp_itt_barrier_middle(int gtid, void *object) {
601 if (KMP_MASTER_GTID(gtid)) {
602 KMP_ITT_DEBUG_LOCK();
603 __itt_sync_acquired(object);
604 KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object);
605 KMP_ITT_DEBUG_LOCK();
606 __itt_sync_releasing(object);
607 KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object);
611 } // __kmp_itt_barrier_middle
613 // -----------------------------------------------------------------------------
614 void __kmp_itt_barrier_finished(int gtid, void *object) {
616 if (KMP_MASTER_GTID(gtid)) {
618 KMP_ITT_DEBUG_LOCK();
619 __itt_sync_acquired(object);
620 KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object);
623 } // __kmp_itt_barrier_finished
625 /* Taskwait reporting.
626 ITT need an address (void *) to be specified as a sync object. OpenMP RTL
627 does not have taskwait structure, so we need to construct something. */
// Taskwait hooks. Since the RTL has no taskwait structure, a synthetic sync
// address is derived from the current taskdata pointer plus the taskwait
// counter (wrapped to stay inside the taskdata object).
629 void *__kmp_itt_taskwait_object(int gtid) {
632 if (__itt_sync_create_ptr) {
633 kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
634 kmp_taskdata_t *taskdata = thread->th.th_current_task;
635 object = reinterpret_cast<void *>(kmp_uintptr_t(taskdata) +
636 taskdata->td_taskwait_counter %
637 sizeof(kmp_taskdata_t));
641 } // __kmp_itt_taskwait_object
// Name the taskwait sync object (using the taskwait's source location, if
// known) and announce "prepare" before the wait begins.
643 void __kmp_itt_taskwait_starting(int gtid, void *object) {
645 kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
646 kmp_taskdata_t *taskdata = thread->th.th_current_task;
647 ident_t const *loc = taskdata->td_taskwait_ident;
648 char const *src = (loc == NULL ? NULL : loc->psource);
649 KMP_ITT_DEBUG_LOCK();
650 __itt_sync_create(object, "OMP Taskwait", src, 0);
651 KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n",
653 KMP_ITT_DEBUG_LOCK();
654 __itt_sync_prepare(object);
655 KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object);
657 } // __kmp_itt_taskwait_starting
// Announce "acquired" and destroy the sync object once the wait completes.
659 void __kmp_itt_taskwait_finished(int gtid, void *object) {
661 KMP_ITT_DEBUG_LOCK();
662 __itt_sync_acquired(object);
663 KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object);
664 KMP_ITT_DEBUG_LOCK();
665 __itt_sync_destroy(object);
666 KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object);
668 } // __kmp_itt_taskwait_finished
671 Only those tasks are reported which are executed by a thread spinning at
672 barrier (or taskwait). Sync object passed to the function must be the barrier
673 or taskwait that the threads are waiting at. */
// Task hooks for threads executing tasks while spinning at a barrier or
// taskwait: cancel the pending wait when a task starts, and re-issue
// "prepare" when the task finishes (the wait resumes).
675 void __kmp_itt_task_starting(
676 void *object // ITT sync object: barrier or taskwait.
679 if (object != NULL) {
680 KMP_ITT_DEBUG_LOCK();
681 __itt_sync_cancel(object);
682 KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object);
685 } // __kmp_itt_task_starting
687 // -----------------------------------------------------------------------------
688 void __kmp_itt_task_finished(
689 void *object // ITT sync object: barrier or taskwait.
692 KMP_ITT_DEBUG_LOCK();
693 __itt_sync_prepare(object);
694 KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object);
696 } // __kmp_itt_task_finished
699 * __kmp_itt_lock_creating( lock ) should be called *before* the first lock
700 operation (set/unset). It is not a real event shown to the user but just
701 setting a name for synchronization object. `lock' is an address of sync
702 object, the same address should be used in all subsequent calls.
703 * __kmp_itt_lock_acquiring() should be called before setting the lock.
704 * __kmp_itt_lock_acquired() should be called after setting the lock.
705 * __kmp_itt_lock_releasing() should be called before unsetting the lock.
706 * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting
708 * __kmp_itt_lock_destroyed( lock ) should be called after the last lock
709 operation. After __kmp_itt_lock_destroyed() all the references to the same
710 address will be considered as another sync object, not related with the
// Common guts for naming (init) and destroying (fini) lock/critical sync
// objects. The dynamic-lock build receives the location directly; the
// legacy build looks it up via __kmp_get_user_lock_location_.
713 #if KMP_USE_DYNAMIC_LOCK
714 // Takes location information directly
715 __kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type,
716 const ident_t *loc) {
718 if (__itt_sync_create_ptr) {
719 char const *src = (loc == NULL ? NULL : loc->psource);
720 KMP_ITT_DEBUG_LOCK();
721 __itt_sync_create(lock, type, src, 0);
722 KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
727 #else // KMP_USE_DYNAMIC_LOCK
728 // Internal guts -- common code for locks and critical sections, do not call
730 __kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) {
732 if (__itt_sync_create_ptr) {
733 ident_t const *loc = NULL;
734 if (__kmp_get_user_lock_location_ != NULL)
735 loc = __kmp_get_user_lock_location_((lock));
736 char const *src = (loc == NULL ? NULL : loc->psource);
737 KMP_ITT_DEBUG_LOCK();
738 __itt_sync_create(lock, type, src, 0);
739 KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type,
743 } // ___kmp_itt_lock_init
744 #endif // KMP_USE_DYNAMIC_LOCK
746 // Internal guts -- common code for locks and critical sections, do not call
748 __kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) {
750 KMP_ITT_DEBUG_LOCK();
751 __itt_sync_destroy(lock);
752 KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock);
754 } // ___kmp_itt_lock_fini
// Public lock-event reporters. With dynamic locks, a direct (tagged) lock is
// reported as-is, while an indirect lock (tag 0) is first resolved through
// the indirect-lock table so ITT sees the real lock object. The non-dynamic
// build reports the user lock pointer directly.
756 // -----------------------------------------------------------------------------
757 #if KMP_USE_DYNAMIC_LOCK
758 void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) {
759 ___kmp_itt_lock_init(lock, "OMP Lock", loc);
762 void __kmp_itt_lock_creating(kmp_user_lock_p lock) {
763 ___kmp_itt_lock_init(lock, "OMP Lock");
764 } // __kmp_itt_lock_creating
767 void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) {
768 #if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
769 // postpone lock object access
770 if (__itt_sync_prepare_ptr) {
771 if (KMP_EXTRACT_D_TAG(lock) == 0) {
772 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
773 __itt_sync_prepare(ilk->lock);
775 __itt_sync_prepare(lock);
779 __itt_sync_prepare(lock);
781 } // __kmp_itt_lock_acquiring
783 void __kmp_itt_lock_acquired(kmp_user_lock_p lock) {
784 #if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
785 // postpone lock object access
786 if (__itt_sync_acquired_ptr) {
787 if (KMP_EXTRACT_D_TAG(lock) == 0) {
788 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
789 __itt_sync_acquired(ilk->lock);
791 __itt_sync_acquired(lock);
795 __itt_sync_acquired(lock);
797 } // __kmp_itt_lock_acquired
799 void __kmp_itt_lock_releasing(kmp_user_lock_p lock) {
800 #if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
801 if (__itt_sync_releasing_ptr) {
802 if (KMP_EXTRACT_D_TAG(lock) == 0) {
803 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
804 __itt_sync_releasing(ilk->lock);
806 __itt_sync_releasing(lock);
810 __itt_sync_releasing(lock);
812 } // __kmp_itt_lock_releasing
814 void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) {
815 #if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY
816 if (__itt_sync_cancel_ptr) {
817 if (KMP_EXTRACT_D_TAG(lock) == 0) {
818 kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
819 __itt_sync_cancel(ilk->lock);
821 __itt_sync_cancel(lock);
825 __itt_sync_cancel(lock);
827 } // __kmp_itt_lock_cancelled
829 void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) {
830 ___kmp_itt_lock_fini(lock, "OMP Lock");
831 } // __kmp_itt_lock_destroyed
833 /* Critical reporting.
834 Critical sections are treated exactly as locks (but have different object
836 #if KMP_USE_DYNAMIC_LOCK
837 void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) {
838 ___kmp_itt_lock_init(lock, "OMP Critical", loc);
841 void __kmp_itt_critical_creating(kmp_user_lock_p lock) {
842 ___kmp_itt_lock_init(lock, "OMP Critical");
843 } // __kmp_itt_critical_creating
846 void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) {
847 __itt_sync_prepare(lock);
848 } // __kmp_itt_critical_acquiring
850 void __kmp_itt_critical_acquired(kmp_user_lock_p lock) {
851 __itt_sync_acquired(lock);
852 } // __kmp_itt_critical_acquired
854 void __kmp_itt_critical_releasing(kmp_user_lock_p lock) {
855 __itt_sync_releasing(lock);
856 } // __kmp_itt_critical_releasing
858 void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) {
859 ___kmp_itt_lock_fini(lock, "OMP Critical");
860 } // __kmp_itt_critical_destroyed
862 /* Single reporting. */
// Single-construct reporting via ITT marks: create a named mark
// ("OMP Single-<psource>") when the single body starts and switch it off
// when it ends. The mark id is stashed in th_itt_mark_single.
864 void __kmp_itt_single_start(int gtid) {
866 if (__itt_mark_create_ptr || KMP_ITT_DEBUG) {
867 kmp_info_t *thr = __kmp_thread_from_gtid((gtid));
868 ident_t *loc = thr->th.th_ident;
869 char const *src = (loc == NULL ? NULL : loc->psource);
871 __kmp_str_buf_init(&name);
872 __kmp_str_buf_print(&name, "OMP Single-%s", src);
873 KMP_ITT_DEBUG_LOCK();
874 thr->th.th_itt_mark_single = __itt_mark_create(name.str);
875 KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str,
876 thr->th.th_itt_mark_single);
877 __kmp_str_buf_free(&name);
878 KMP_ITT_DEBUG_LOCK();
879 __itt_mark(thr->th.th_itt_mark_single, NULL);
880 KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n",
881 thr->th.th_itt_mark_single);
884 } // __kmp_itt_single_start
886 void __kmp_itt_single_end(int gtid) {
888 __itt_mark_type mark = __kmp_thread_from_gtid(gtid)->th.th_itt_mark_single;
889 KMP_ITT_DEBUG_LOCK();
890 __itt_mark_off(mark);
891 KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", mark);
893 } // __kmp_itt_single_end
895 /* Ordered reporting.
896 * __kmp_itt_ordered_init is called by each thread *before* first using sync
897 object. ITT team would like it to be called once, but it requires extra
899 * __kmp_itt_ordered_prep is called when thread is going to enter ordered
900 section (before synchronization).
901 * __kmp_itt_ordered_start is called just before entering user code (after
903 * __kmp_itt_ordered_end is called after returning from user code.
905 Sync object is th->th.th_dispatch->th_dispatch_sh_current.
906 Events are not generated in case of serialized team. */
// Ordered-section reporting. The sync object is
// th->th.th_dispatch->th_dispatch_sh_current; no events are generated for a
// serialized team (the t_serialized checks below).
908 void __kmp_itt_ordered_init(int gtid) {
910 if (__itt_sync_create_ptr) {
911 kmp_info_t *thr = __kmp_thread_from_gtid(gtid);
912 ident_t const *loc = thr->th.th_ident;
913 char const *src = (loc == NULL ? NULL : loc->psource);
914 __itt_sync_create(thr->th.th_dispatch->th_dispatch_sh_current,
915 "OMP Ordered", src, 0);
918 } // __kmp_itt_ordered_init
// "prepare" before synchronization when entering the ordered section.
920 void __kmp_itt_ordered_prep(int gtid) {
922 if (__itt_sync_create_ptr) {
923 kmp_team_t *t = __kmp_team_from_gtid(gtid);
924 if (!t->t.t_serialized) {
925 kmp_info_t *th = __kmp_thread_from_gtid(gtid);
926 __itt_sync_prepare(th->th.th_dispatch->th_dispatch_sh_current);
930 } // __kmp_itt_ordered_prep
// "acquired" just before entering the user code of the ordered section.
932 void __kmp_itt_ordered_start(int gtid) {
934 if (__itt_sync_create_ptr) {
935 kmp_team_t *t = __kmp_team_from_gtid(gtid);
936 if (!t->t.t_serialized) {
937 kmp_info_t *th = __kmp_thread_from_gtid(gtid);
938 __itt_sync_acquired(th->th.th_dispatch->th_dispatch_sh_current);
942 } // __kmp_itt_ordered_start
// "releasing" after returning from the ordered section body.
944 void __kmp_itt_ordered_end(int gtid) {
946 if (__itt_sync_create_ptr) {
947 kmp_team_t *t = __kmp_team_from_gtid(gtid);
948 if (!t->t.t_serialized) {
949 kmp_info_t *th = __kmp_thread_from_gtid(gtid);
950 __itt_sync_releasing(th->th.th_dispatch->th_dispatch_sh_current);
954 } // __kmp_itt_ordered_end
956 /* Threads reporting. */
// Thread reporting: optionally tell ITT to ignore a thread, and give each
// OpenMP thread a readable name ("OMP Master Thread #N" / "OMP Worker
// Thread #N") in tool output.
958 void __kmp_itt_thread_ignore() {
960 } // __kmp_itt_thread_ignore
962 void __kmp_itt_thread_name(int gtid) {
964 if (__itt_thr_name_set_ptr) {
966 __kmp_str_buf_init(&name);
967 if (KMP_MASTER_GTID(gtid)) {
968 __kmp_str_buf_print(&name, "OMP Master Thread #%d", gtid);
970 __kmp_str_buf_print(&name, "OMP Worker Thread #%d", gtid);
972 KMP_ITT_DEBUG_LOCK();
973 __itt_thr_name_set(name.str, name.used);
974 KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", name.str);
975 __kmp_str_buf_free(&name);
978 } // __kmp_itt_thread_name
980 /* System object reporting.
981 ITT catches operations with system sync objects (like Windows* OS on IA-32
982 architecture API critical sections and events). We only need to specify
983 name ("OMP Scheduler") for the object to let ITT know it is an object used
984 by OpenMP RTL for internal purposes. */
// Label an internal OS sync object as "OMP Scheduler" so tools know it
// belongs to the runtime rather than to user code.
986 void __kmp_itt_system_object_created(void *object, char const *name) {
988 KMP_ITT_DEBUG_LOCK();
989 __itt_sync_create(object, "OMP Scheduler", name, 0);
990 KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n",
993 } // __kmp_itt_system_object_created
995 /* Stack stitching api.
996 Master calls "create" and put the stitching id into team structure.
997 Workers read the stitching id and call "enter" / "leave" api.
998 Master calls "destroy" at the end of the parallel region. */
// Stack-stitching API wrappers: the master creates/destroys a stitching id,
// workers bracket their work with enter/leave so tools can splice worker
// call stacks onto the master's. Each wrapper no-ops when the corresponding
// ITT entry point is not loaded.
1000 __itt_caller __kmp_itt_stack_caller_create() {
1002 if (!__itt_stack_caller_create_ptr)
1004 KMP_ITT_DEBUG_LOCK();
1005 __itt_caller id = __itt_stack_caller_create();
1006 KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id);
1012 void __kmp_itt_stack_caller_destroy(__itt_caller id) {
1014 if (__itt_stack_caller_destroy_ptr) {
1015 KMP_ITT_DEBUG_LOCK();
1016 __itt_stack_caller_destroy(id);
1017 KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id);
1022 void __kmp_itt_stack_callee_enter(__itt_caller id) {
1024 if (__itt_stack_callee_enter_ptr) {
1025 KMP_ITT_DEBUG_LOCK();
1026 __itt_stack_callee_enter(id);
1027 KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id);
1032 void __kmp_itt_stack_callee_leave(__itt_caller id) {
1034 if (__itt_stack_callee_leave_ptr) {
1035 KMP_ITT_DEBUG_LOCK();
1036 __itt_stack_callee_leave(id);
1037 KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id);
1042 #endif /* USE_ITT_BUILD */