]> git.ipfire.org Git - thirdparty/gcc.git/blob - libgomp/oacc-profiling.c
tree-optimization/95495 - use SLP_TREE_REPRESENTATIVE in assertion
[thirdparty/gcc.git] / libgomp / oacc-profiling.c
1 /* OpenACC Profiling Interface
2
3 Copyright (C) 2019-2020 Free Software Foundation, Inc.
4
5 Contributed by Mentor, a Siemens Business.
6
7 This file is part of the GNU Offloading and Multi Processing Library
8 (libgomp).
9
10 Libgomp is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 3, or (at your option)
13 any later version.
14
15 Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
17 FOR A PARTICULAR PURPOSE. See the GNU General Public License for
18 more details.
19
20 Under Section 7 of GPL version 3, you are granted additional
21 permissions described in the GCC Runtime Library Exception, version
22 3.1, as published by the Free Software Foundation.
23
24 You should have received a copy of the GNU General Public License and
25 a copy of the GCC Runtime Library Exception along with this program;
26 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
27 <http://www.gnu.org/licenses/>. */
28
29 #define _GNU_SOURCE
30 #include "libgomp.h"
31 #include "oacc-int.h"
32 #include "secure_getenv.h"
33 #include "acc_prof.h"
34 #include <assert.h>
35 #ifdef HAVE_STRING_H
36 # include <string.h>
37 #endif
38 #ifdef PLUGIN_SUPPORT
39 # include <dlfcn.h>
40 #endif
41
42 #define STATIC_ASSERT(expr) _Static_assert (expr, "!(" #expr ")")
43
44 /* Statically assert that the layout of the common fields in the
45 'acc_event_info' variants matches. */
46 /* 'event_type' */
47 STATIC_ASSERT (offsetof (acc_event_info, event_type)
48 == offsetof (acc_event_info, data_event.event_type));
49 STATIC_ASSERT (offsetof (acc_event_info, data_event.event_type)
50 == offsetof (acc_event_info, launch_event.event_type));
51 STATIC_ASSERT (offsetof (acc_event_info, data_event.event_type)
52 == offsetof (acc_event_info, other_event.event_type));
53 /* 'valid_bytes' */
54 STATIC_ASSERT (offsetof (acc_event_info, data_event.valid_bytes)
55 == offsetof (acc_event_info, launch_event.valid_bytes));
56 STATIC_ASSERT (offsetof (acc_event_info, data_event.valid_bytes)
57 == offsetof (acc_event_info, other_event.valid_bytes));
58 /* 'parent_construct' */
59 STATIC_ASSERT (offsetof (acc_event_info, data_event.parent_construct)
60 == offsetof (acc_event_info, launch_event.parent_construct));
61 STATIC_ASSERT (offsetof (acc_event_info, data_event.parent_construct)
62 == offsetof (acc_event_info, other_event.parent_construct));
63 /* 'implicit' */
64 STATIC_ASSERT (offsetof (acc_event_info, data_event.implicit)
65 == offsetof (acc_event_info, launch_event.implicit));
66 STATIC_ASSERT (offsetof (acc_event_info, data_event.implicit)
67 == offsetof (acc_event_info, other_event.implicit));
68 /* 'tool_info' */
69 STATIC_ASSERT (offsetof (acc_event_info, data_event.tool_info)
70 == offsetof (acc_event_info, launch_event.tool_info));
71 STATIC_ASSERT (offsetof (acc_event_info, data_event.tool_info)
72 == offsetof (acc_event_info, other_event.tool_info));
73
74 struct goacc_prof_callback_entry
75 {
76 acc_prof_callback cb;
77 int ref;
78 bool enabled;
79 struct goacc_prof_callback_entry *next;
80 };
81
82 /* Use a separate flag to minimize run-time performance impact for the (very
83 common) case that profiling is not enabled.
84
85 Once enabled, we're not going to disable this anymore, anywhere. We
86 probably could, by adding appropriate logic to 'acc_prof_register',
87 'acc_prof_unregister'. */
88 bool goacc_prof_enabled = false;
89
90 /* Global state for registered callbacks.
91 'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle. */
92 static bool goacc_prof_callbacks_enabled[acc_ev_last];
93 static struct goacc_prof_callback_entry *goacc_prof_callback_entries[acc_ev_last];
94 /* Lock used to protect access to 'goacc_prof_callbacks_enabled', and
95 'goacc_prof_callback_entries'. */
96 static gomp_mutex_t goacc_prof_lock;
97
98 void
99 goacc_profiling_initialize (void)
100 {
101 gomp_mutex_init (&goacc_prof_lock);
102
103 /* Initially, all callbacks for all events are enabled. */
104 for (int i = 0; i < acc_ev_last; ++i)
105 goacc_prof_callbacks_enabled[i] = true;
106
107
108 #ifdef PLUGIN_SUPPORT
109 char *acc_proflibs = secure_getenv ("ACC_PROFLIB");
110 while (acc_proflibs != NULL && acc_proflibs[0] != '\0')
111 {
112 char *acc_proflibs_sep = strchr (acc_proflibs, ';');
113 char *acc_proflib;
114 if (acc_proflibs_sep == acc_proflibs)
115 {
116 /* Stray ';' separator: make sure we don't 'dlopen' the main
117 program. */
118 acc_proflib = NULL;
119 }
120 else
121 {
122 if (acc_proflibs_sep != NULL)
123 {
124 /* Single out the first library. */
125 acc_proflib = gomp_malloc (acc_proflibs_sep - acc_proflibs + 1);
126 memcpy (acc_proflib, acc_proflibs,
127 acc_proflibs_sep - acc_proflibs);
128 acc_proflib[acc_proflibs_sep - acc_proflibs] = '\0';
129 }
130 else
131 {
132 /* No ';' separator, so only one library. */
133 acc_proflib = acc_proflibs;
134 }
135
136 gomp_debug (0, "%s: dlopen (\"%s\")\n", __FUNCTION__, acc_proflib);
137 void *dl_handle = dlopen (acc_proflib, RTLD_LAZY);
138 if (dl_handle != NULL)
139 {
140 typeof (&acc_register_library) a_r_l
141 = dlsym (dl_handle, "acc_register_library");
142 if (a_r_l == NULL)
143 goto dl_fail;
144 gomp_debug (0, " %s: calling %s:acc_register_library\n",
145 __FUNCTION__, acc_proflib);
146 a_r_l (acc_prof_register, acc_prof_unregister,
147 acc_prof_lookup);
148 }
149 else
150 {
151 dl_fail:
152 gomp_error ("while loading ACC_PROFLIB \"%s\": %s",
153 acc_proflib, dlerror ());
154 if (dl_handle != NULL)
155 {
156 int err = dlclose (dl_handle);
157 dl_handle = NULL;
158 if (err != 0)
159 goto dl_fail;
160 }
161 }
162 }
163
164 if (acc_proflib != acc_proflibs)
165 {
166 free (acc_proflib);
167
168 acc_proflibs = acc_proflibs_sep + 1;
169 }
170 else
171 acc_proflibs = NULL;
172 }
173 #endif /* PLUGIN_SUPPORT */
174 }
175
176 void
177 acc_prof_register (acc_event_t ev, acc_prof_callback cb, acc_register_t reg)
178 {
179 gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n",
180 __FUNCTION__, (int) ev, (void *) cb, (int) reg);
181
182
183 /* For any events to be dispatched, the user first has to register a
184 callback, which makes this here a good place for enabling the whole
185 machinery. */
186 if (!GOACC_PROF_ENABLED)
187 __atomic_store_n (&goacc_prof_enabled, true, MEMMODEL_RELEASE);
188
189
190 enum
191 {
192 EVENT_KIND_BOGUS,
193 EVENT_KIND_NORMAL,
194 /* As end events invoke callbacks in the reverse order, we register these
195 in the reverse order here. */
196 EVENT_KIND_END,
197 } event_kind = EVENT_KIND_BOGUS;
198 switch (ev)
199 {
200 case acc_ev_none:
201 case acc_ev_device_init_start:
202 case acc_ev_device_shutdown_start:
203 case acc_ev_runtime_shutdown:
204 case acc_ev_create:
205 case acc_ev_delete:
206 case acc_ev_alloc:
207 case acc_ev_free:
208 case acc_ev_enter_data_start:
209 case acc_ev_exit_data_start:
210 case acc_ev_update_start:
211 case acc_ev_compute_construct_start:
212 case acc_ev_enqueue_launch_start:
213 case acc_ev_enqueue_upload_start:
214 case acc_ev_enqueue_download_start:
215 case acc_ev_wait_start:
216 event_kind = EVENT_KIND_NORMAL;
217 break;
218 case acc_ev_device_init_end:
219 case acc_ev_device_shutdown_end:
220 case acc_ev_enter_data_end:
221 case acc_ev_exit_data_end:
222 case acc_ev_update_end:
223 case acc_ev_compute_construct_end:
224 case acc_ev_enqueue_launch_end:
225 case acc_ev_enqueue_upload_end:
226 case acc_ev_enqueue_download_end:
227 case acc_ev_wait_end:
228 event_kind = EVENT_KIND_END;
229 break;
230 case acc_ev_last:
231 break;
232 }
233 if (event_kind == EVENT_KIND_BOGUS)
234 {
235 /* Silently ignore. */
236 gomp_debug (0, " ignoring request for bogus 'acc_event_t'\n");
237 return;
238 }
239
240 bool bogus = true;
241 switch (reg)
242 {
243 case acc_reg:
244 case acc_toggle:
245 case acc_toggle_per_thread:
246 bogus = false;
247 break;
248 }
249 if (bogus)
250 {
251 /* Silently ignore. */
252 gomp_debug (0, " ignoring request with bogus 'acc_register_t'\n");
253 return;
254 }
255
256 /* Special cases. */
257 if (reg == acc_toggle)
258 {
259 if (cb == NULL)
260 {
261 gomp_debug (0, " globally enabling callbacks\n");
262 gomp_mutex_lock (&goacc_prof_lock);
263 /* For 'acc_ev_none', this acts as a global toggle. */
264 goacc_prof_callbacks_enabled[ev] = true;
265 gomp_mutex_unlock (&goacc_prof_lock);
266 return;
267 }
268 else if (ev == acc_ev_none && cb != NULL)
269 {
270 gomp_debug (0, " ignoring request\n");
271 return;
272 }
273 }
274 else if (reg == acc_toggle_per_thread)
275 {
276 if (ev == acc_ev_none && cb == NULL)
277 {
278 gomp_debug (0, " thread: enabling callbacks\n");
279 goacc_lazy_initialize ();
280 struct goacc_thread *thr = goacc_thread ();
281 thr->prof_callbacks_enabled = true;
282 return;
283 }
284 /* Silently ignore. */
285 gomp_debug (0, " ignoring bogus request\n");
286 return;
287 }
288
289 gomp_mutex_lock (&goacc_prof_lock);
290
291 struct goacc_prof_callback_entry *it, *it_p;
292 it = goacc_prof_callback_entries[ev];
293 it_p = NULL;
294 while (it)
295 {
296 if (it->cb == cb)
297 break;
298 it_p = it;
299 it = it->next;
300 }
301
302 switch (reg)
303 {
304 case acc_reg:
305 /* If we already have this callback registered, just increment its
306 reference count. */
307 if (it != NULL)
308 {
309 it->ref++;
310 gomp_debug (0, " already registered;"
311 " incrementing reference count to: %d\n", it->ref);
312 }
313 else
314 {
315 struct goacc_prof_callback_entry *e
316 = gomp_malloc (sizeof (struct goacc_prof_callback_entry));
317 e->cb = cb;
318 e->ref = 1;
319 e->enabled = true;
320 bool prepend = (event_kind == EVENT_KIND_END);
321 /* If we don't have any callback registered yet, also use the
322 'prepend' code path. */
323 if (it_p == NULL)
324 prepend = true;
325 if (prepend)
326 {
327 gomp_debug (0, " prepending\n");
328 e->next = goacc_prof_callback_entries[ev];
329 goacc_prof_callback_entries[ev] = e;
330 }
331 else
332 {
333 gomp_debug (0, " appending\n");
334 e->next = NULL;
335 it_p->next = e;
336 }
337 }
338 break;
339
340 case acc_toggle:
341 if (it == NULL)
342 {
343 gomp_debug (0, " ignoring request: is not registered\n");
344 break;
345 }
346 else
347 {
348 gomp_debug (0, " enabling\n");
349 it->enabled = true;
350 }
351 break;
352
353 case acc_toggle_per_thread:
354 __builtin_unreachable ();
355 }
356
357 gomp_mutex_unlock (&goacc_prof_lock);
358 }
359
360 void
361 acc_prof_unregister (acc_event_t ev, acc_prof_callback cb, acc_register_t reg)
362 {
363 gomp_debug (0, "%s: ev=%d, cb=%p, reg=%d\n",
364 __FUNCTION__, (int) ev, (void *) cb, (int) reg);
365
366 /* If profiling is not enabled, there cannot be anything to unregister. */
367 if (!GOACC_PROF_ENABLED)
368 return;
369
370 if (ev < acc_ev_none
371 || ev >= acc_ev_last)
372 {
373 /* Silently ignore. */
374 gomp_debug (0, " ignoring request for bogus 'acc_event_t'\n");
375 return;
376 }
377
378 bool bogus = true;
379 switch (reg)
380 {
381 case acc_reg:
382 case acc_toggle:
383 case acc_toggle_per_thread:
384 bogus = false;
385 break;
386 }
387 if (bogus)
388 {
389 /* Silently ignore. */
390 gomp_debug (0, " ignoring request with bogus 'acc_register_t'\n");
391 return;
392 }
393
394 /* Special cases. */
395 if (reg == acc_toggle)
396 {
397 if (cb == NULL)
398 {
399 gomp_debug (0, " globally disabling callbacks\n");
400 gomp_mutex_lock (&goacc_prof_lock);
401 /* For 'acc_ev_none', this acts as a global toggle. */
402 goacc_prof_callbacks_enabled[ev] = false;
403 gomp_mutex_unlock (&goacc_prof_lock);
404 return;
405 }
406 else if (ev == acc_ev_none && cb != NULL)
407 {
408 gomp_debug (0, " ignoring request\n");
409 return;
410 }
411 }
412 else if (reg == acc_toggle_per_thread)
413 {
414 if (ev == acc_ev_none && cb == NULL)
415 {
416 gomp_debug (0, " thread: disabling callbacks\n");
417 goacc_lazy_initialize ();
418 struct goacc_thread *thr = goacc_thread ();
419 thr->prof_callbacks_enabled = false;
420 return;
421 }
422 /* Silently ignore. */
423 gomp_debug (0, " ignoring bogus request\n");
424 return;
425 }
426
427 gomp_mutex_lock (&goacc_prof_lock);
428
429 struct goacc_prof_callback_entry *it, *it_p;
430 it = goacc_prof_callback_entries[ev];
431 it_p = NULL;
432 while (it)
433 {
434 if (it->cb == cb)
435 break;
436 it_p = it;
437 it = it->next;
438 }
439
440 switch (reg)
441 {
442 case acc_reg:
443 if (it == NULL)
444 {
445 /* Silently ignore. */
446 gomp_debug (0, " ignoring bogus request: is not registered\n");
447 break;
448 }
449 it->ref--;
450 gomp_debug (0, " decrementing reference count to: %d\n", it->ref);
451 if (it->ref == 0)
452 {
453 if (it_p == NULL)
454 goacc_prof_callback_entries[ev] = it->next;
455 else
456 it_p->next = it->next;
457 free (it);
458 }
459 break;
460
461 case acc_toggle:
462 if (it == NULL)
463 {
464 gomp_debug (0, " ignoring request: is not registered\n");
465 break;
466 }
467 else
468 {
469 gomp_debug (0, " disabling\n");
470 it->enabled = false;
471 }
472 break;
473
474 case acc_toggle_per_thread:
475 __builtin_unreachable ();
476 }
477
478 gomp_mutex_unlock (&goacc_prof_lock);
479 }
480
481 acc_query_fn
482 acc_prof_lookup (const char *name)
483 {
484 gomp_debug (0, "%s (%s)\n",
485 __FUNCTION__, name ?: "NULL");
486
487 return NULL;
488 }
489
490 void
491 acc_register_library (acc_prof_reg reg, acc_prof_reg unreg,
492 acc_prof_lookup_func lookup)
493 {
494 gomp_fatal ("TODO");
495 }
496
497 /* Prepare to dispatch events? */
498
499 bool
500 _goacc_profiling_dispatch_p (bool check_not_nested_p)
501 {
502 gomp_debug (0, "%s\n", __FUNCTION__);
503
504 bool ret;
505
506 struct goacc_thread *thr = goacc_thread ();
507 if (__builtin_expect (thr == NULL, false))
508 {
509 /* If we don't have any per-thread state yet, that means that per-thread
510 callback dispatch has not been explicitly disabled (which only a call
511 to 'acc_prof_unregister' with 'acc_toggle_per_thread' would do, and
512 that would have allocated per-thread state via
513 'goacc_lazy_initialize'); initially, all callbacks for all events are
514 enabled. */
515 gomp_debug (0, " %s: don't have any per-thread state yet\n", __FUNCTION__);
516 }
517 else
518 {
519 if (check_not_nested_p)
520 {
521 /* No nesting. */
522 assert (thr->prof_info == NULL);
523 assert (thr->api_info == NULL);
524 }
525
526 if (__builtin_expect (!thr->prof_callbacks_enabled, true))
527 {
528 gomp_debug (0, " %s: disabled for this thread\n", __FUNCTION__);
529 ret = false;
530 goto out;
531 }
532 }
533
534 gomp_mutex_lock (&goacc_prof_lock);
535
536 /* 'goacc_prof_callbacks_enabled[acc_ev_none]' acts as a global toggle. */
537 if (__builtin_expect (!goacc_prof_callbacks_enabled[acc_ev_none], true))
538 {
539 gomp_debug (0, " %s: disabled globally\n", __FUNCTION__);
540 ret = false;
541 goto out_unlock;
542 }
543 else
544 ret = true;
545
546 out_unlock:
547 gomp_mutex_unlock (&goacc_prof_lock);
548
549 out:
550 return ret;
551 }
552
553 /* Set up to dispatch events? */
554
555 bool
556 _goacc_profiling_setup_p (struct goacc_thread *thr,
557 acc_prof_info *prof_info, acc_api_info *api_info)
558 {
559 gomp_debug (0, "%s (%p)\n", __FUNCTION__, thr);
560
561 /* If we don't have any per-thread state yet, we can't register 'prof_info'
562 and 'api_info'. */
563 if (__builtin_expect (thr == NULL, false))
564 {
565 gomp_debug (0, "Can't dispatch OpenACC Profiling Interface events for"
566 " the current call, construct, or directive\n");
567 return false;
568 }
569
570 if (thr->prof_info != NULL)
571 {
572 /* Profiling has already been set up for an outer construct. In this
573 case, we continue to use the existing information, and thus return
574 'false' here.
575
576 This can happen, for example, for an 'enter data' directive, which
577 sets up profiling, then calls into 'acc_copyin', which should not
578 again set up profiling, should not overwrite the existing
579 information. */
580 return false;
581 }
582
583 thr->prof_info = prof_info;
584 thr->api_info = api_info;
585
586 /* Fill in some defaults. */
587
588 prof_info->event_type = -1; /* Must be set later. */
589 prof_info->valid_bytes = _ACC_PROF_INFO_VALID_BYTES;
590 prof_info->version = _ACC_PROF_INFO_VERSION;
591 if (thr->dev)
592 {
593 prof_info->device_type = acc_device_type (thr->dev->type);
594 prof_info->device_number = thr->dev->target_id;
595 }
596 else
597 {
598 prof_info->device_type = -1;
599 prof_info->device_number = -1;
600 }
601 prof_info->thread_id = -1;
602 prof_info->async = acc_async_sync;
603 prof_info->async_queue = prof_info->async;
604 prof_info->src_file = NULL;
605 prof_info->func_name = NULL;
606 prof_info->line_no = -1;
607 prof_info->end_line_no = -1;
608 prof_info->func_line_no = -1;
609 prof_info->func_end_line_no = -1;
610
611 api_info->device_api = acc_device_api_none;
612 api_info->valid_bytes = _ACC_API_INFO_VALID_BYTES;
613 api_info->device_type = prof_info->device_type;
614 api_info->vendor = -1;
615 api_info->device_handle = NULL;
616 api_info->context_handle = NULL;
617 api_info->async_handle = NULL;
618
619 return true;
620 }
621
622 /* Dispatch events.
623
624 This must only be called if 'GOACC_PROFILING_DISPATCH_P' or
625 'GOACC_PROFILING_SETUP_P' returned a true result. */
626
627 void
628 goacc_profiling_dispatch (acc_prof_info *prof_info, acc_event_info *event_info,
629 acc_api_info *apt_info)
630 {
631 acc_event_t event_type = event_info->event_type;
632 gomp_debug (0, "%s: event_type=%d\n", __FUNCTION__, (int) event_type);
633 assert (event_type > acc_ev_none
634 && event_type < acc_ev_last);
635
636 gomp_mutex_lock (&goacc_prof_lock);
637
638 if (!goacc_prof_callbacks_enabled[event_type])
639 {
640 gomp_debug (0, " disabled for this event type\n");
641
642 goto out_unlock;
643 }
644
645 for (struct goacc_prof_callback_entry *e
646 = goacc_prof_callback_entries[event_type];
647 e != NULL;
648 e = e->next)
649 {
650 if (!e->enabled)
651 {
652 gomp_debug (0, " disabled for callback %p\n", e->cb);
653 continue;
654 }
655
656 gomp_debug (0, " calling callback %p\n", e->cb);
657 e->cb (prof_info, event_info, apt_info);
658 }
659
660 out_unlock:
661 gomp_mutex_unlock (&goacc_prof_lock);
662 }