1 /* Calculate branch probabilities, and basic block execution counts.
2 Copyright (C) 1990-2024 Free Software Foundation, Inc.
3 Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
4 based on some ideas from Dain Samples of UC Berkeley.
5 Further mangling by Bob Manson, Cygnus Support.
6 Converted to use trees by Dale Johannesen, Apple Computer.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24 /* Generate basic block profile instrumentation and auxiliary files.
25 Tree-based version. See profile.cc for overview. */
26
27 #include "config.h"
28 #include "system.h"
29 #include "coretypes.h"
30 #include "memmodel.h"
31 #include "backend.h"
32 #include "target.h"
33 #include "tree.h"
34 #include "gimple.h"
35 #include "cfghooks.h"
36 #include "tree-pass.h"
37 #include "ssa.h"
38 #include "cgraph.h"
39 #include "coverage.h"
40 #include "diagnostic-core.h"
41 #include "fold-const.h"
42 #include "varasm.h"
43 #include "tree-nested.h"
44 #include "gimplify.h"
45 #include "gimple-iterator.h"
46 #include "gimplify-me.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "value-prof.h"
50 #include "profile.h"
51 #include "tree-cfgcleanup.h"
52 #include "stringpool.h"
53 #include "attribs.h"
54 #include "tree-pretty-print.h"
55 #include "langhooks.h"
56 #include "stor-layout.h"
57 #include "xregex.h"
58 #include "alloc-pool.h"
59 #include "symbol-summary.h"
60 #include "symtab-thunks.h"
61 #include "cfganal.h"
62
63 static GTY(()) tree gcov_type_node;
64 static GTY(()) tree tree_interval_profiler_fn;
65 static GTY(()) tree tree_pow2_profiler_fn;
66 static GTY(()) tree tree_topn_values_profiler_fn;
67 static GTY(()) tree tree_indirect_call_profiler_fn;
68 static GTY(()) tree tree_average_profiler_fn;
69 static GTY(()) tree tree_ior_profiler_fn;
70 static GTY(()) tree tree_time_profiler_counter;
71
72
73 static GTY(()) tree ic_tuple_var;
74 static GTY(()) tree ic_tuple_counters_field;
75 static GTY(()) tree ic_tuple_callee_field;
76
77 /* Types of counter update methods.
78
79 By default, the counter updates are done for a single threaded system
80 (COUNTER_UPDATE_SINGLE_THREAD).
81
82 If the user selected atomic profile counter updates
83 (-fprofile-update=atomic), then the counter updates will be done atomically
84 on a best-effort basis. One of three methods to do the counter updates is
85 selected according to the target capabilities.
86
87 Ideally, the counter updates are done through atomic operations in hardware
88 (COUNTER_UPDATE_ATOMIC_BUILTIN).
89
90 If the target supports only 32-bit atomic increments and gcov_type_node is a
91 64-bit integer type, then for the profile edge counters the increment is
92 performed through two separate 32-bit atomic increments
93 (COUNTER_UPDATE_ATOMIC_SPLIT or COUNTER_UPDATE_ATOMIC_PARTIAL). If the
94 target supports libatomic (targetm.have_libatomic), then other counter
95 updates are carried out by libatomic calls (COUNTER_UPDATE_ATOMIC_SPLIT).
96 If the target does not support libatomic, then the other counter updates are
97 not done atomically (COUNTER_UPDATE_ATOMIC_PARTIAL) and a warning is
98 issued.
99
100    If the target does not support atomic operations in hardware but does
101    support libatomic, then all updates are carried out by libatomic calls
102 (COUNTER_UPDATE_ATOMIC_BUILTIN). */
103 enum counter_update_method {
104 COUNTER_UPDATE_SINGLE_THREAD,
105 COUNTER_UPDATE_ATOMIC_BUILTIN,
106 COUNTER_UPDATE_ATOMIC_SPLIT,
107 COUNTER_UPDATE_ATOMIC_PARTIAL
108 };
109
110 static counter_update_method counter_update = COUNTER_UPDATE_SINGLE_THREAD;
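
/* For illustration only -- not code emitted verbatim by this file: the
   per-counter update sequences generated for the methods above roughly
   correspond to

     counter++;                                             // SINGLE_THREAD
     __atomic_fetch_add (&counter, 1, __ATOMIC_RELAXED);    // ATOMIC_BUILTIN
     // SPLIT/PARTIAL on targets with only 32-bit atomic increments:
     if (__atomic_add_fetch ((uint32_t *) &counter, 1, __ATOMIC_RELAXED) == 0)
       __atomic_add_fetch ((uint32_t *) &counter + 1, 1, __ATOMIC_RELAXED);

   with the low/high halves swapped on big-endian targets; see
   gen_counter_update below for the exact GIMPLE sequence.  */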
111
112 /* These functions support measuring modified condition/decision coverage
113 (MC/DC). MC/DC requires all of the below during testing:
114
115 - Each entry and exit point is invoked
116 - Each decision takes every possible outcome
117 - Each condition in a decision takes every possible outcome
118 - Each condition in a decision is shown to independently affect the outcome
119 of the decision
120
121 Independence of a condition is shown by recording it being evaluated to a
122 value (true/false) and not being made irrelevant ("masked") by a later term.
123     This feature adds some instrumentation code, a few bitwise operations, that
124     records the branches taken in conditions and applies a filter for the
125     masking effect.  Masking is essentially short-circuiting in reverse: a
126     condition does not contribute to the outcome if it would have short
127     circuited the (sub)expression had it been evaluated right-to-left, as in
128     (_ && false) and (_ || true).
129
130 The program is essentially rewritten this way:
131
132 - if (a || b) { fn () }
133 + if (a) { _t |= 0x1; goto _then; }
134 + else { _f |= 0x1;
135 + if (b) { _t |= 0x2; _mask |= 0x1; goto _then; }
136 + else { _f |= 0x2; goto _else; }
137 + _then:
138   + _gcov_t |= (_t & ~_mask);
139   + _gcov_f |= (_f & ~_mask);
140 + fn (); goto _end;
141 + _else:
142   + _gcov_t |= (_t & ~_mask);
143   + _gcov_f |= (_f & ~_mask);
144 + fn ();
145 + _end:
146
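    As a small worked example (illustrative only, and assuming the rewrite
    above): a single execution of if (a || b) with a = 0 and b = 1 records
    _f |= 0x1, then _t |= 0x2 and _mask |= 0x1, so the flush on the _then path
    becomes _gcov_t |= (0x2 & ~0x1) and _gcov_f |= (0x1 & ~0x1); the observed
    false value of a is not recorded because it was masked by b evaluating to
    true.
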
147     It is assumed the front end will provide discriminators so that conditional
148     basic blocks (basic blocks with a conditional jump and outgoing true/false
149 edges) that belong to the same Boolean expression have the same
150 discriminator. Masking is determined by analyzing these expressions as a
151 reduced order binary decision diagram. */
152 namespace
153 {
154 /* Some context and reused instances between function calls. Large embedded
155 buffers are used to up-front request enough memory for most programs and
156 merge them into a single allocation at the cost of using more memory in the
157 average case. Some numbers from linux v5.13 which is assumed to be a
158 reasonably diverse code base: 75% of the functions in linux have less than
159 16 nodes in the CFG and approx 2.5% have more than 64 nodes. The functions
160 that go beyond a few dozen nodes tend to be very large (>100) and so 64
161 seems like a good balance.
162
163 This is really just a performance balance of the cost of allocation and
164 wasted memory. */
165 struct conds_ctx
166 {
167    /* This is both a reusable shared allocation and the buffer used to return
168       single expressions, which means that for most code it should only hold a
169       couple of elements. */
170 auto_vec<basic_block, 64> blocks;
171
172 /* Index for the topological order indexed by basic_block->index to an
173 ordering so that expression (a || b && c) => top_index[a] < top_index[b]
174 < top_index[c]. */
175 auto_vec<int, 256> top_index;
176
177    /* Pre-allocate bitmaps and vectors for per-function bookkeeping.  This is
178 pure instance reuse and the bitmaps carry no data between function
179 calls. */
180 auto_vec<basic_block, 64> B1;
181 auto_vec<basic_block, 64> B2;
182 auto_sbitmap G1;
183 auto_sbitmap G2;
184 auto_sbitmap G3;
185
186 explicit conds_ctx (unsigned size) noexcept (true) : G1 (size), G2 (size),
187 G3 (size)
188 {
189 }
190 };
191
192 /* Only instrument expressions with fewer terms than the number of bits in a
193    (wide) gcov integer, which is probably 64.  The algorithm itself does not
194    impose this limitation, but it makes for a simpler implementation.
195
196 * Allocating the output data structure (coverage_counter_alloc ()) can
197 assume pairs of gcov_type_unsigned and not use a separate length field.
198    * A pair of gcov_type_unsigned can be used as accumulators.
199    * Updating accumulators can use the bitwise operations |=, &= and not
200 custom operators that work for arbitrary-sized bit-sets.
201
202 Most real-world code should be unaffected by this, but it is possible
203 (especially for generated code) to exceed this limit. */
204 #define CONDITIONS_MAX_TERMS (TYPE_PRECISION (gcov_type_node))
205 #define EDGE_CONDITION (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)
206
207 /* Compare two basic blocks by their order in the expression i.e. for (a || b)
208 then topological_cmp (a, b, ...) < 0. The result is undefined if LHS, RHS
209 belong to different expressions. The TOP_INDEX argument should be the
210 top_index vector from ctx. */
211 int
212 topological_cmp (const void *lhs, const void *rhs, void *top_index)
213 {
214 const_basic_block l = *(const basic_block*) lhs;
215 const_basic_block r = *(const basic_block*) rhs;
216 const vec<int>* im = (const vec<int>*) top_index;
217 return (*im)[l->index] - (*im)[r->index];
218 }
219
220 /* Find the index of NEEDLE in BLOCKS; return -1 if not found. This has two
221 uses, sometimes for the index and sometimes for set member checks. Sets are
222 typically very small (number of conditions, >8 is uncommon) so linear search
223 should be very fast. */
224 int
225 index_of (const basic_block needle, array_slice<basic_block> blocks)
226 {
227 for (size_t i = 0; i < blocks.size (); i++)
228 if (blocks[i] == needle)
229 return int (i);
230 return -1;
231 }
232
233 /* Special cases of the single_*_p and single_*_edge functions in basic-block.h
234 that don't consider exception handling or other complex edges. This helps
235 create a view of the CFG with only normal edges - if a basic block has both
236 an outgoing fallthrough and exceptional edge, it should be considered a
237    single-successor block. */
238 bool
239 single_p (const vec<edge, va_gc> *edges)
240 {
241 int n = EDGE_COUNT (edges);
242 if (n == 0)
243 return false;
244
245 for (edge e : edges)
246 if (e->flags & EDGE_COMPLEX)
247 n -= 1;
248
249 return n == 1;
250 }
251
252 /* Get the single, non-complex edge.  Behavior is undefined if EDGES has more
253    than one non-complex edge. */
254 edge
255 single_edge (const vec<edge, va_gc> *edges)
256 {
257 gcc_checking_assert (single_p (edges));
258 for (edge e : edges)
259 {
260 if (e->flags & EDGE_COMPLEX)
261 continue;
262 return e;
263 }
264 return NULL;
265 }
266
267 /* Sometimes, for example with function calls, goto labels, and C++
268 destructors, the CFG gets extra nodes that are essentially single-entry
269 single-exit in the middle of boolean expressions. For example:
270
271 x || can_throw (y)
272
273 A
274 /|
275 / |
276 B |
277 | |
278 C |
279 / \ |
280 / \|
281 F T
282
283 Without the extra node inserted by the function + exception it becomes a
284 proper 2-term graph, not 2 single-term graphs.
285
286 A
287 /|
288 C |
289 / \|
290 F T
291
292 This function finds the source edge of these paths. This is often the
293 identity function. */
294 edge
295 contract_edge_up (edge e)
296 {
297 while (true)
298 {
299 basic_block src = e->src;
300 if (!single_p (src->preds))
301 return e;
302 if (!single_p (src->succs))
303 return e;
304 e = single_edge (src->preds);
305 }
306 }
307
308 /* A simple struct for storing/returning outcome block pairs. Either both
309 blocks are set or both are NULL. */
310 struct outcomes
311 {
312 basic_block t = NULL;
313 basic_block f = NULL;
314
315 operator bool () const noexcept (true)
316 {
317 return t && f;
318 }
319 };
320
321 /* Get the true/false successors of a basic block.  If B is not a conditional
322    block both successors are NULL. */
323 outcomes
324 conditional_succs (const basic_block b)
325 {
326 outcomes c;
327 for (edge e : b->succs)
328 {
329 if (e->flags & EDGE_TRUE_VALUE)
330 c.t = e->dest;
331 if (e->flags & EDGE_FALSE_VALUE)
332 c.f = e->dest;
333 }
334
335 gcc_assert ((c.t && c.f) || (!c.t && !c.f));
336 return c;
337 }
338
339 /* Get the index or offset of a conditional flag, 0 for true and 1 for false.
340 These indices carry no semantics but must be consistent as they are used to
341 index into data structures in code generation and gcov. */
342 unsigned
343 condition_index (unsigned flag)
344 {
345 return (flag & EDGE_CONDITION) == EDGE_TRUE_VALUE ? 0 : 1;
346 }
347
348 /* Returns the condition identifier for the basic block if set, otherwise 0.
349 This is only meaningful in GIMPLE and is used for condition coverage.
350
351 There may be conditions created that did not get an uid, such as those
352 implicitly created by destructors. We could include them in the condition
353 coverage for completeness (i.e. condition coverage implies (implicit) branch
354 coverage), but they have no natural buckets and should all be single-term.
355 For now these are ignored and given uid = 0, and branch coverage is left to
356 -fprofile-arcs.
357
358 Under optimization, COND_EXPRs may be folded, replaced with switches,
359 min-max, etc., which leaves ghost identifiers in basic blocks that do not
360 end with a conditional jump. They are not really meaningful for condition
361 coverage anymore, but since coverage is unreliable under optimization anyway
362 this is not a big problem. */
363 unsigned
364 condition_uid (struct function *fn, basic_block b)
365 {
366 gimple *stmt = gsi_stmt (gsi_last_bb (b));
367 if (!safe_is_a<gcond *> (stmt))
368 return 0;
369
370 unsigned *v = fn->cond_uids->get (as_a <gcond*> (stmt));
371 return v ? *v : 0;
372 }
373
374 /* Compute the masking table.
375
376 Masking and short circuiting are deeply connected - masking occurs when
377 control flow reaches a state that is also reachable with short circuiting.
378 In fact, masking corresponds to short circuiting for the reversed
379    expression.  This means we can find the limits, the last term in preceding
380 subexpressions, by following the edges that short circuit to the same
381 outcome. The algorithm treats the CFG as a reduced order binary decision
382    diagram (see Randal E. Bryant's Graph-Based Algorithms for Boolean
383    Function Manipulation (1986)).
384
385 In the simplest case a || b:
386
387 a
388 |\
389 | b
390 |/ \
391 T F
392
393    T has multiple incoming edges and is the outcome of a short circuit,
394 with top = a, bot = b. The top node (a) is masked when the edge (b, T) is
395 taken.
396
397 The names "top" and "bot" refer to a pair of nodes with a shared
398 successor. The top is always the node corresponding to the left-most
399 operand of the two, and it holds that top < bot in a topological ordering.
400
401 Now consider (a && b) || (c && d) and its masking table:
402
403 a
404 |\
405 b \
406 |\|
407 | c
408 | |\
409 | d \
410 |/ \|
411 T F
412
413 a[0] = {}
414 a[1] = {}
415 b[0] = {a}
416 b[1] = {}
417 c[0] = {}
418 c[1] = {}
419 d[0] = {c}
420 d[1] = {a,b}
421
422 Note that 0 and 1 are indices and not boolean values - a[0] is the index in
423 the masking vector when a takes the true edge.
424
425 b[0] and d[0] are identical to the a || b example, and d[1] is the bot in
426 the triangle [d, b] -> T. b is the top node in the [d, b] relationship and
427 last term in (a && b). To find the other terms masked we use the fact that
428 all paths in an expression go through either of the outcomes, found by
429 collecting all non-complex edges that go out of the expression (the
430    neighborhood).  In some cases the outgoing edges go through intermediate (or
431 bypass) nodes, and we collect these paths too (see contract_edge_up).
432
433 We find the terms by marking the outcomes (in this case c, T) and walk the
434 predecessors starting at top (in this case b) and masking nodes when both
435 successors are marked.
436
437 The masking table is represented as two bitfields per term in the expression
438 with the index corresponding to the term in the Boolean expression.
439 a || b && c becomes the term vector [a b c] and the masking table [a[0]
440    a[1] b[0] ...].  The kth bit of a masking vector is set if the kth term
441 is masked by taking the edge.
442
443 The out masks are in uint64_t (the practical maximum for gcov_type_node for
444    any target) as they have to be big enough to store target-sized gcov types
445 independent of the host. */
446 void
447 masking_vectors (conds_ctx& ctx, array_slice<basic_block> blocks,
448 array_slice<sbitmap> maps, array_slice<uint64_t> masks)
449 {
450 gcc_assert (blocks.is_valid ());
451 gcc_assert (!blocks.empty ());
452 gcc_assert (maps.is_valid ());
453 gcc_assert (masks.is_valid ());
454 gcc_assert (sizeof (masks[0]) * BITS_PER_UNIT >= CONDITIONS_MAX_TERMS);
455
456 if (bitmap_count_bits (maps[0]) == 1)
457 return;
458
459 sbitmap marks = ctx.G1;
460 const sbitmap core = maps[0];
461 const sbitmap allg = maps[1];
462 vec<basic_block>& queue = ctx.B1;
463 vec<basic_block>& body = ctx.B2;
464 const vec<int>& top_index = ctx.top_index;
465
466 /* Set up for the iteration - include the outcome nodes in the traversal.
467 The algorithm compares pairs of nodes and is not really sensitive to
468       traversal order, but needs to maintain topological order because the
469 index of masking nodes maps to the index in the accumulators. We must
470 also check the incoming-to-outcome pairs. These edges may in turn be
471 split (this happens with labels on top of then/else blocks) so we must
472 follow any single-in single-out path. The non-condition blocks do not
473 have to be in order as they are non-condition blocks and will not be
474 considered for the set-bit index. */
475 body.truncate (0);
476 body.reserve (blocks.size () + 2);
477 for (const basic_block b : blocks)
478 if (bitmap_bit_p (core, b->index))
479 body.quick_push (b);
480
481 for (basic_block b : blocks)
482 {
483 if (!bitmap_bit_p (core, b->index))
484 continue;
485
486 for (edge e : b->succs)
487 {
488 if (e->flags & EDGE_COMPLEX)
489 continue;
490 if (bitmap_bit_p (allg, e->dest->index))
491 continue;
492 body.safe_push (e->dest);
493
494 /* There may be multiple nodes between the condition edge and the
495 actual outcome, and we need to know when these paths join to
496 determine if there is short circuit/masking. This is
497 effectively creating a virtual edge from the condition node to
498 the real outcome. */
499 while (!(e->flags & EDGE_DFS_BACK) && single_p (e->dest->succs))
500 {
501 e = single_edge (e->dest->succs);
502 body.safe_push (e->dest);
503 }
504 }
505 }
506
507 /* Find the masking. The leftmost element cannot mask anything, so
508 start at 1. */
509 for (size_t i = 1; i != body.length (); i++)
510 {
511 const basic_block b = body[i];
512 for (edge e1 : b->preds)
513 for (edge e2 : b->preds)
514 {
515 if (e1 == e2)
516 continue;
517 if ((e1->flags | e2->flags) & EDGE_COMPLEX)
518 continue;
519
520 edge etop = contract_edge_up (e1);
521 edge ebot = contract_edge_up (e2);
522 gcc_assert (etop != ebot);
523
524 const basic_block top = etop->src;
525 const basic_block bot = ebot->src;
526 const unsigned cond = etop->flags & ebot->flags & EDGE_CONDITION;
527 if (!cond)
528 continue;
529 if (top_index[top->index] > top_index[bot->index])
530 continue;
531 if (!bitmap_bit_p (core, top->index))
532 continue;
533 if (!bitmap_bit_p (core, bot->index))
534 continue;
535
536 outcomes out = conditional_succs (top);
537 gcc_assert (out);
538 bitmap_clear (marks);
539 bitmap_set_bit (marks, out.t->index);
540 bitmap_set_bit (marks, out.f->index);
541 queue.truncate (0);
542 queue.safe_push (top);
543
544 // The edge bot -> outcome triggers the masking
545 const int m = 2*index_of (bot, body) + condition_index (cond);
546 gcc_assert (m >= 0);
547 while (!queue.is_empty ())
548 {
549 basic_block q = queue.pop ();
550 /* q may have been processed & completed by being added to the
551 queue multiple times, so check that there is still work to
552 do before continuing. */
553 if (bitmap_bit_p (marks, q->index))
554 continue;
555
556 outcomes succs = conditional_succs (q);
557 if (!bitmap_bit_p (marks, succs.t->index))
558 continue;
559 if (!bitmap_bit_p (marks, succs.f->index))
560 continue;
561
562 const int index = index_of (q, body);
563 gcc_assert (index != -1);
564 masks[m] |= uint64_t (1) << index;
565 bitmap_set_bit (marks, q->index);
566
567 for (edge e : q->preds)
568 {
569 e = contract_edge_up (e);
570 if (e->flags & EDGE_DFS_BACK)
571 continue;
572 if (bitmap_bit_p (marks, e->src->index))
573 continue;
574 if (!bitmap_bit_p (core, e->src->index))
575 continue;
576 queue.safe_push (e->src);
577 }
578 }
579 }
580 }
581 }
582
583 /* Emit LHS = RHS on edge E.  This is just a shorthand that automates
584    building the assignment and immediately putting it on the edge, which
585    otherwise becomes noisy. */
586 tree
587 emit_assign (edge e, tree lhs, tree rhs)
588 {
589 gassign *w = gimple_build_assign (lhs, rhs);
590 gsi_insert_on_edge (e, w);
591 return lhs;
592 }
593
594 /* Emit lhs = RHS on edge E.  The lhs is created. */
595 tree
596 emit_assign (edge e, tree rhs)
597 {
598 return emit_assign (e, make_ssa_name (gcov_type_node), rhs);
599 }
600
601 /* Emit lhs = OP1 <OP> OP2 on edge E.  The lhs is created. */
602 tree
603 emit_bitwise_op (edge e, tree op1, tree_code op, tree op2 = NULL_TREE)
604 {
605 tree lhs = make_ssa_name (gcov_type_node);
606 gassign *w = gimple_build_assign (lhs, op, op1, op2);
607 gsi_insert_on_edge (e, w);
608 return lhs;
609 }
610
611 /* Visitor for make_top_index. */
612 void
613 make_top_index_visit (basic_block b, vec<basic_block>& L, vec<int>& marks)
614 {
615 if (marks[b->index])
616 return;
617
618 /* Follow the false edge first, if it exists, so that true paths are given
619 the lower index in the ordering. Any iteration order
620 would yield a valid and useful topological ordering, but making sure the
621 true branch has the lower index first makes reporting work better for
622 expressions with ternaries. Walk the false branch first because the
623 array will be reversed to finalize the topological order.
624
625 With the wrong ordering (a ? b : c) && d could become [a c b d], but the
626 (expected) order is really [a b c d]. */
627
628 const unsigned false_fwd = EDGE_DFS_BACK | EDGE_FALSE_VALUE;
629 for (edge e : b->succs)
630 if ((e->flags & false_fwd) == EDGE_FALSE_VALUE)
631 make_top_index_visit (e->dest, L, marks);
632
633 for (edge e : b->succs)
634 if (!(e->flags & false_fwd))
635 make_top_index_visit (e->dest, L, marks);
636
637 marks[b->index] = 1;
638 L.quick_push (b);
639 }
640
641 /* Find a topological sorting of the blocks in a function so that left operands
642 are before right operands including subexpressions. Sorting on block index
643 does not guarantee this property and the syntactical order of terms is very
644 important to the condition coverage. The sorting algorithm is from Cormen
645 et al (2001) but with back-edges ignored and thus there is no need for
646 temporary marks (for cycle detection). The L argument is a buffer/working
647 memory, and the output will be written to TOP_INDEX.
648
649 For the expression (a || (b && c) || d) the blocks should be [a b c d]. */
650 void
651 make_top_index (array_slice<basic_block> blocks, vec<basic_block>& L,
652 vec<int>& top_index)
653 {
654 L.truncate (0);
655 L.reserve (blocks.size ());
656
657    /* Use the output map as a temporary for tracking visited status. */
658 top_index.truncate (0);
659 top_index.safe_grow_cleared (blocks.size ());
660 for (const basic_block b : blocks)
661 make_top_index_visit (b, L, top_index);
662
663 /* Insert canaries - if there are unreachable nodes (for example infinite
664 loops) then the unreachable nodes should never be needed for comparison,
665 and L.length () < max_index. An index mapping should also never be
666 recorded twice. */
667 for (unsigned i = 0; i != top_index.length (); i++)
668 top_index[i] = -1;
669
670 gcc_assert (blocks.size () == L.length ());
671 L.reverse ();
672 const unsigned nblocks = L.length ();
673 for (unsigned i = 0; i != nblocks; i++)
674 {
675 gcc_assert (L[i]->index != -1);
676 top_index[L[i]->index] = int (i);
677 }
678 }
679
680 /* Find all nodes including non-conditions in a Boolean expression. We need to
681 know the paths through the expression so that the masking and
682 instrumentation phases can limit searches and know what subgraphs must be
683 threaded through, but not counted, such as the (b || c) in
684 a && fn (b || c) && d.
685
686    It is essentially the intersection of the downwards paths from EXPR to the
687    common post-dominator and the upwards paths from EXPR to the common dominator.
688 Finding the dominator is slightly more involved than picking the first/last,
689 particularly under optimization, because both incoming and outgoing paths
690 may have multiple entries/exits.
691
692 It is assumed GRAPH is an array_slice of the basic blocks of this function
693 sorted by the basic block index. */
694 vec<basic_block>&
695 paths_between (conds_ctx &ctx, array_slice<basic_block> graph,
696 const vec<basic_block>& expr)
697 {
698 if (expr.length () == 1)
699 {
700 ctx.blocks.truncate (0);
701 ctx.blocks.safe_push (expr[0]);
702 return ctx.blocks;
703 }
704
705 basic_block dom;
706 sbitmap up = ctx.G1;
707 sbitmap down = ctx.G2;
708 sbitmap paths = ctx.G3;
709 vec<basic_block>& queue = ctx.B1;
710
711 queue.truncate (0);
712 bitmap_clear (down);
713 dom = get_immediate_dominator (CDI_POST_DOMINATORS, expr[0]);
714 for (basic_block b : expr)
715 if (dom != b)
716 dom = nearest_common_dominator (CDI_POST_DOMINATORS, dom, b);
717 queue.safe_splice (expr);
718 while (!queue.is_empty ())
719 {
720 basic_block b = queue.pop ();
721 if (!bitmap_set_bit (down, b->index))
722 continue;
723 if (b == dom)
724 continue;
725 for (edge e : b->succs)
726 if (!(e->flags & (EDGE_COMPLEX | EDGE_DFS_BACK)))
727 queue.safe_push (e->dest);
728 }
729
730 queue.truncate (0);
731 bitmap_clear (up);
732 dom = expr[0];
733 for (basic_block b : expr)
734 if (dom != b)
735 dom = nearest_common_dominator (CDI_DOMINATORS, dom, b);
736 queue.safe_splice (expr);
737 while (!queue.is_empty ())
738 {
739 basic_block b = queue.pop ();
740 if (!bitmap_set_bit (up, b->index))
741 continue;
742 if (b == dom)
743 continue;
744 for (edge e : b->preds)
745 if (!(e->flags & (EDGE_COMPLEX | EDGE_DFS_BACK)))
746 queue.safe_push (e->src);
747 }
748
749 bitmap_and (paths, up, down);
750 vec<basic_block>& blocks = ctx.blocks;
751 blocks.truncate (0);
752 blocks.reserve (graph.size ());
753 sbitmap_iterator itr;
754 unsigned index;
755 EXECUTE_IF_SET_IN_BITMAP (paths, 0, index, itr)
756 blocks.quick_push (graph[index]);
757 return blocks;
758 }
759
760 }
761
762 /* Context object for the condition coverage. This stores conds_ctx (the
763 buffers reused when analyzing the cfg) and the output arrays. This is
764 designed to be heap allocated and aggressively preallocates large buffers to
765 avoid having to reallocate for most programs. */
766 struct condcov
767 {
768 explicit condcov (unsigned nblocks) noexcept (true) : ctx (nblocks),
769 m_maps (sbitmap_vector_alloc (2 * nblocks, nblocks))
770 {
771 bitmap_vector_clear (m_maps, 2 * nblocks);
772 }
773 auto_vec<size_t, 128> m_index;
774 auto_vec<basic_block, 256> m_blocks;
775 auto_vec<uint64_t, 512> m_masks;
776 conds_ctx ctx;
777 sbitmap *m_maps;
778 };
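
/* A note on the layout (descriptive only): m_index is a CSR-style offset
   table -- the nth Boolean expression occupies
   m_blocks[m_index[n] .. m_index[n + 1]) and the corresponding masks occupy
   m_masks[2*m_index[n] .. 2*m_index[n + 1]), which is exactly what
   cov_blocks and cov_masks below slice out.  */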
779
780 /* Get the length, that is the number of Boolean expressions found.  cov_length
781 is the one-past index for cov_{blocks,masks,maps}. */
782 size_t
783 cov_length (const struct condcov* cov)
784 {
785 if (cov->m_index.is_empty ())
786 return 0;
787 return cov->m_index.length () - 1;
788 }
789
790 /* The subgraph, excluding intermediates, for the nth Boolean expression. */
791 array_slice<basic_block>
792 cov_blocks (struct condcov* cov, size_t n)
793 {
794 if (n >= cov->m_index.length ())
795 return array_slice<basic_block>::invalid ();
796
797 basic_block *begin = cov->m_blocks.begin () + cov->m_index[n];
798 basic_block *end = cov->m_blocks.begin () + cov->m_index[n + 1];
799 return array_slice<basic_block> (begin, end - begin);
800 }
801
802 /* The masks for the nth Boolean expression. */
803 array_slice<uint64_t>
804 cov_masks (struct condcov* cov, size_t n)
805 {
806 if (n >= cov->m_index.length ())
807 return array_slice<uint64_t>::invalid ();
808
809 uint64_t *begin = cov->m_masks.begin () + 2*cov->m_index[n];
810 uint64_t *end = cov->m_masks.begin () + 2*cov->m_index[n + 1];
811 return array_slice<uint64_t> (begin, end - begin);
812 }
813
814 /* The maps for the nth Boolean expression. */
815 array_slice<sbitmap>
816 cov_maps (struct condcov* cov, size_t n)
817 {
818 if (n >= cov->m_index.length ())
819 return array_slice<sbitmap>::invalid ();
820
821 sbitmap *begin = cov->m_maps + 2*n;
822 sbitmap *end = begin + 2;
823 return array_slice<sbitmap> (begin, end - begin);
824 }
825
826 /* Deleter for condcov. */
827 void
828 cov_free (struct condcov* cov)
829 {
830 sbitmap_vector_free (cov->m_maps);
831 delete cov;
832 }
833
834 /* Condition coverage (MC/DC)
835
836 Whalen, Heimdahl, De Silva in "Efficient Test Coverage Measurement for
837 MC/DC" describe an algorithm for modified condition/decision coverage based
838 on AST analysis. This algorithm does analyzes the control flow graph
839 (interpreted as a binary decision diagram) to determine the masking vectors.
840 The individual phases are described in more detail closer to the
841 implementation.
842
843 The coverage only considers the positions, not the symbols, in a
844 conditional, e.g. !A || (!B && A) is a 3-term conditional even though A
845 appears twice. Subexpressions have no effect on term ordering:
846 (a && (b || (c && d)) || e) comes out as [a b c d e]. Functions whose
847 arguments are Boolean expressions are treated as separate expressions, that
848 is, a && fn (b || c) && d is treated as [a _fn d] and [b c], not [a b c d].
849
850 The output for gcov is a vector of pairs of unsigned integers, interpreted
851 as bit-sets, where the bit index corresponds to the index of the condition
852 in the expression.
853
854 The returned condcov should be released by the caller with cov_free. */
855 struct condcov*
856 find_conditions (struct function *fn)
857 {
858 mark_dfs_back_edges (fn);
859 const bool have_dom = dom_info_available_p (fn, CDI_DOMINATORS);
860 const bool have_post_dom = dom_info_available_p (fn, CDI_POST_DOMINATORS);
861 if (!have_dom)
862 calculate_dominance_info (CDI_DOMINATORS);
863 if (!have_post_dom)
864 calculate_dominance_info (CDI_POST_DOMINATORS);
865
866 const unsigned nblocks = n_basic_blocks_for_fn (fn);
867 basic_block *fnblocksp = basic_block_info_for_fn (fn)->address ();
868 condcov *cov = new condcov (nblocks);
869 conds_ctx& ctx = cov->ctx;
870 array_slice<basic_block> fnblocks (fnblocksp, nblocks);
871 make_top_index (fnblocks, ctx.B1, ctx.top_index);
872
873 /* Bin the Boolean expressions so that exprs[id] -> [x1, x2, ...]. */
874 hash_map<int_hash<unsigned, 0>, vec<basic_block>> exprs;
875 for (basic_block b : fnblocks)
876 {
877 const unsigned uid = condition_uid (fn, b);
878 if (uid == 0)
879 continue;
880 exprs.get_or_insert (uid).safe_push (b);
881 }
882
883 /* Visit all reachable nodes and collect conditions. Topological order is
884 important so the first node of a boolean expression is visited first
885 (it will mark subsequent terms). */
886 cov->m_index.safe_push (0);
887 for (auto expr : exprs)
888 {
889 vec<basic_block>& conds = expr.second;
890 if (conds.length () > CONDITIONS_MAX_TERMS)
891 {
892 location_t loc = gimple_location (gsi_stmt (gsi_last_bb (conds[0])));
893 warning_at (loc, OPT_Wcoverage_too_many_conditions,
894 "Too many conditions (found %u); giving up coverage",
895 conds.length ());
896 continue;
897 }
898 conds.sort (topological_cmp, &ctx.top_index);
899 vec<basic_block>& subgraph = paths_between (ctx, fnblocks, conds);
900 subgraph.sort (topological_cmp, &ctx.top_index);
901 const unsigned index = cov->m_index.length () - 1;
902 sbitmap condm = cov->m_maps[0 + 2*index];
903 sbitmap subgm = cov->m_maps[1 + 2*index];
904 for (basic_block b : conds)
905 bitmap_set_bit (condm, b->index);
906 for (basic_block b : subgraph)
907 bitmap_set_bit (subgm, b->index);
908 cov->m_blocks.safe_splice (subgraph);
909 cov->m_index.safe_push (cov->m_blocks.length ());
910 }
911
912 if (!have_dom)
913 free_dominance_info (fn, CDI_DOMINATORS);
914 if (!have_post_dom)
915 free_dominance_info (fn, CDI_POST_DOMINATORS);
916
917 cov->m_masks.safe_grow_cleared (2 * cov->m_index.last ());
918 const size_t length = cov_length (cov);
919 for (size_t i = 0; i != length; i++)
920 masking_vectors (ctx, cov_blocks (cov, i), cov_maps (cov, i),
921 cov_masks (cov, i));
922
923 return cov;
924 }
925
926 namespace
927 {
928
929 /* Stores the incoming edge and previous counters (in SSA form) on that edge
930    for the node e->dest.  The counters record
931 the seen-true (0), seen-false (1), and current-mask (2). They are stored in
932 an array rather than proper members for access-by-index as the code paths
933 tend to be identical for the different counters. */
934 struct counters
935 {
936 edge e;
937 tree counter[3];
938 tree& operator [] (size_t i) { return counter[i]; }
939 };
940
941 /* Find the counters for the incoming edge e, or NULL if the edge has not been
942 recorded (could be for complex incoming edges). */
943 counters*
944 find_counters (vec<counters>& candidates, edge e)
945 {
946 for (counters& candidate : candidates)
947 if (candidate.e == e)
948 return &candidate;
949 return NULL;
950 }
951
952 /* Resolve the SSA for a specific counter KIND. If it is not modified by any
953 incoming edges, simply forward it, otherwise create a phi node of all the
954 candidate counters and return it. */
955 tree
956 resolve_counter (vec<counters>& cands, size_t kind)
957 {
958 gcc_assert (!cands.is_empty ());
959 gcc_assert (kind < 3);
960
961 counters& fst = cands[0];
962
963 if (!fst.e || fst.e->dest->preds->length () == 1)
964 {
965 gcc_assert (cands.length () == 1);
966 return fst[kind];
967 }
968
969 tree zero0 = build_int_cst (gcov_type_node, 0);
970 tree ssa = make_ssa_name (gcov_type_node);
971 gphi *phi = create_phi_node (ssa, fst.e->dest);
972 for (edge e : fst.e->dest->preds)
973 {
974 counters *prev = find_counters (cands, e);
975 if (prev)
976 add_phi_arg (phi, (*prev)[kind], e, UNKNOWN_LOCATION);
977 else
978 {
979 tree zero = make_ssa_name (gcov_type_node);
980 gimple_stmt_iterator gsi = gsi_after_labels (e->src);
981 gassign *set = gimple_build_assign (zero, zero0);
982 gsi_insert_before (&gsi, set, GSI_NEW_STMT);
983 add_phi_arg (phi, zero, e, UNKNOWN_LOCATION);
984 }
985 }
986 return ssa;
987 }
988
989 /* Resolve all the counters for a node. Note that the edge is undefined, as
990 the counters are intended to form the base to push to the successors, and
991    because the edge is only meaningful for nodes with a single predecessor. */
992 counters
993 resolve_counters (vec<counters>& cands)
994 {
995 counters next;
996 next[0] = resolve_counter (cands, 0);
997 next[1] = resolve_counter (cands, 1);
998 next[2] = resolve_counter (cands, 2);
999 return next;
1000 }
1001
1002 }
1003
1004 /* Add instrumentation to a decision subgraph. EXPR should be the
1005 (topologically sorted) block of nodes returned by cov_blocks, MAPS the
1006 bitmaps returned by cov_maps, and MASKS the block of bitsets returned by
1007 cov_masks. CONDNO should be the index of this condition in the function,
1008 i.e. the same argument given to cov_{masks,graphs}. EXPR may contain nodes
1009 in-between the conditions, e.g. when an operand contains a function call,
1010 or there is a setjmp and the cfg is filled with complex edges.
1011
1012    Every node is annotated with three counters: the true, false, and mask
1013    value.  First, walk the graph and determine whether there are multiple
1014 possible values for either accumulator depending on the path taken, in which
1015 case a phi node is created and registered as the accumulator. Then, those
1016 values are pushed as accumulators to the immediate successors. For some
1017 very particular programs there may be multiple paths into the expression
1018 (e.g. when prior terms are determined by a surrounding conditional) in which
1019 case the default zero-counter is pushed, otherwise all predecessors will
1020 have been considered before the successor because of topologically ordered
1021 traversal. Finally, expr is traversed again to look for edges to the
1022 outcomes, that is, edges with a destination outside of expr, and the local
1023 accumulators are flushed to the global gcov counters on these edges. In
1024 some cases there are edge splits that cause 3+ edges to the two outcome
1025 nodes.
1026
1027    If a complex edge is taken (e.g. on a longjmp) an attempt is made to poison
1028    the accumulators so that there would be no change to the global counters,
1029 but this has proven unreliable in the presence of undefined behavior, see
1030 the setjmp003 test.
1031
1032 It is important that the flushes happen on the basic condition outgoing
1033 edge, otherwise flushes could be lost to exception handling or other
1034 abnormal control flow. */
1035 size_t
1036 instrument_decisions (array_slice<basic_block> expr, size_t condno,
1037 array_slice<sbitmap> maps, array_slice<uint64_t> masks)
1038 {
1039 tree zero = build_int_cst (gcov_type_node, 0);
1040 tree poison = build_int_cst (gcov_type_node, ~0ULL);
1041 const sbitmap core = maps[0];
1042 const sbitmap allg = maps[1];
1043
1044 hash_map<basic_block, vec<counters>> table;
1045 counters zerocounter;
1046 zerocounter.e = NULL;
1047 zerocounter[0] = zero;
1048 zerocounter[1] = zero;
1049 zerocounter[2] = zero;
1050
1051 unsigned xi = 0;
1052 tree rhs = build_int_cst (gcov_type_node, 1ULL << xi);
1053 for (basic_block current : expr)
1054 {
1055 vec<counters>& candidates = table.get_or_insert (current);
1056 if (candidates.is_empty ())
1057 candidates.safe_push (zerocounter);
1058 counters prev = resolve_counters (candidates);
1059
1060 int increment = 0;
1061 for (edge e : current->succs)
1062 {
1063 counters next = prev;
1064 next.e = e;
1065
1066 if (bitmap_bit_p (core, e->src->index) && (e->flags & EDGE_CONDITION))
1067 {
1068 const int k = condition_index (e->flags);
1069 next[k] = emit_bitwise_op (e, prev[k], BIT_IOR_EXPR, rhs);
1070 if (masks[2*xi + k])
1071 {
1072 tree m = build_int_cst (gcov_type_node, masks[2*xi + k]);
1073 next[2] = emit_bitwise_op (e, prev[2], BIT_IOR_EXPR, m);
1074 }
1075 increment = 1;
1076 }
1077 else if (e->flags & EDGE_COMPLEX)
1078 {
1079 /* A complex edge has been taken - wipe the accumulators and
1080 poison the mask so that this path does not contribute to
1081 coverage. */
1082 next[0] = poison;
1083 next[1] = poison;
1084 next[2] = poison;
1085 }
1086 table.get_or_insert (e->dest).safe_push (next);
1087 }
1088 xi += increment;
1089 if (increment)
1090 rhs = build_int_cst (gcov_type_node, 1ULL << xi);
1091 }
1092
1093 gcc_assert (xi == bitmap_count_bits (core));
1094
1095 const tree relaxed = build_int_cst (integer_type_node, MEMMODEL_RELAXED);
1096 const bool atomic = flag_profile_update == PROFILE_UPDATE_ATOMIC;
1097 const tree atomic_ior = builtin_decl_explicit
1098 (TYPE_PRECISION (gcov_type_node) > 32
1099 ? BUILT_IN_ATOMIC_FETCH_OR_8
1100 : BUILT_IN_ATOMIC_FETCH_OR_4);
1101
1102 /* Flush to the gcov accumulators. */
1103 for (const basic_block b : expr)
1104 {
1105 if (!bitmap_bit_p (core, b->index))
1106 continue;
1107
1108 for (edge e : b->succs)
1109 {
1110 /* Flush the accumulators on leaving the Boolean function. The
1111 destination may be inside the function only when it returns to
1112 the loop header, such as do { ... } while (x); */
1113 if (bitmap_bit_p (allg, e->dest->index)) {
1114 if (!(e->flags & EDGE_DFS_BACK))
1115 continue;
1116 if (e->dest != expr[0])
1117 continue;
1118 }
1119
1120 vec<counters> *cands = table.get (e->dest);
1121 gcc_assert (cands);
1122 counters *prevp = find_counters (*cands, e);
1123 gcc_assert (prevp);
1124 counters prev = *prevp;
1125
1126 /* _true &= ~mask, _false &= ~mask */
1127 counters next;
1128 next[2] = emit_bitwise_op (e, prev[2], BIT_NOT_EXPR);
1129 next[0] = emit_bitwise_op (e, prev[0], BIT_AND_EXPR, next[2]);
1130 next[1] = emit_bitwise_op (e, prev[1], BIT_AND_EXPR, next[2]);
1131
1132 /* _global_true |= _true, _global_false |= _false */
1133 for (size_t k = 0; k != 2; ++k)
1134 {
1135 tree ref = tree_coverage_counter_ref (GCOV_COUNTER_CONDS,
1136 2*condno + k);
1137 if (atomic)
1138 {
1139 ref = unshare_expr (ref);
1140 gcall *flush = gimple_build_call (atomic_ior, 3,
1141 build_addr (ref),
1142 next[k], relaxed);
1143 gsi_insert_on_edge (e, flush);
1144 }
1145 else
1146 {
1147 tree get = emit_assign (e, ref);
1148 tree put = emit_bitwise_op (e, next[k], BIT_IOR_EXPR, get);
1149 emit_assign (e, unshare_expr (ref), put);
1150 }
1151 }
1152 }
1153 }
1154
1155 return xi;
1156 }
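
/* Illustrative note: for condition number CONDNO the flushes above populate
   the counter pair at offsets 2*CONDNO and 2*CONDNO + 1 in the
   GCOV_COUNTER_CONDS section -- the accumulated "seen true" and "seen false"
   bit-sets described in find_conditions.  */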
1157
1158 #undef CONDITIONS_MAX_TERMS
1159 #undef EDGE_CONDITION
1160
1161 /* Do initialization work for the edge profiler. */
1162
1163 /* Add code:
1164 __thread gcov* __gcov_indirect_call.counters; // pointer to actual counter
1165 __thread void* __gcov_indirect_call.callee; // actual callee address
1166 __thread int __gcov_function_counter; // time profiler function counter
1167 */
1168 static void
1169 init_ic_make_global_vars (void)
1170 {
1171 tree gcov_type_ptr;
1172
1173 gcov_type_ptr = build_pointer_type (get_gcov_type ());
1174
1175 tree tuple_type = lang_hooks.types.make_type (RECORD_TYPE);
1176
1177 /* callee */
1178 ic_tuple_callee_field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE,
1179 ptr_type_node);
1180
1181 /* counters */
1182 ic_tuple_counters_field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
1183 NULL_TREE, gcov_type_ptr);
1184 DECL_CHAIN (ic_tuple_counters_field) = ic_tuple_callee_field;
1185
1186 finish_builtin_struct (tuple_type, "indirect_call_tuple",
1187 ic_tuple_counters_field, NULL_TREE);
1188
1189 ic_tuple_var
1190 = build_decl (UNKNOWN_LOCATION, VAR_DECL,
1191 get_identifier ("__gcov_indirect_call"), tuple_type);
1192 TREE_PUBLIC (ic_tuple_var) = 1;
1193 DECL_ARTIFICIAL (ic_tuple_var) = 1;
1194 DECL_INITIAL (ic_tuple_var) = NULL;
1195 DECL_EXTERNAL (ic_tuple_var) = 1;
1196 if (targetm.have_tls)
1197 set_decl_tls_model (ic_tuple_var, decl_default_tls_model (ic_tuple_var));
1198 }
1199
1200 /* Create the type and function decls for the interface with gcov. */
1201
1202 void
1203 gimple_init_gcov_profiler (void)
1204 {
1205 tree interval_profiler_fn_type;
1206 tree pow2_profiler_fn_type;
1207 tree topn_values_profiler_fn_type;
1208 tree gcov_type_ptr;
1209 tree ic_profiler_fn_type;
1210 tree average_profiler_fn_type;
1211 const char *fn_name;
1212
1213 if (!gcov_type_node)
1214 {
1215 const char *fn_suffix
1216 = flag_profile_update == PROFILE_UPDATE_ATOMIC ? "_atomic" : "";
1217
1218 gcov_type_node = get_gcov_type ();
1219 gcov_type_ptr = build_pointer_type (gcov_type_node);
1220
1221 /* void (*) (gcov_type *, gcov_type, int, unsigned) */
1222 interval_profiler_fn_type
1223 = build_function_type_list (void_type_node,
1224 gcov_type_ptr, gcov_type_node,
1225 integer_type_node,
1226 unsigned_type_node, NULL_TREE);
1227 fn_name = concat ("__gcov_interval_profiler", fn_suffix, NULL);
1228 tree_interval_profiler_fn = build_fn_decl (fn_name,
1229 interval_profiler_fn_type);
1230 free (CONST_CAST (char *, fn_name));
1231 TREE_NOTHROW (tree_interval_profiler_fn) = 1;
1232 DECL_ATTRIBUTES (tree_interval_profiler_fn)
1233 = tree_cons (get_identifier ("leaf"), NULL,
1234 DECL_ATTRIBUTES (tree_interval_profiler_fn));
1235
1236 /* void (*) (gcov_type *, gcov_type) */
1237 pow2_profiler_fn_type
1238 = build_function_type_list (void_type_node,
1239 gcov_type_ptr, gcov_type_node,
1240 NULL_TREE);
1241 fn_name = concat ("__gcov_pow2_profiler", fn_suffix, NULL);
1242 tree_pow2_profiler_fn = build_fn_decl (fn_name, pow2_profiler_fn_type);
1243 free (CONST_CAST (char *, fn_name));
1244 TREE_NOTHROW (tree_pow2_profiler_fn) = 1;
1245 DECL_ATTRIBUTES (tree_pow2_profiler_fn)
1246 = tree_cons (get_identifier ("leaf"), NULL,
1247 DECL_ATTRIBUTES (tree_pow2_profiler_fn));
1248
1249 /* void (*) (gcov_type *, gcov_type) */
1250 topn_values_profiler_fn_type
1251 = build_function_type_list (void_type_node,
1252 gcov_type_ptr, gcov_type_node,
1253 NULL_TREE);
1254 fn_name = concat ("__gcov_topn_values_profiler", fn_suffix, NULL);
1255 tree_topn_values_profiler_fn
1256 = build_fn_decl (fn_name, topn_values_profiler_fn_type);
1257 free (CONST_CAST (char *, fn_name));
1258
1259 TREE_NOTHROW (tree_topn_values_profiler_fn) = 1;
1260 DECL_ATTRIBUTES (tree_topn_values_profiler_fn)
1261 = tree_cons (get_identifier ("leaf"), NULL,
1262 DECL_ATTRIBUTES (tree_topn_values_profiler_fn));
1263
1264 init_ic_make_global_vars ();
1265
1266 /* void (*) (gcov_type, void *) */
1267 ic_profiler_fn_type
1268 = build_function_type_list (void_type_node,
1269 gcov_type_node,
1270 ptr_type_node,
1271 NULL_TREE);
1272 fn_name = concat ("__gcov_indirect_call_profiler_v4", fn_suffix, NULL);
1273 tree_indirect_call_profiler_fn
1274 = build_fn_decl (fn_name, ic_profiler_fn_type);
1275 free (CONST_CAST (char *, fn_name));
1276
1277 TREE_NOTHROW (tree_indirect_call_profiler_fn) = 1;
1278 DECL_ATTRIBUTES (tree_indirect_call_profiler_fn)
1279 = tree_cons (get_identifier ("leaf"), NULL,
1280 DECL_ATTRIBUTES (tree_indirect_call_profiler_fn));
1281
1282 tree_time_profiler_counter
1283 = build_decl (UNKNOWN_LOCATION, VAR_DECL,
1284 get_identifier ("__gcov_time_profiler_counter"),
1285 get_gcov_type ());
1286 TREE_PUBLIC (tree_time_profiler_counter) = 1;
1287 DECL_EXTERNAL (tree_time_profiler_counter) = 1;
1288 TREE_STATIC (tree_time_profiler_counter) = 1;
1289 DECL_ARTIFICIAL (tree_time_profiler_counter) = 1;
1290 DECL_INITIAL (tree_time_profiler_counter) = NULL;
1291
1292 /* void (*) (gcov_type *, gcov_type) */
1293 average_profiler_fn_type
1294 = build_function_type_list (void_type_node,
1295 gcov_type_ptr, gcov_type_node, NULL_TREE);
1296 fn_name = concat ("__gcov_average_profiler", fn_suffix, NULL);
1297 tree_average_profiler_fn = build_fn_decl (fn_name,
1298 average_profiler_fn_type);
1299 free (CONST_CAST (char *, fn_name));
1300 TREE_NOTHROW (tree_average_profiler_fn) = 1;
1301 DECL_ATTRIBUTES (tree_average_profiler_fn)
1302 = tree_cons (get_identifier ("leaf"), NULL,
1303 DECL_ATTRIBUTES (tree_average_profiler_fn));
1304 fn_name = concat ("__gcov_ior_profiler", fn_suffix, NULL);
1305 tree_ior_profiler_fn = build_fn_decl (fn_name, average_profiler_fn_type);
1306 free (CONST_CAST (char *, fn_name));
1307 TREE_NOTHROW (tree_ior_profiler_fn) = 1;
1308 DECL_ATTRIBUTES (tree_ior_profiler_fn)
1309 = tree_cons (get_identifier ("leaf"), NULL,
1310 DECL_ATTRIBUTES (tree_ior_profiler_fn));
1311
1312 /* LTO streamer needs assembler names. Because we create these decls
1313 late, we need to initialize them by hand. */
1314 DECL_ASSEMBLER_NAME (tree_interval_profiler_fn);
1315 DECL_ASSEMBLER_NAME (tree_pow2_profiler_fn);
1316 DECL_ASSEMBLER_NAME (tree_topn_values_profiler_fn);
1317 DECL_ASSEMBLER_NAME (tree_indirect_call_profiler_fn);
1318 DECL_ASSEMBLER_NAME (tree_average_profiler_fn);
1319 DECL_ASSEMBLER_NAME (tree_ior_profiler_fn);
1320 }
1321 }
1322
1323 /* If RESULT is not null, then output instructions as GIMPLE trees to assign
1324 the updated counter from CALL of FUNC to RESULT. Insert the CALL and the
1325 optional assignment instructions to GSI. Use NAME for temporary values. */
1326
1327 static inline void
1328 gen_assign_counter_update (gimple_stmt_iterator *gsi, gcall *call, tree func,
1329 tree result, const char *name)
1330 {
1331 if (result)
1332 {
1333 tree result_type = TREE_TYPE (TREE_TYPE (func));
1334 tree tmp1 = make_temp_ssa_name (result_type, NULL, name);
1335 gimple_set_lhs (call, tmp1);
1336 gsi_insert_after (gsi, call, GSI_NEW_STMT);
1337 tree tmp2 = make_temp_ssa_name (TREE_TYPE (result), NULL, name);
1338 gassign *assign = gimple_build_assign (tmp2, NOP_EXPR, tmp1);
1339 gsi_insert_after (gsi, assign, GSI_NEW_STMT);
1340 assign = gimple_build_assign (result, tmp2);
1341 gsi_insert_after (gsi, assign, GSI_NEW_STMT);
1342 }
1343 else
1344 gsi_insert_after (gsi, call, GSI_NEW_STMT);
1345 }
1346
1347 /* Output instructions as GIMPLE trees to increment the COUNTER. If RESULT is
1348 not null, then assign the updated counter value to RESULT. Insert the
1349 instructions to GSI. Use NAME for temporary values. */
1350
1351 static inline void
1352 gen_counter_update (gimple_stmt_iterator *gsi, tree counter, tree result,
1353 const char *name)
1354 {
1355 tree type = gcov_type_node;
1356 tree addr = build_fold_addr_expr (counter);
1357 tree one = build_int_cst (type, 1);
1358 tree relaxed = build_int_cst (integer_type_node, MEMMODEL_RELAXED);
1359
1360 if (counter_update == COUNTER_UPDATE_ATOMIC_BUILTIN
1361 || (result && counter_update == COUNTER_UPDATE_ATOMIC_SPLIT))
1362 {
1363 /* __atomic_fetch_add (&counter, 1, MEMMODEL_RELAXED); */
1364 tree f = builtin_decl_explicit (TYPE_PRECISION (type) > 32
1365 ? BUILT_IN_ATOMIC_ADD_FETCH_8
1366 : BUILT_IN_ATOMIC_ADD_FETCH_4);
1367 gcall *call = gimple_build_call (f, 3, addr, one, relaxed);
1368 gen_assign_counter_update (gsi, call, f, result, name);
1369 }
1370 else if (!result && (counter_update == COUNTER_UPDATE_ATOMIC_SPLIT
1371 || counter_update == COUNTER_UPDATE_ATOMIC_PARTIAL))
1372 {
1373 /* low = __atomic_add_fetch_4 (addr, 1, MEMMODEL_RELAXED);
1374 high_inc = low == 0 ? 1 : 0;
1375 __atomic_add_fetch_4 (addr_high, high_inc, MEMMODEL_RELAXED); */
1376 tree zero32 = build_zero_cst (uint32_type_node);
1377 tree one32 = build_one_cst (uint32_type_node);
1378 tree addr_high = make_temp_ssa_name (TREE_TYPE (addr), NULL, name);
1379 tree four = build_int_cst (size_type_node, 4);
1380 gassign *assign1 = gimple_build_assign (addr_high, POINTER_PLUS_EXPR,
1381 addr, four);
1382 gsi_insert_after (gsi, assign1, GSI_NEW_STMT);
1383 if (WORDS_BIG_ENDIAN)
1384 std::swap (addr, addr_high);
1385 tree f = builtin_decl_explicit (BUILT_IN_ATOMIC_ADD_FETCH_4);
1386 gcall *call1 = gimple_build_call (f, 3, addr, one, relaxed);
1387 tree low = make_temp_ssa_name (uint32_type_node, NULL, name);
1388 gimple_call_set_lhs (call1, low);
1389 gsi_insert_after (gsi, call1, GSI_NEW_STMT);
1390 tree is_zero = make_temp_ssa_name (boolean_type_node, NULL, name);
1391 gassign *assign2 = gimple_build_assign (is_zero, EQ_EXPR, low,
1392 zero32);
1393 gsi_insert_after (gsi, assign2, GSI_NEW_STMT);
1394 tree high_inc = make_temp_ssa_name (uint32_type_node, NULL, name);
1395 gassign *assign3 = gimple_build_assign (high_inc, COND_EXPR,
1396 is_zero, one32, zero32);
1397 gsi_insert_after (gsi, assign3, GSI_NEW_STMT);
1398 gcall *call2 = gimple_build_call (f, 3, addr_high, high_inc,
1399 relaxed);
1400 gsi_insert_after (gsi, call2, GSI_NEW_STMT);
1401 }
1402 else
1403 {
1404 tree tmp1 = make_temp_ssa_name (type, NULL, name);
1405 gassign *assign1 = gimple_build_assign (tmp1, counter);
1406 gsi_insert_after (gsi, assign1, GSI_NEW_STMT);
1407 tree tmp2 = make_temp_ssa_name (type, NULL, name);
1408 gassign *assign2 = gimple_build_assign (tmp2, PLUS_EXPR, tmp1, one);
1409 gsi_insert_after (gsi, assign2, GSI_NEW_STMT);
1410 gassign *assign3 = gimple_build_assign (unshare_expr (counter), tmp2);
1411 gsi_insert_after (gsi, assign3, GSI_NEW_STMT);
1412 if (result)
1413 {
1414 gassign *assign4 = gimple_build_assign (result, tmp2);
1415 gsi_insert_after (gsi, assign4, GSI_NEW_STMT);
1416 }
1417 }
1418 }
1419
1420 /* Output instructions as GIMPLE trees to increment the edge
1421 execution count, and insert them on E. */
1422
1423 void
1424 gimple_gen_edge_profiler (int edgeno, edge e)
1425 {
1426 gimple_stmt_iterator gsi = gsi_last (PENDING_STMT (e));
1427 tree counter = tree_coverage_counter_ref (GCOV_COUNTER_ARCS, edgeno);
1428 gen_counter_update (&gsi, counter, NULL_TREE, "PROF_edge_counter");
1429 }
1430
1431 /* Emits code to get VALUE to instrument at GSI, and returns the
1432 variable containing the value. */
1433
1434 static tree
1435 prepare_instrumented_value (gimple_stmt_iterator *gsi, histogram_value value)
1436 {
1437 tree val = value->hvalue.value;
1438 if (POINTER_TYPE_P (TREE_TYPE (val)))
1439 val = fold_convert (build_nonstandard_integer_type
1440 (TYPE_PRECISION (TREE_TYPE (val)), 1), val);
1441 return force_gimple_operand_gsi (gsi, fold_convert (gcov_type_node, val),
1442 true, NULL_TREE, true, GSI_SAME_STMT);
1443 }
1444
1445 /* Output instructions as GIMPLE trees to increment the interval histogram
1446 counter. VALUE is the expression whose value is profiled. TAG is the
1447 tag of the section for counters, BASE is offset of the counter position. */
1448
1449 void
1450 gimple_gen_interval_profiler (histogram_value value, unsigned tag)
1451 {
1452 gimple *stmt = value->hvalue.stmt;
1453 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1454 tree ref = tree_coverage_counter_ref (tag, 0), ref_ptr;
1455 gcall *call;
1456 tree val;
1457 tree start = build_int_cst_type (integer_type_node,
1458 value->hdata.intvl.int_start);
1459 tree steps = build_int_cst_type (unsigned_type_node,
1460 value->hdata.intvl.steps);
1461
1462 ref_ptr = force_gimple_operand_gsi (&gsi,
1463 build_addr (ref),
1464 true, NULL_TREE, true, GSI_SAME_STMT);
1465 val = prepare_instrumented_value (&gsi, value);
1466 call = gimple_build_call (tree_interval_profiler_fn, 4,
1467 ref_ptr, val, start, steps);
1468 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1469 }
1470
1471 /* Output instructions as GIMPLE trees to increment the power of two histogram
1472 counter. VALUE is the expression whose value is profiled. TAG is the tag
1473 of the section for counters. */
1474
1475 void
1476 gimple_gen_pow2_profiler (histogram_value value, unsigned tag)
1477 {
1478 gimple *stmt = value->hvalue.stmt;
1479 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1480 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1481 gcall *call;
1482 tree val;
1483
1484 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1485 true, NULL_TREE, true, GSI_SAME_STMT);
1486 val = prepare_instrumented_value (&gsi, value);
1487 call = gimple_build_call (tree_pow2_profiler_fn, 2, ref_ptr, val);
1488 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1489 }
1490
1491 /* Output instructions as GIMPLE trees for code to find the N most common
1492 values. VALUE is the expression whose value is profiled. TAG is the tag
1493 of the section for counters. */
1494
1495 void
1496 gimple_gen_topn_values_profiler (histogram_value value, unsigned tag)
1497 {
1498 gimple *stmt = value->hvalue.stmt;
1499 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1500 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1501 gcall *call;
1502 tree val;
1503
1504 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1505 true, NULL_TREE, true, GSI_SAME_STMT);
1506 val = prepare_instrumented_value (&gsi, value);
1507 call = gimple_build_call (tree_topn_values_profiler_fn, 2, ref_ptr, val);
1508 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1509 }
1510
1511
1512 /* Output instructions as GIMPLE trees for code to find the most
1513 common called function in indirect call.
1514 VALUE is the call expression whose indirect callee is profiled.
1515 TAG is the tag of the section for counters. */
1516
1517 void
1518 gimple_gen_ic_profiler (histogram_value value, unsigned tag)
1519 {
1520 tree tmp1;
1521 gassign *stmt1, *stmt2, *stmt3;
1522 gimple *stmt = value->hvalue.stmt;
1523 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1524 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1525
1526 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1527 true, NULL_TREE, true, GSI_SAME_STMT);
1528
1529 /* Insert code:
1530
1531 stmt1: __gcov_indirect_call.counters = get_relevant_counter_ptr ();
1532 stmt2: tmp1 = (void *) (indirect call argument value)
1533 stmt3: __gcov_indirect_call.callee = tmp1;
1534
1535 Example:
1536 f_1 = foo;
1537 __gcov_indirect_call.counters = &__gcov4.main[0];
1538 PROF_fn_9 = f_1;
1539 __gcov_indirect_call.callee = PROF_fn_9;
1540 _4 = f_1 ();
1541 */
1542
1543 tree gcov_type_ptr = build_pointer_type (get_gcov_type ());
1544
1545 tree counter_ref = build3 (COMPONENT_REF, gcov_type_ptr,
1546 ic_tuple_var, ic_tuple_counters_field, NULL_TREE);
1547
1548 stmt1 = gimple_build_assign (counter_ref, ref_ptr);
1549 tmp1 = make_temp_ssa_name (ptr_type_node, NULL, "PROF_fn");
1550 stmt2 = gimple_build_assign (tmp1, unshare_expr (value->hvalue.value));
1551 tree callee_ref = build3 (COMPONENT_REF, ptr_type_node,
1552 ic_tuple_var, ic_tuple_callee_field, NULL_TREE);
1553 stmt3 = gimple_build_assign (callee_ref, tmp1);
1554
1555 gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
1556 gsi_insert_before (&gsi, stmt2, GSI_SAME_STMT);
1557 gsi_insert_before (&gsi, stmt3, GSI_SAME_STMT);
1558 }
1559
1560
1561 /* Output instructions as GIMPLE trees for code to find the most
1562 common called function of an indirect call. Insert instructions at the
1563 beginning of every possible called function.
1564 */
1565
1566 void
1567 gimple_gen_ic_func_profiler (void)
1568 {
1569 struct cgraph_node * c_node = cgraph_node::get (current_function_decl);
1570 gcall *stmt1;
1571 tree tree_uid, cur_func, void0;
1572
1573 /* Disable indirect call profiling for an IFUNC resolver and its
1574 callees since it requires TLS which hasn't been set up yet when
1575 the dynamic linker is resolving IFUNC symbols. See
1576 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114115
1577 */
1578 if (c_node->only_called_directly_p ()
1579 || c_node->called_by_ifunc_resolver)
1580 return;
1581
1582 gimple_init_gcov_profiler ();
1583
1584 basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
1585 basic_block cond_bb = split_edge (single_succ_edge (entry));
1586 basic_block update_bb = split_edge (single_succ_edge (cond_bb));
1587
1588 /* We need to do an extra split in order to not create an input
1589 for a possible PHI node. */
1590 split_edge (single_succ_edge (update_bb));
1591
1592 edge true_edge = single_succ_edge (cond_bb);
1593 true_edge->flags = EDGE_TRUE_VALUE;
1594
1595 profile_probability probability;
1596 if (DECL_VIRTUAL_P (current_function_decl))
1597 probability = profile_probability::very_likely ();
1598 else
1599 probability = profile_probability::unlikely ();
1600
1601 true_edge->probability = probability;
1602 edge e = make_edge (cond_bb, single_succ_edge (update_bb)->dest,
1603 EDGE_FALSE_VALUE);
1604 e->probability = true_edge->probability.invert ();
1605
1606 /* Insert code:
1607
1608 if (__gcov_indirect_call.callee != NULL)
1609 __gcov_indirect_call_profiler_v3 (profile_id, &current_function_decl);
1610
1611 The function __gcov_indirect_call_profiler_v3 is responsible for
1612 resetting __gcov_indirect_call.callee to NULL. */
1613
1614 gimple_stmt_iterator gsi = gsi_start_bb (cond_bb);
1615 void0 = build_int_cst (ptr_type_node, 0);
1616
1617 tree callee_ref = build3 (COMPONENT_REF, ptr_type_node,
1618 ic_tuple_var, ic_tuple_callee_field, NULL_TREE);
1619
1620 tree ref = force_gimple_operand_gsi (&gsi, callee_ref, true, NULL_TREE,
1621 true, GSI_SAME_STMT);
1622
1623 gcond *cond = gimple_build_cond (NE_EXPR, ref,
1624 void0, NULL, NULL);
1625 gsi_insert_before (&gsi, cond, GSI_NEW_STMT);
1626
1627 gsi = gsi_after_labels (update_bb);
1628
1629 cur_func = force_gimple_operand_gsi (&gsi,
1630 build_addr (current_function_decl),
1631 true, NULL_TREE,
1632 true, GSI_SAME_STMT);
1633 tree_uid = build_int_cst
1634 (gcov_type_node,
1635 cgraph_node::get (current_function_decl)->profile_id);
1636 stmt1 = gimple_build_call (tree_indirect_call_profiler_fn, 2,
1637 tree_uid, cur_func);
1638 gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
1639 }
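/* Sketch of the control flow created above (block names are illustrative):

     entry -> cond_bb -> update_bb -> rest of the function
                  \________________________^

   cond_bb tests __gcov_indirect_call.callee != NULL and its true edge into
   update_bb is marked unlikely (very likely for virtual functions);
   update_bb holds the call to tree_indirect_call_profiler_fn.  */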
1640
1641 /* Output instructions as GIMPLE trees at the beginning of each function
1642 to update the function's time-profile counter. TAG is the tag of the
1643 section for counters. */
1644
1645 void
1646 gimple_gen_time_profiler (unsigned tag)
1647 {
1648 tree type = get_gcov_type ();
1649 basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
1650 basic_block cond_bb = split_edge (single_succ_edge (entry));
1651 basic_block update_bb = split_edge (single_succ_edge (cond_bb));
1652
1653 /* We need to do an extra split in order to not create an input
1654 for a possible PHI node. */
1655 split_edge (single_succ_edge (update_bb));
1656
1657 edge true_edge = single_succ_edge (cond_bb);
1658 true_edge->flags = EDGE_TRUE_VALUE;
1659 true_edge->probability = profile_probability::unlikely ();
1660 edge e
1661 = make_edge (cond_bb, single_succ_edge (update_bb)->dest, EDGE_FALSE_VALUE);
1662 e->probability = true_edge->probability.invert ();
1663
1664 gimple_stmt_iterator gsi = gsi_start_bb (cond_bb);
1665 tree original_ref = tree_coverage_counter_ref (tag, 0);
1666 tree ref = force_gimple_operand_gsi (&gsi, original_ref, true, NULL_TREE,
1667 true, GSI_SAME_STMT);
1668
1669 /* Emit: if (counters[0] == 0). */
1670 gcond *cond = gimple_build_cond (EQ_EXPR, ref, build_int_cst (type, 0),
1671 NULL, NULL);
1672 gsi_insert_before (&gsi, cond, GSI_NEW_STMT);
1673
1674 /* Emit: counters[0] = ++__gcov_time_profiler_counter. */
1675 gsi = gsi_start_bb (update_bb);
1676 gen_counter_update (&gsi, tree_time_profiler_counter, original_ref,
1677 "PROF_time_profile");
1678 }
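/* What the code built above amounts to at the source level (a sketch; the
   actual update is whatever gen_counter_update produces):

     if (counters[0] == 0)
       counters[0] = ++__gcov_time_profiler_counter;

   so only the first execution of the function records its position in the
   global invocation order.  */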
1679
1680 /* Output instructions as GIMPLE trees to increment the average histogram
1681 counter. VALUE is the expression whose value is profiled. TAG is the
1682 tag of the section for counters. */
1683
1684 void
1685 gimple_gen_average_profiler (histogram_value value, unsigned tag)
1686 {
1687 gimple *stmt = value->hvalue.stmt;
1688 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1689 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1690 gcall *call;
1691 tree val;
1692
1693 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1694 true, NULL_TREE,
1695 true, GSI_SAME_STMT);
1696 val = prepare_instrumented_value (&gsi, value);
1697 call = gimple_build_call (tree_average_profiler_fn, 2, ref_ptr, val);
1698 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1699 }
1700
1701 /* Output instructions as GIMPLE trees to increment the ior histogram
1702 counter. VALUE is the expression whose value is profiled. TAG is the
1703 tag of the section for counters. */
1704
1705 void
1706 gimple_gen_ior_profiler (histogram_value value, unsigned tag)
1707 {
1708 gimple *stmt = value->hvalue.stmt;
1709 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1710 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1711 gcall *call;
1712 tree val;
1713
1714 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1715 true, NULL_TREE, true, GSI_SAME_STMT);
1716 val = prepare_instrumented_value (&gsi, value);
1717 call = gimple_build_call (tree_ior_profiler_fn, 2, ref_ptr, val);
1718 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1719 }
1720
1721 static vec<regex_t> profile_filter_files;
1722 static vec<regex_t> profile_exclude_files;
1723
1724 /* Parse the list of regular expressions in REGEX (separated by semicolons),
1725 compile them into regex_t objects and save them into the vector V.
1726 If a regular expression fails to compile, an error message mentioning
1727 FLAG_NAME is printed. */
1728
1729 static void
1730 parse_profile_filter (const char *regex, vec<regex_t> *v,
1731 const char *flag_name)
1732 {
1733 v->create (4);
1734 if (regex != NULL)
1735 {
1736 char *str = xstrdup (regex);
1737 for (char *p = strtok (str, ";"); p != NULL; p = strtok (NULL, ";"))
1738 {
1739 regex_t r;
1740 if (regcomp (&r, p, REG_EXTENDED | REG_NOSUB) != 0)
1741 {
1742 error ("invalid regular expression %qs in %qs",
1743 p, flag_name);
1744 return;
1745 }
1746
1747 v->safe_push (r);
1748 }
1749 }
1750 }
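/* Example (hypothetical flag value): given
   -fprofile-filter-files=.*\.c$;util/.* the string is split at the
   semicolon and two REG_EXTENDED | REG_NOSUB patterns, ".*\.c$" and
   "util/.*", are compiled and pushed into V.  */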
1751
1752 /* Parse values of -fprofile-filter-files and -fprofile-exclude-files
1753 options. */
1754
1755 static void
1756 parse_profile_file_filtering ()
1757 {
1758 parse_profile_filter (flag_profile_filter_files, &profile_filter_files,
1759 "-fprofile-filter-files");
1760 parse_profile_filter (flag_profile_exclude_files, &profile_exclude_files,
1761 "-fprofile-exclude-files");
1762 }
1763
1764 /* Release the vectors of compiled regular expressions. */
1765
1766 static void
1767 release_profile_file_filtering ()
1768 {
1769 profile_filter_files.release ();
1770 profile_exclude_files.release ();
1771 }
1772
1773 /* Return true when FILENAME should be instrumented based on
1774 -fprofile-filter-files and -fprofile-exclude-files options. */
1775
1776 static bool
1777 include_source_file_for_profile (const char *filename)
1778 {
1779 /* First check whether file is included in flag_profile_exclude_files. */
1780 for (unsigned i = 0; i < profile_exclude_files.length (); i++)
1781 if (regexec (&profile_exclude_files[i],
1782 filename, 0, NULL, 0) == REG_NOERROR)
1783 return false;
1784
1785 /* For non-empty flag_profile_filter_files include only files matching a
1786 regex in the flag. */
1787 if (profile_filter_files.is_empty ())
1788 return true;
1789
1790 for (unsigned i = 0; i < profile_filter_files.length (); i++)
1791 if (regexec (&profile_filter_files[i], filename, 0, NULL, 0) == REG_NOERROR)
1792 return true;
1793
1794 return false;
1795 }
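/* Worked example (hypothetical options): with
   -fprofile-exclude-files=.*_test\.c$ -fprofile-filter-files=src/.*

     src/foo.c      -> instrumented (matches a filter regex, not excluded)
     src/foo_test.c -> skipped      (exclusion wins over filtering)
     lib/bar.c      -> skipped      (matches no filter regex)

   With no -fprofile-filter-files patterns, every file that is not excluded
   is instrumented.  */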
1796
1797 #ifndef HAVE_sync_compare_and_swapsi
1798 #define HAVE_sync_compare_and_swapsi 0
1799 #endif
1800 #ifndef HAVE_atomic_compare_and_swapsi
1801 #define HAVE_atomic_compare_and_swapsi 0
1802 #endif
1803
1804 #ifndef HAVE_sync_compare_and_swapdi
1805 #define HAVE_sync_compare_and_swapdi 0
1806 #endif
1807 #ifndef HAVE_atomic_compare_and_swapdi
1808 #define HAVE_atomic_compare_and_swapdi 0
1809 #endif
1810
1811 /* Profile all functions in the callgraph. */
1812
1813 static unsigned int
1814 tree_profiling (void)
1815 {
1816 struct cgraph_node *node;
1817
1818 /* Verify whether we can utilize atomic update operations. */
1819 bool can_support_atomic = targetm.have_libatomic;
1820 unsigned HOST_WIDE_INT gcov_type_size
1821 = tree_to_uhwi (TYPE_SIZE_UNIT (get_gcov_type ()));
1822 bool have_atomic_4
1823 = HAVE_sync_compare_and_swapsi || HAVE_atomic_compare_and_swapsi;
1824 bool have_atomic_8
1825 = HAVE_sync_compare_and_swapdi || HAVE_atomic_compare_and_swapdi;
1826 bool needs_split = gcov_type_size == 8 && !have_atomic_8 && have_atomic_4;
1827 if (!can_support_atomic)
1828 {
1829 if (gcov_type_size == 4)
1830 can_support_atomic = have_atomic_4;
1831 else if (gcov_type_size == 8)
1832 can_support_atomic = have_atomic_8;
1833 }
1834
1835 if (flag_profile_update != PROFILE_UPDATE_SINGLE && needs_split)
1836 counter_update = COUNTER_UPDATE_ATOMIC_PARTIAL;
1837
1838 if (flag_profile_update == PROFILE_UPDATE_ATOMIC
1839 && !can_support_atomic)
1840 {
1841 warning (0, "target does not support atomic profile update, "
1842 "single mode is selected");
1843 flag_profile_update = PROFILE_UPDATE_SINGLE;
1844 }
1845 else if (flag_profile_update == PROFILE_UPDATE_PREFER_ATOMIC)
1846 flag_profile_update
1847 = can_support_atomic ? PROFILE_UPDATE_ATOMIC : PROFILE_UPDATE_SINGLE;
1848
1849 if (flag_profile_update == PROFILE_UPDATE_ATOMIC)
1850 {
1851 if (needs_split)
1852 counter_update = COUNTER_UPDATE_ATOMIC_SPLIT;
1853 else
1854 counter_update = COUNTER_UPDATE_ATOMIC_BUILTIN;
1855 }
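/* Illustrative decision trace of the logic above, assuming 8-byte gcov
   counters: -fprofile-update=atomic with an 8-byte compare-and-swap gives
   COUNTER_UPDATE_ATOMIC_BUILTIN; with only a 4-byte compare-and-swap it
   gives COUNTER_UPDATE_ATOMIC_SPLIT when libatomic keeps atomic updates
   usable, and COUNTER_UPDATE_ATOMIC_PARTIAL when flag_profile_update has
   to fall back to PROFILE_UPDATE_SINGLE.  */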
1856
1857 /* This is a small-ipa pass that gets called only once, from
1858 cgraphunit.cc:ipa_passes(). */
1859 gcc_assert (symtab->state == IPA_SSA);
1860
1861 init_node_map (true);
1862 parse_profile_file_filtering ();
1863
1864 FOR_EACH_DEFINED_FUNCTION (node)
1865 {
1866 bool thunk = false;
1867 if (!gimple_has_body_p (node->decl) && !node->thunk)
1868 continue;
1869
1870 /* Don't profile functions produced for builtin stuff. */
1871 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1872 continue;
1873
1874 if (lookup_attribute ("no_profile_instrument_function",
1875 DECL_ATTRIBUTES (node->decl)))
1876 continue;
1877 /* Do not instrument extern inline functions when testing coverage.
1878 While this is not perfectly consistent (early inlined extern inlines
1879 will get accounted), the testsuite expects that. */
1880 if (DECL_EXTERNAL (node->decl)
1881 && flag_test_coverage)
1882 continue;
1883
1884 const char *file = LOCATION_FILE (DECL_SOURCE_LOCATION (node->decl));
1885 if (!include_source_file_for_profile (file))
1886 continue;
1887
1888 if (node->thunk)
1889 {
1890 /* We cannot expand variadic thunks to Gimple. */
1891 if (stdarg_p (TREE_TYPE (node->decl)))
1892 continue;
1893 thunk = true;
1894 /* When generating a profile, expand the thunk to GIMPLE so it can be
1895 instrumented the same way as other functions. */
1896 if (profile_arc_flag || condition_coverage_flag)
1897 expand_thunk (node, false, true);
1898 /* Read cgraph profile but keep function as thunk at profile-use
1899 time. */
1900 else
1901 {
1902 read_thunk_profile (node);
1903 continue;
1904 }
1905 }
1906
1907 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1908
1909 if (dump_file)
1910 dump_function_header (dump_file, cfun->decl, dump_flags);
1911
1912 /* Local pure-const may imply the need to fix up the CFG. */
1913 if (gimple_has_body_p (node->decl)
1914 && (execute_fixup_cfg () & TODO_cleanup_cfg))
1915 cleanup_tree_cfg ();
1916
1917 branch_prob (thunk);
1918
1919 if (! flag_branch_probabilities
1920 && flag_profile_values)
1921 gimple_gen_ic_func_profiler ();
1922
1923 if (flag_branch_probabilities
1924 && !thunk
1925 && flag_profile_values
1926 && flag_value_profile_transformations
1927 && profile_status_for_fn (cfun) == PROFILE_READ)
1928 gimple_value_profile_transformations ();
1929
1930 /* The above could hose dominator info. Currently there is
1931 none coming in; this is just a safety valve. It should be
1932 easy to adjust if and when there is some. */
1933 free_dominance_info (CDI_DOMINATORS);
1934 free_dominance_info (CDI_POST_DOMINATORS);
1935 pop_cfun ();
1936 }
1937
1938 release_profile_file_filtering ();
1939
1940 /* Drop pure/const flags from instrumented functions. */
1941 if (profile_arc_flag || condition_coverage_flag || flag_test_coverage)
1942 FOR_EACH_DEFINED_FUNCTION (node)
1943 {
1944 if (!gimple_has_body_p (node->decl)
1945 || !(!node->clone_of
1946 || node->decl != node->clone_of->decl))
1947 continue;
1948
1949 /* Don't profile functions produced for builtin stuff. */
1950 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1951 continue;
1952
1953 node->set_const_flag (false, false);
1954 node->set_pure_flag (false, false);
1955 }
1956
1957 /* Update call statements and rebuild the cgraph. */
1958 FOR_EACH_DEFINED_FUNCTION (node)
1959 {
1960 basic_block bb;
1961
1962 if (!gimple_has_body_p (node->decl)
1963 || !(!node->clone_of
1964 || node->decl != node->clone_of->decl))
1965 continue;
1966
1967 /* Don't profile functions produced for builtin stuff. */
1968 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1969 continue;
1970
1971 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1972
1973 if (profile_arc_flag || condition_coverage_flag || flag_test_coverage)
1974 FOR_EACH_BB_FN (bb, cfun)
1975 {
1976 gimple_stmt_iterator gsi;
1977 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1978 {
1979 gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
1980 if (!call || gimple_call_internal_p (call))
1981 continue;
1982
1983 /* We do not clear pure/const on decls without body. */
1984 tree fndecl = gimple_call_fndecl (call);
1985 cgraph_node *callee;
1986 if (fndecl
1987 && (callee = cgraph_node::get (fndecl))
1988 && callee->get_availability (node) == AVAIL_NOT_AVAILABLE)
1989 continue;
1990
1991 /* Drop the const attribute from the call type (the pure
1992 attribute is not available on types). */
1993 tree fntype = gimple_call_fntype (call);
1994 if (fntype && TYPE_READONLY (fntype))
1995 {
1996 int quals = TYPE_QUALS (fntype) & ~TYPE_QUAL_CONST;
1997 fntype = build_qualified_type (fntype, quals);
1998 gimple_call_set_fntype (call, fntype);
1999 }
2000
2001 /* Update virtual operands of calls to no longer const/pure
2002 functions. */
2003 update_stmt (call);
2004 }
2005 }
2006
2007 /* Re-merge split blocks. */
2008 cleanup_tree_cfg ();
2009 update_ssa (TODO_update_ssa);
2010
2011 cgraph_edge::rebuild_edges ();
2012
2013 pop_cfun ();
2014 }
2015
2016 handle_missing_profiles ();
2017
2018 del_node_map ();
2019 return 0;
2020 }
2021
2022 namespace {
2023
2024 const pass_data pass_data_ipa_tree_profile =
2025 {
2026 SIMPLE_IPA_PASS, /* type */
2027 "profile", /* name */
2028 OPTGROUP_NONE, /* optinfo_flags */
2029 TV_IPA_PROFILE, /* tv_id */
2030 0, /* properties_required */
2031 0, /* properties_provided */
2032 0, /* properties_destroyed */
2033 0, /* todo_flags_start */
2034 TODO_dump_symtab, /* todo_flags_finish */
2035 };
2036
2037 class pass_ipa_tree_profile : public simple_ipa_opt_pass
2038 {
2039 public:
2040 pass_ipa_tree_profile (gcc::context *ctxt)
2041 : simple_ipa_opt_pass (pass_data_ipa_tree_profile, ctxt)
2042 {}
2043
2044 /* opt_pass methods: */
2045 bool gate (function *) final override;
2046 unsigned int execute (function *) final override { return tree_profiling (); }
2047
2048 }; // class pass_ipa_tree_profile
2049
2050 bool
2051 pass_ipa_tree_profile::gate (function *)
2052 {
2053 /* Run when profile instrumentation, profile use or test coverage is to be
2054 performed. For AutoFDO there is no instrumentation, so this pass is
2055 disabled. */
2056 return (!in_lto_p && !flag_auto_profile
2057 && (flag_branch_probabilities || flag_test_coverage
2058 || profile_arc_flag || condition_coverage_flag));
2059 }
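/* In command-line terms (an informal mapping of the flags above): the pass
   runs for -fprofile-arcs, -fcondition-coverage, -ftest-coverage or
   -fbranch-probabilities (also set by -fprofile-use), and is skipped under
   -fauto-profile and when processing LTO bytecode.  */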
2060
2061 } // anon namespace
2062
2063 simple_ipa_opt_pass *
2064 make_pass_ipa_tree_profile (gcc::context *ctxt)
2065 {
2066 return new pass_ipa_tree_profile (ctxt);
2067 }
2068
2069 #include "gt-tree-profile.h"