1 /* Calculate branch probabilities, and basic block execution counts.
2 Copyright (C) 1990-2024 Free Software Foundation, Inc.
3 Contributed by James E. Wilson, UC Berkeley/Cygnus Support;
4 based on some ideas from Dain Samples of UC Berkeley.
5 Further mangling by Bob Manson, Cygnus Support.
6 Converted to use trees by Dale Johannesen, Apple Computer.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24 /* Generate basic block profile instrumentation and auxiliary files.
25 Tree-based version. See profile.cc for overview. */
26
27 #include "config.h"
28 #include "system.h"
29 #include "coretypes.h"
30 #include "memmodel.h"
31 #include "backend.h"
32 #include "target.h"
33 #include "tree.h"
34 #include "gimple.h"
35 #include "cfghooks.h"
36 #include "tree-pass.h"
37 #include "ssa.h"
38 #include "cgraph.h"
39 #include "coverage.h"
40 #include "diagnostic-core.h"
41 #include "fold-const.h"
42 #include "varasm.h"
43 #include "tree-nested.h"
44 #include "gimplify.h"
45 #include "gimple-iterator.h"
46 #include "gimplify-me.h"
47 #include "tree-cfg.h"
48 #include "tree-into-ssa.h"
49 #include "value-prof.h"
50 #include "profile.h"
51 #include "tree-cfgcleanup.h"
52 #include "stringpool.h"
53 #include "attribs.h"
54 #include "tree-pretty-print.h"
55 #include "langhooks.h"
56 #include "stor-layout.h"
57 #include "xregex.h"
58 #include "alloc-pool.h"
59 #include "symbol-summary.h"
60 #include "symtab-thunks.h"
61 #include "cfganal.h"
62
63 static GTY(()) tree gcov_type_node;
64 static GTY(()) tree tree_interval_profiler_fn;
65 static GTY(()) tree tree_pow2_profiler_fn;
66 static GTY(()) tree tree_topn_values_profiler_fn;
67 static GTY(()) tree tree_indirect_call_profiler_fn;
68 static GTY(()) tree tree_average_profiler_fn;
69 static GTY(()) tree tree_ior_profiler_fn;
70 static GTY(()) tree tree_time_profiler_counter;
71
72
73 static GTY(()) tree ic_tuple_var;
74 static GTY(()) tree ic_tuple_counters_field;
75 static GTY(()) tree ic_tuple_callee_field;
76
77 /* Types of counter update methods.
78
79 By default, the counter updates are done for a single threaded system
80 (COUNTER_UPDATE_SINGLE_THREAD).
81
82 If the user selected atomic profile counter updates
83 (-fprofile-update=atomic), then the counter updates will be done atomically
84 on a best-effort basis. One of three methods to do the counter updates is
85 selected according to the target capabilities.
86
87 Ideally, the counter updates are done through atomic operations in hardware
88 (COUNTER_UPDATE_ATOMIC_BUILTIN).
89
90 If the target supports only 32-bit atomic increments and gcov_type_node is a
91 64-bit integer type, then for the profile edge counters the increment is
92 performed through two separate 32-bit atomic increments
93 (COUNTER_UPDATE_ATOMIC_SPLIT or COUNTER_UPDATE_ATOMIC_PARTIAL). If the
94 target supports libatomic (targetm.have_libatomic), then other counter
95 updates are carried out by libatomic calls (COUNTER_UPDATE_ATOMIC_SPLIT).
96 If the target does not support libatomic, then the other counter updates are
97 not done atomically (COUNTER_UPDATE_ATOMIC_PARTIAL) and a warning is
98 issued.
99
100    If the target does not support atomic operations in hardware but does
101    support libatomic, then all updates are carried out by libatomic calls
102 (COUNTER_UPDATE_ATOMIC_BUILTIN). */
103 enum counter_update_method {
104 COUNTER_UPDATE_SINGLE_THREAD,
105 COUNTER_UPDATE_ATOMIC_BUILTIN,
106 COUNTER_UPDATE_ATOMIC_SPLIT,
107 COUNTER_UPDATE_ATOMIC_PARTIAL
108 };
109
110 static counter_update_method counter_update = COUNTER_UPDATE_SINGLE_THREAD;
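
/* For illustration only -- not code emitted verbatim by this file: the
   per-counter update sequences generated for the methods above roughly
   correspond to

     counter++;                                             // SINGLE_THREAD
     __atomic_fetch_add (&counter, 1, __ATOMIC_RELAXED);    // ATOMIC_BUILTIN
     // SPLIT/PARTIAL on targets with only 32-bit atomic increments:
     if (__atomic_add_fetch ((uint32_t *) &counter, 1, __ATOMIC_RELAXED) == 0)
       __atomic_add_fetch ((uint32_t *) &counter + 1, 1, __ATOMIC_RELAXED);

   with the low/high halves swapped on big-endian targets; see
   gen_counter_update below for the exact GIMPLE sequence.  */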
111
112 /* These functions support measuring modified condition/decision coverage
113 (MC/DC). MC/DC requires all of the below during testing:
114
115 - Each entry and exit point is invoked
116 - Each decision takes every possible outcome
117 - Each condition in a decision takes every possible outcome
118 - Each condition in a decision is shown to independently affect the outcome
119 of the decision
120
121 Independence of a condition is shown by recording it being evaluated to a
122 value (true/false) and not being made irrelevant ("masked") by a later term.
123     This feature adds some instrumentation code, a few bitwise operations, that
124     records the branches taken in conditions and applies a filter for the
125     masking effect.  Masking is essentially short-circuiting in reverse: a
126     condition does not contribute to the outcome if it would have short
127     circuited the (sub)expression had it been evaluated right-to-left, as in
128     (_ && false) and (_ || true).
129
130 The program is essentially rewritten this way:
131
132 - if (a || b) { fn () }
133 + if (a) { _t |= 0x1; goto _then; }
134 + else { _f |= 0x1;
135 + if (b) { _t |= 0x2; _mask |= 0x1; goto _then; }
136 + else { _f |= 0x2; goto _else; }
137 + _then:
138   + _gcov_t |= (_t & ~_mask);
139   + _gcov_f |= (_f & ~_mask);
140 + fn (); goto _end;
141 + _else:
142   + _gcov_t |= (_t & ~_mask);
143   + _gcov_f |= (_f & ~_mask);
144 + fn ();
145 + _end:
146
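    As a small worked example (illustrative only, and assuming the rewrite
    above): a single execution of if (a || b) with a = 0 and b = 1 records
    _f |= 0x1, then _t |= 0x2 and _mask |= 0x1, so the flush on the _then path
    becomes _gcov_t |= (0x2 & ~0x1) and _gcov_f |= (0x1 & ~0x1); the observed
    false value of a is not recorded because it was masked by b evaluating to
    true.
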
147     It is assumed the front end will provide discriminators so that conditional
148     basic blocks (basic blocks with a conditional jump and outgoing true/false
149 edges) that belong to the same Boolean expression have the same
150 discriminator. Masking is determined by analyzing these expressions as a
151 reduced order binary decision diagram. */
152 namespace
153 {
154 /* Some context and reused instances between function calls. Large embedded
155 buffers are used to up-front request enough memory for most programs and
156 merge them into a single allocation at the cost of using more memory in the
157 average case. Some numbers from linux v5.13 which is assumed to be a
158 reasonably diverse code base: 75% of the functions in linux have less than
159 16 nodes in the CFG and approx 2.5% have more than 64 nodes. The functions
160 that go beyond a few dozen nodes tend to be very large (>100) and so 64
161 seems like a good balance.
162
163 This is really just a performance balance of the cost of allocation and
164 wasted memory. */
165 struct conds_ctx
166 {
167    /* This is both a reusable shared allocation and the buffer used to return
168       single expressions, which means that for most code it should only hold a
169       couple of elements. */
170 auto_vec<basic_block, 64> blocks;
171
172 /* Index for the topological order indexed by basic_block->index to an
173 ordering so that expression (a || b && c) => top_index[a] < top_index[b]
174 < top_index[c]. */
175 auto_vec<int, 256> top_index;
176
177    /* Pre-allocate bitmaps and vectors for per-function bookkeeping.  This is
178 pure instance reuse and the bitmaps carry no data between function
179 calls. */
180 auto_vec<basic_block, 64> B1;
181 auto_vec<basic_block, 64> B2;
182 auto_sbitmap G1;
183 auto_sbitmap G2;
184 auto_sbitmap G3;
185
186 explicit conds_ctx (unsigned size) noexcept (true) : G1 (size), G2 (size),
187 G3 (size)
188 {
189 }
190 };
191
192 /* Only instrument expressions with fewer terms than the number of bits in a
193    (wide) gcov integer, which is probably 64.  The algorithm itself does not
194    impose this limitation, but it makes for a simpler implementation.
195
196 * Allocating the output data structure (coverage_counter_alloc ()) can
197 assume pairs of gcov_type_unsigned and not use a separate length field.
198    * A pair of gcov_type_unsigned can be used as accumulators.
199    * Updating accumulators can use the bitwise operations |=, &= and not
200 custom operators that work for arbitrary-sized bit-sets.
201
202 Most real-world code should be unaffected by this, but it is possible
203 (especially for generated code) to exceed this limit. */
204 #define CONDITIONS_MAX_TERMS (TYPE_PRECISION (gcov_type_node))
205 #define EDGE_CONDITION (EDGE_TRUE_VALUE | EDGE_FALSE_VALUE)
206
207 /* Compare two basic blocks by their order in the expression i.e. for (a || b)
208 then topological_cmp (a, b, ...) < 0. The result is undefined if LHS, RHS
209 belong to different expressions. The TOP_INDEX argument should be the
210 top_index vector from ctx. */
211 int
212 topological_cmp (const void *lhs, const void *rhs, void *top_index)
213 {
214 const_basic_block l = *(const basic_block*) lhs;
215 const_basic_block r = *(const basic_block*) rhs;
216 const vec<int>* im = (const vec<int>*) top_index;
217 return (*im)[l->index] - (*im)[r->index];
218 }
219
220 /* Find the index of NEEDLE in BLOCKS; return -1 if not found. This has two
221 uses, sometimes for the index and sometimes for set member checks. Sets are
222 typically very small (number of conditions, >8 is uncommon) so linear search
223 should be very fast. */
224 int
225 index_of (const basic_block needle, array_slice<basic_block> blocks)
226 {
227 for (size_t i = 0; i < blocks.size (); i++)
228 if (blocks[i] == needle)
229 return int (i);
230 return -1;
231 }
232
233 /* Special cases of the single_*_p and single_*_edge functions in basic-block.h
234 that don't consider exception handling or other complex edges. This helps
235 create a view of the CFG with only normal edges - if a basic block has both
236 an outgoing fallthrough and exceptional edge, it should be considered a
237    single-successor block. */
238 bool
239 single_p (const vec<edge, va_gc> *edges)
240 {
241 int n = EDGE_COUNT (edges);
242 if (n == 0)
243 return false;
244
245 for (edge e : edges)
246 if (e->flags & EDGE_COMPLEX)
247 n -= 1;
248
249 return n == 1;
250 }
251
252 /* Get the single, non-complex edge.  Behavior is undefined if EDGES has more
253    than one non-complex edge. */
254 edge
255 single_edge (const vec<edge, va_gc> *edges)
256 {
257 gcc_checking_assert (single_p (edges));
258 for (edge e : edges)
259 {
260 if (e->flags & EDGE_COMPLEX)
261 continue;
262 return e;
263 }
264 return NULL;
265 }
266
267 /* Sometimes, for example with function calls, goto labels, and C++
268 destructors, the CFG gets extra nodes that are essentially single-entry
269 single-exit in the middle of boolean expressions. For example:
270
271 x || can_throw (y)
272
273 A
274 /|
275 / |
276 B |
277 | |
278 C |
279 / \ |
280 / \|
281 F T
282
283 Without the extra node inserted by the function + exception it becomes a
284 proper 2-term graph, not 2 single-term graphs.
285
286 A
287 /|
288 C |
289 / \|
290 F T
291
292 This function finds the source edge of these paths. This is often the
293 identity function. */
294 edge
295 contract_edge_up (edge e)
296 {
297 while (true)
298 {
299 basic_block src = e->src;
300 if (!single_p (src->preds))
301 return e;
302 if (!single_p (src->succs))
303 return e;
304 e = single_edge (src->preds);
305 }
306 }
307
308 /* A simple struct for storing/returning outcome block pairs. Either both
309 blocks are set or both are NULL. */
310 struct outcomes
311 {
312 basic_block t = NULL;
313 basic_block f = NULL;
314
315 operator bool () const noexcept (true)
316 {
317 return t && f;
318 }
319 };
320
321 /* Get the true/false successors of a basic block.  If B is not a conditional
322    block both successors are NULL. */
323 outcomes
324 conditional_succs (const basic_block b)
325 {
326 outcomes c;
327 for (edge e : b->succs)
328 {
329 if (e->flags & EDGE_TRUE_VALUE)
330 c.t = e->dest;
331 if (e->flags & EDGE_FALSE_VALUE)
332 c.f = e->dest;
333 }
334
335 gcc_assert ((c.t && c.f) || (!c.t && !c.f));
336 return c;
337 }
338
339 /* Get the index or offset of a conditional flag, 0 for true and 1 for false.
340 These indices carry no semantics but must be consistent as they are used to
341 index into data structures in code generation and gcov. */
342 unsigned
343 condition_index (unsigned flag)
344 {
345 return (flag & EDGE_CONDITION) == EDGE_TRUE_VALUE ? 0 : 1;
346 }
347
348 /* Returns the condition identifier for the basic block if set, otherwise 0.
349 This is only meaningful in GIMPLE and is used for condition coverage.
350
351 There may be conditions created that did not get an uid, such as those
352 implicitly created by destructors. We could include them in the condition
353 coverage for completeness (i.e. condition coverage implies (implicit) branch
354 coverage), but they have no natural buckets and should all be single-term.
355 For now these are ignored and given uid = 0, and branch coverage is left to
356 -fprofile-arcs.
357
358 Under optimization, COND_EXPRs may be folded, replaced with switches,
359 min-max, etc., which leaves ghost identifiers in basic blocks that do not
360 end with a conditional jump. They are not really meaningful for condition
361 coverage anymore, but since coverage is unreliable under optimization anyway
362 this is not a big problem. */
363 unsigned
364 condition_uid (struct function *fn, basic_block b)
365 {
366 gimple *stmt = gsi_stmt (gsi_last_bb (b));
367 if (!safe_is_a<gcond *> (stmt))
368 return 0;
369
370 unsigned *v = fn->cond_uids->get (as_a <gcond*> (stmt));
371 return v ? *v : 0;
372 }
373
374 /* Compute the masking table.
375
376 Masking and short circuiting are deeply connected - masking occurs when
377 control flow reaches a state that is also reachable with short circuiting.
378 In fact, masking corresponds to short circuiting for the reversed
379    expression.  This means we can find the limits, the last term in preceding
380 subexpressions, by following the edges that short circuit to the same
381 outcome. The algorithm treats the CFG as a reduced order binary decision
382    diagram (see Randal E. Bryant's Graph-Based Algorithms for Boolean
383    Function Manipulation (1986)).
384
385 In the simplest case a || b:
386
387 a
388 |\
389 | b
390 |/ \
391 T F
392
393    T has multiple incoming edges and is the outcome of a short circuit,
394 with top = a, bot = b. The top node (a) is masked when the edge (b, T) is
395 taken.
396
397 The names "top" and "bot" refer to a pair of nodes with a shared
398 successor. The top is always the node corresponding to the left-most
399 operand of the two, and it holds that top < bot in a topological ordering.
400
401 Now consider (a && b) || (c && d) and its masking table:
402
403 a
404 |\
405 b \
406 |\|
407 | c
408 | |\
409 | d \
410 |/ \|
411 T F
412
413 a[0] = {}
414 a[1] = {}
415 b[0] = {a}
416 b[1] = {}
417 c[0] = {}
418 c[1] = {}
419 d[0] = {c}
420 d[1] = {a,b}
421
422 Note that 0 and 1 are indices and not boolean values - a[0] is the index in
423 the masking vector when a takes the true edge.
424
425 b[0] and d[0] are identical to the a || b example, and d[1] is the bot in
426 the triangle [d, b] -> T. b is the top node in the [d, b] relationship and
427 last term in (a && b). To find the other terms masked we use the fact that
428 all paths in an expression go through either of the outcomes, found by
429 collecting all non-complex edges that go out of the expression (the
430    neighborhood).  In some cases the outgoing edges go through intermediate (or
431 bypass) nodes, and we collect these paths too (see contract_edge_up).
432
433 We find the terms by marking the outcomes (in this case c, T) and walk the
434 predecessors starting at top (in this case b) and masking nodes when both
435 successors are marked.
436
437 The masking table is represented as two bitfields per term in the expression
438 with the index corresponding to the term in the Boolean expression.
439 a || b && c becomes the term vector [a b c] and the masking table [a[0]
440    a[1] b[0] ...].  The kth bit of a masking vector is set if the kth term
441 is masked by taking the edge.
442
443 The out masks are in uint64_t (the practical maximum for gcov_type_node for
444    any target) as they have to be big enough to store target-sized gcov types
445 independent of the host. */
446 void
447 masking_vectors (conds_ctx& ctx, array_slice<basic_block> blocks,
448 array_slice<sbitmap> maps, array_slice<uint64_t> masks)
449 {
450 gcc_assert (blocks.is_valid ());
451 gcc_assert (!blocks.empty ());
452 gcc_assert (maps.is_valid ());
453 gcc_assert (masks.is_valid ());
454 gcc_assert (sizeof (masks[0]) * BITS_PER_UNIT >= CONDITIONS_MAX_TERMS);
455
456 if (bitmap_count_bits (maps[0]) == 1)
457 return;
458
459 sbitmap marks = ctx.G1;
460 const sbitmap core = maps[0];
461 const sbitmap allg = maps[1];
462 vec<basic_block>& queue = ctx.B1;
463 vec<basic_block>& body = ctx.B2;
464 const vec<int>& top_index = ctx.top_index;
465
466 /* Set up for the iteration - include the outcome nodes in the traversal.
467 The algorithm compares pairs of nodes and is not really sensitive to
468       traversal order, but needs to maintain topological order because the
469 index of masking nodes maps to the index in the accumulators. We must
470 also check the incoming-to-outcome pairs. These edges may in turn be
471 split (this happens with labels on top of then/else blocks) so we must
472 follow any single-in single-out path. The non-condition blocks do not
473 have to be in order as they are non-condition blocks and will not be
474 considered for the set-bit index. */
475 body.truncate (0);
476 body.reserve (blocks.size () + 2);
477 for (const basic_block b : blocks)
478 if (bitmap_bit_p (core, b->index))
479 body.quick_push (b);
480
481 for (basic_block b : blocks)
482 {
483 if (!bitmap_bit_p (core, b->index))
484 continue;
485
486 for (edge e : b->succs)
487 {
488 if (e->flags & EDGE_COMPLEX)
489 continue;
490 if (bitmap_bit_p (allg, e->dest->index))
491 continue;
492 body.safe_push (e->dest);
493
494 /* There may be multiple nodes between the condition edge and the
495 actual outcome, and we need to know when these paths join to
496 determine if there is short circuit/masking. This is
497 effectively creating a virtual edge from the condition node to
498 the real outcome. */
499 while (!(e->flags & EDGE_DFS_BACK) && single_p (e->dest->succs))
500 {
501 e = single_edge (e->dest->succs);
502 body.safe_push (e->dest);
503 }
504 }
505 }
506
507 /* Find the masking. The leftmost element cannot mask anything, so
508 start at 1. */
509 for (size_t i = 1; i != body.length (); i++)
510 {
511 const basic_block b = body[i];
512 for (edge e1 : b->preds)
513 for (edge e2 : b->preds)
514 {
515 if (e1 == e2)
516 continue;
517 if ((e1->flags | e2->flags) & EDGE_COMPLEX)
518 continue;
519
520 edge etop = contract_edge_up (e1);
521 edge ebot = contract_edge_up (e2);
522 gcc_assert (etop != ebot);
523
524 const basic_block top = etop->src;
525 const basic_block bot = ebot->src;
526 const unsigned cond = etop->flags & ebot->flags & EDGE_CONDITION;
527 if (!cond)
528 continue;
529 if (top_index[top->index] > top_index[bot->index])
530 continue;
531 if (!bitmap_bit_p (core, top->index))
532 continue;
533 if (!bitmap_bit_p (core, bot->index))
534 continue;
535
536 outcomes out = conditional_succs (top);
537 gcc_assert (out);
538 bitmap_clear (marks);
539 bitmap_set_bit (marks, out.t->index);
540 bitmap_set_bit (marks, out.f->index);
541 queue.truncate (0);
542 queue.safe_push (top);
543
544 // The edge bot -> outcome triggers the masking
545 const int m = 2*index_of (bot, body) + condition_index (cond);
546 gcc_assert (m >= 0);
547 while (!queue.is_empty ())
548 {
549 basic_block q = queue.pop ();
550 /* q may have been processed & completed by being added to the
551 queue multiple times, so check that there is still work to
552 do before continuing. */
553 if (bitmap_bit_p (marks, q->index))
554 continue;
555
556 outcomes succs = conditional_succs (q);
557 if (!bitmap_bit_p (marks, succs.t->index))
558 continue;
559 if (!bitmap_bit_p (marks, succs.f->index))
560 continue;
561
562 const int index = index_of (q, body);
563 gcc_assert (index != -1);
564 masks[m] |= uint64_t (1) << index;
565 bitmap_set_bit (marks, q->index);
566
567 for (edge e : q->preds)
568 {
569 e = contract_edge_up (e);
570 if (e->flags & EDGE_DFS_BACK)
571 continue;
572 if (bitmap_bit_p (marks, e->src->index))
573 continue;
574 if (!bitmap_bit_p (core, e->src->index))
575 continue;
576 queue.safe_push (e->src);
577 }
578 }
579 }
580 }
581 }
582
583 /* Emit LHS = RHS on edge E.  This is just a shorthand that automates
584    building the assignment and immediately putting it on the edge, which
585    otherwise becomes noisy. */
586 tree
587 emit_assign (edge e, tree lhs, tree rhs)
588 {
589 gassign *w = gimple_build_assign (lhs, rhs);
590 gsi_insert_on_edge (e, w);
591 return lhs;
592 }
593
594 /* Emit lhs = RHS on edge E.  The lhs is created. */
595 tree
596 emit_assign (edge e, tree rhs)
597 {
598 return emit_assign (e, make_ssa_name (gcov_type_node), rhs);
599 }
600
601 /* Emit lhs = OP1 <OP> OP2 on edge E.  The lhs is created. */
602 tree
603 emit_bitwise_op (edge e, tree op1, tree_code op, tree op2 = NULL_TREE)
604 {
605 tree lhs = make_ssa_name (gcov_type_node);
606 gassign *w = gimple_build_assign (lhs, op, op1, op2);
607 gsi_insert_on_edge (e, w);
608 return lhs;
609 }
610
611 /* Visitor for make_top_index. */
612 void
613 make_top_index_visit (basic_block b, vec<basic_block>& L, vec<int>& marks)
614 {
615 if (marks[b->index])
616 return;
617
618 /* Follow the false edge first, if it exists, so that true paths are given
619 the lower index in the ordering. Any iteration order
620 would yield a valid and useful topological ordering, but making sure the
621 true branch has the lower index first makes reporting work better for
622 expressions with ternaries. Walk the false branch first because the
623 array will be reversed to finalize the topological order.
624
625 With the wrong ordering (a ? b : c) && d could become [a c b d], but the
626 (expected) order is really [a b c d]. */
627
628 const unsigned false_fwd = EDGE_DFS_BACK | EDGE_FALSE_VALUE;
629 for (edge e : b->succs)
630 if ((e->flags & false_fwd) == EDGE_FALSE_VALUE)
631 make_top_index_visit (e->dest, L, marks);
632
633 for (edge e : b->succs)
634 if (!(e->flags & false_fwd))
635 make_top_index_visit (e->dest, L, marks);
636
637 marks[b->index] = 1;
638 L.quick_push (b);
639 }
640
641 /* Find a topological sorting of the blocks in a function so that left operands
642 are before right operands including subexpressions. Sorting on block index
643 does not guarantee this property and the syntactical order of terms is very
644 important to the condition coverage. The sorting algorithm is from Cormen
645 et al (2001) but with back-edges ignored and thus there is no need for
646 temporary marks (for cycle detection). The L argument is a buffer/working
647 memory, and the output will be written to TOP_INDEX.
648
649 For the expression (a || (b && c) || d) the blocks should be [a b c d]. */
650 void
651 make_top_index (array_slice<basic_block> blocks, vec<basic_block>& L,
652 vec<int>& top_index)
653 {
654 L.truncate (0);
655 L.reserve (blocks.size ());
656
657    /* Use the output map as a temporary for tracking visited status. */
658 top_index.truncate (0);
659 top_index.safe_grow_cleared (blocks.size ());
660 for (const basic_block b : blocks)
661 make_top_index_visit (b, L, top_index);
662
663 /* Insert canaries - if there are unreachable nodes (for example infinite
664 loops) then the unreachable nodes should never be needed for comparison,
665 and L.length () < max_index. An index mapping should also never be
666 recorded twice. */
667 for (unsigned i = 0; i != top_index.length (); i++)
668 top_index[i] = -1;
669
670 gcc_assert (blocks.size () == L.length ());
671 L.reverse ();
672 const unsigned nblocks = L.length ();
673 for (unsigned i = 0; i != nblocks; i++)
674 {
675 gcc_assert (L[i]->index != -1);
676 top_index[L[i]->index] = int (i);
677 }
678 }
679
680 /* Find all nodes including non-conditions in a Boolean expression. We need to
681 know the paths through the expression so that the masking and
682 instrumentation phases can limit searches and know what subgraphs must be
683 threaded through, but not counted, such as the (b || c) in
684 a && fn (b || c) && d.
685
686    It is essentially the intersection of the downwards paths from EXPR to the
687    common post-dominator and the upwards paths from EXPR to the common dominator.
688 Finding the dominator is slightly more involved than picking the first/last,
689 particularly under optimization, because both incoming and outgoing paths
690 may have multiple entries/exits.
691
692 It is assumed GRAPH is an array_slice of the basic blocks of this function
693 sorted by the basic block index. */
694 vec<basic_block>&
695 paths_between (conds_ctx &ctx, array_slice<basic_block> graph,
696 const vec<basic_block>& expr)
697 {
698 if (expr.length () == 1)
699 {
700 ctx.blocks.truncate (0);
701 ctx.blocks.safe_push (expr[0]);
702 return ctx.blocks;
703 }
704
705 basic_block dom;
706 sbitmap up = ctx.G1;
707 sbitmap down = ctx.G2;
708 sbitmap paths = ctx.G3;
709 vec<basic_block>& queue = ctx.B1;
710
711 queue.truncate (0);
712 bitmap_clear (down);
713 dom = get_immediate_dominator (CDI_POST_DOMINATORS, expr[0]);
714 for (basic_block b : expr)
715 if (dom != b)
716 dom = nearest_common_dominator (CDI_POST_DOMINATORS, dom, b);
717 queue.safe_splice (expr);
718 while (!queue.is_empty ())
719 {
720 basic_block b = queue.pop ();
721 if (!bitmap_set_bit (down, b->index))
722 continue;
723 if (b == dom)
724 continue;
725 for (edge e : b->succs)
726 if (!(e->flags & (EDGE_COMPLEX | EDGE_DFS_BACK)))
727 queue.safe_push (e->dest);
728 }
729
730 queue.truncate (0);
731 bitmap_clear (up);
732 dom = expr[0];
733 for (basic_block b : expr)
734 if (dom != b)
735 dom = nearest_common_dominator (CDI_DOMINATORS, dom, b);
736 queue.safe_splice (expr);
737 while (!queue.is_empty ())
738 {
739 basic_block b = queue.pop ();
740 if (!bitmap_set_bit (up, b->index))
741 continue;
742 if (b == dom)
743 continue;
744 for (edge e : b->preds)
745 if (!(e->flags & (EDGE_COMPLEX | EDGE_DFS_BACK)))
746 queue.safe_push (e->src);
747 }
748
749 bitmap_and (paths, up, down);
750 vec<basic_block>& blocks = ctx.blocks;
751 blocks.truncate (0);
752 blocks.reserve (graph.size ());
753 sbitmap_iterator itr;
754 unsigned index;
755 EXECUTE_IF_SET_IN_BITMAP (paths, 0, index, itr)
756 blocks.quick_push (graph[index]);
757 return blocks;
758 }
759
760 }
761
762 /* Context object for the condition coverage. This stores conds_ctx (the
763 buffers reused when analyzing the cfg) and the output arrays. This is
764 designed to be heap allocated and aggressively preallocates large buffers to
765 avoid having to reallocate for most programs. */
766 struct condcov
767 {
768 explicit condcov (unsigned nblocks) noexcept (true) : ctx (nblocks),
769 m_maps (sbitmap_vector_alloc (2 * nblocks, nblocks))
770 {
771 bitmap_vector_clear (m_maps, 2 * nblocks);
772 }
773 auto_vec<size_t, 128> m_index;
774 auto_vec<basic_block, 256> m_blocks;
775 auto_vec<uint64_t, 512> m_masks;
776 conds_ctx ctx;
777 sbitmap *m_maps;
778 };
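
/* A note on the layout (descriptive only): m_index is a CSR-style offset
   table -- the nth Boolean expression occupies
   m_blocks[m_index[n] .. m_index[n + 1]) and the corresponding masks occupy
   m_masks[2*m_index[n] .. 2*m_index[n + 1]), which is exactly what
   cov_blocks and cov_masks below slice out.  */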
779
780 /* Get the length, that is the number of Boolean expressions found.  cov_length
781 is the one-past index for cov_{blocks,masks,maps}. */
782 size_t
783 cov_length (const struct condcov* cov)
784 {
785 if (cov->m_index.is_empty ())
786 return 0;
787 return cov->m_index.length () - 1;
788 }
789
790 /* The subgraph, excluding intermediates, for the nth Boolean expression. */
791 array_slice<basic_block>
792 cov_blocks (struct condcov* cov, size_t n)
793 {
794 if (n >= cov->m_index.length ())
795 return array_slice<basic_block>::invalid ();
796
797 basic_block *begin = cov->m_blocks.begin () + cov->m_index[n];
798 basic_block *end = cov->m_blocks.begin () + cov->m_index[n + 1];
799 return array_slice<basic_block> (begin, end - begin);
800 }
801
802 /* The masks for the nth Boolean expression. */
803 array_slice<uint64_t>
804 cov_masks (struct condcov* cov, size_t n)
805 {
806 if (n >= cov->m_index.length ())
807 return array_slice<uint64_t>::invalid ();
808
809 uint64_t *begin = cov->m_masks.begin () + 2*cov->m_index[n];
810 uint64_t *end = cov->m_masks.begin () + 2*cov->m_index[n + 1];
811 return array_slice<uint64_t> (begin, end - begin);
812 }
813
814 /* The maps for the nth Boolean expression. */
815 array_slice<sbitmap>
816 cov_maps (struct condcov* cov, size_t n)
817 {
818 if (n >= cov->m_index.length ())
819 return array_slice<sbitmap>::invalid ();
820
821 sbitmap *begin = cov->m_maps + 2*n;
822 sbitmap *end = begin + 2;
823 return array_slice<sbitmap> (begin, end - begin);
824 }
825
826 /* Deleter for condcov. */
827 void
828 cov_free (struct condcov* cov)
829 {
830 sbitmap_vector_free (cov->m_maps);
831 delete cov;
832 }
833
834 /* Condition coverage (MC/DC)
835
836 Whalen, Heimdahl, De Silva in "Efficient Test Coverage Measurement for
837 MC/DC" describe an algorithm for modified condition/decision coverage based
838 on AST analysis. This algorithm does analyzes the control flow graph
839 (interpreted as a binary decision diagram) to determine the masking vectors.
840 The individual phases are described in more detail closer to the
841 implementation.
842
843 The coverage only considers the positions, not the symbols, in a
844 conditional, e.g. !A || (!B && A) is a 3-term conditional even though A
845 appears twice. Subexpressions have no effect on term ordering:
846 (a && (b || (c && d)) || e) comes out as [a b c d e]. Functions whose
847 arguments are Boolean expressions are treated as separate expressions, that
848 is, a && fn (b || c) && d is treated as [a _fn d] and [b c], not [a b c d].
849
850 The output for gcov is a vector of pairs of unsigned integers, interpreted
851 as bit-sets, where the bit index corresponds to the index of the condition
852 in the expression.
853
854 The returned condcov should be released by the caller with cov_free. */
855 struct condcov*
856 find_conditions (struct function *fn)
857 {
858 mark_dfs_back_edges (fn);
859 const bool have_dom = dom_info_available_p (fn, CDI_DOMINATORS);
860 const bool have_post_dom = dom_info_available_p (fn, CDI_POST_DOMINATORS);
861 if (!have_dom)
862 calculate_dominance_info (CDI_DOMINATORS);
863 if (!have_post_dom)
864 calculate_dominance_info (CDI_POST_DOMINATORS);
865
866 const unsigned nblocks = n_basic_blocks_for_fn (fn);
867 basic_block *fnblocksp = basic_block_info_for_fn (fn)->address ();
868 condcov *cov = new condcov (nblocks);
869 conds_ctx& ctx = cov->ctx;
870 array_slice<basic_block> fnblocks (fnblocksp, nblocks);
871 make_top_index (fnblocks, ctx.B1, ctx.top_index);
872
873 /* Bin the Boolean expressions so that exprs[id] -> [x1, x2, ...]. */
874 hash_map<int_hash<unsigned, 0>, vec<basic_block>> exprs;
875 for (basic_block b : fnblocks)
876 {
877 const unsigned uid = condition_uid (fn, b);
878 if (uid == 0)
879 continue;
880 exprs.get_or_insert (uid).safe_push (b);
881 }
882
883 /* Visit all reachable nodes and collect conditions. Topological order is
884 important so the first node of a boolean expression is visited first
885 (it will mark subsequent terms). */
886 cov->m_index.safe_push (0);
887 for (auto expr : exprs)
888 {
889 vec<basic_block>& conds = expr.second;
890 if (conds.length () > CONDITIONS_MAX_TERMS)
891 {
892 location_t loc = gimple_location (gsi_stmt (gsi_last_bb (conds[0])));
893 warning_at (loc, OPT_Wcoverage_too_many_conditions,
894 "Too many conditions (found %u); giving up coverage",
895 conds.length ());
896 continue;
897 }
898 conds.sort (topological_cmp, &ctx.top_index);
899 vec<basic_block>& subgraph = paths_between (ctx, fnblocks, conds);
900 subgraph.sort (topological_cmp, &ctx.top_index);
901 const unsigned index = cov->m_index.length () - 1;
902 sbitmap condm = cov->m_maps[0 + 2*index];
903 sbitmap subgm = cov->m_maps[1 + 2*index];
904 for (basic_block b : conds)
905 bitmap_set_bit (condm, b->index);
906 for (basic_block b : subgraph)
907 bitmap_set_bit (subgm, b->index);
908 cov->m_blocks.safe_splice (subgraph);
909 cov->m_index.safe_push (cov->m_blocks.length ());
910 }
911
912 if (!have_dom)
913 free_dominance_info (fn, CDI_DOMINATORS);
914 if (!have_post_dom)
915 free_dominance_info (fn, CDI_POST_DOMINATORS);
916
917 cov->m_masks.safe_grow_cleared (2 * cov->m_index.last ());
918 const size_t length = cov_length (cov);
919 for (size_t i = 0; i != length; i++)
920 masking_vectors (ctx, cov_blocks (cov, i), cov_maps (cov, i),
921 cov_masks (cov, i));
922
923 return cov;
924 }
925
926 namespace
927 {
928
929 /* Stores the incoming edge and previous counters (in SSA form) on that edge
930    for the node e->dest.  The counters record
931 the seen-true (0), seen-false (1), and current-mask (2). They are stored in
932 an array rather than proper members for access-by-index as the code paths
933 tend to be identical for the different counters. */
934 struct counters
935 {
936 edge e;
937 tree counter[3];
938 tree& operator [] (size_t i) { return counter[i]; }
939 };
940
941 /* Find the counters for the incoming edge e, or NULL if the edge has not been
942 recorded (could be for complex incoming edges). */
943 counters*
944 find_counters (vec<counters>& candidates, edge e)
945 {
946 for (counters& candidate : candidates)
947 if (candidate.e == e)
948 return &candidate;
949 return NULL;
950 }
951
952 /* Resolve the SSA for a specific counter KIND. If it is not modified by any
953 incoming edges, simply forward it, otherwise create a phi node of all the
954 candidate counters and return it. */
955 tree
956 resolve_counter (vec<counters>& cands, size_t kind)
957 {
958 gcc_assert (!cands.is_empty ());
959 gcc_assert (kind < 3);
960
961 counters& fst = cands[0];
962
963 if (!fst.e || fst.e->dest->preds->length () == 1)
964 {
965 gcc_assert (cands.length () == 1);
966 return fst[kind];
967 }
968
969 tree zero0 = build_int_cst (gcov_type_node, 0);
970 tree ssa = make_ssa_name (gcov_type_node);
971 gphi *phi = create_phi_node (ssa, fst.e->dest);
972 for (edge e : fst.e->dest->preds)
973 {
974 counters *prev = find_counters (cands, e);
975 if (prev)
976 add_phi_arg (phi, (*prev)[kind], e, UNKNOWN_LOCATION);
977 else
978 {
979 tree zero = make_ssa_name (gcov_type_node);
980 gimple_stmt_iterator gsi = gsi_after_labels (e->src);
981 gassign *set = gimple_build_assign (zero, zero0);
982 gsi_insert_before (&gsi, set, GSI_NEW_STMT);
983 add_phi_arg (phi, zero, e, UNKNOWN_LOCATION);
984 }
985 }
986 return ssa;
987 }
988
989 /* Resolve all the counters for a node. Note that the edge is undefined, as
990 the counters are intended to form the base to push to the successors, and
991    because the edge is only meaningful for nodes with a single predecessor. */
992 counters
993 resolve_counters (vec<counters>& cands)
994 {
995 counters next;
996 next[0] = resolve_counter (cands, 0);
997 next[1] = resolve_counter (cands, 1);
998 next[2] = resolve_counter (cands, 2);
999 return next;
1000 }
1001
1002 }
1003
1004 /* Add instrumentation to a decision subgraph. EXPR should be the
1005 (topologically sorted) block of nodes returned by cov_blocks, MAPS the
1006 bitmaps returned by cov_maps, and MASKS the block of bitsets returned by
1007 cov_masks. CONDNO should be the index of this condition in the function,
1008 i.e. the same argument given to cov_{masks,graphs}. EXPR may contain nodes
1009 in-between the conditions, e.g. when an operand contains a function call,
1010 or there is a setjmp and the cfg is filled with complex edges.
1011
1012    Every node is annotated with three counters: the true, false, and mask
1013    value.  First, walk the graph and determine whether there are multiple
1014 possible values for either accumulator depending on the path taken, in which
1015 case a phi node is created and registered as the accumulator. Then, those
1016 values are pushed as accumulators to the immediate successors. For some
1017 very particular programs there may be multiple paths into the expression
1018 (e.g. when prior terms are determined by a surrounding conditional) in which
1019 case the default zero-counter is pushed, otherwise all predecessors will
1020 have been considered before the successor because of topologically ordered
1021 traversal. Finally, expr is traversed again to look for edges to the
1022 outcomes, that is, edges with a destination outside of expr, and the local
1023 accumulators are flushed to the global gcov counters on these edges. In
1024 some cases there are edge splits that cause 3+ edges to the two outcome
1025 nodes.
1026
1027    If a complex edge is taken (e.g. on a longjmp) an attempt is made to poison
1028    the accumulators so that there would be no change to the global counters,
1029 but this has proven unreliable in the presence of undefined behavior, see
1030 the setjmp003 test.
1031
1032 It is important that the flushes happen on the basic condition outgoing
1033 edge, otherwise flushes could be lost to exception handling or other
1034 abnormal control flow. */
1035 size_t
1036 instrument_decisions (array_slice<basic_block> expr, size_t condno,
1037 array_slice<sbitmap> maps, array_slice<uint64_t> masks)
1038 {
1039 tree zero = build_int_cst (gcov_type_node, 0);
1040 tree poison = build_int_cst (gcov_type_node, ~0ULL);
1041 const sbitmap core = maps[0];
1042 const sbitmap allg = maps[1];
1043
1044 hash_map<basic_block, vec<counters>> table;
1045 counters zerocounter;
1046 zerocounter.e = NULL;
1047 zerocounter[0] = zero;
1048 zerocounter[1] = zero;
1049 zerocounter[2] = zero;
1050
1051 unsigned xi = 0;
1052 tree rhs = build_int_cst (gcov_type_node, 1ULL << xi);
1053 for (basic_block current : expr)
1054 {
1055 vec<counters>& candidates = table.get_or_insert (current);
1056 if (candidates.is_empty ())
1057 candidates.safe_push (zerocounter);
1058 counters prev = resolve_counters (candidates);
1059
1060 int increment = 0;
1061 for (edge e : current->succs)
1062 {
1063 counters next = prev;
1064 next.e = e;
1065
1066 if (bitmap_bit_p (core, e->src->index) && (e->flags & EDGE_CONDITION))
1067 {
1068 const int k = condition_index (e->flags);
1069 next[k] = emit_bitwise_op (e, prev[k], BIT_IOR_EXPR, rhs);
1070 if (masks[2*xi + k])
1071 {
1072 tree m = build_int_cst (gcov_type_node, masks[2*xi + k]);
1073 next[2] = emit_bitwise_op (e, prev[2], BIT_IOR_EXPR, m);
1074 }
1075 increment = 1;
1076 }
1077 else if (e->flags & EDGE_COMPLEX)
1078 {
1079 /* A complex edge has been taken - wipe the accumulators and
1080 poison the mask so that this path does not contribute to
1081 coverage. */
1082 next[0] = poison;
1083 next[1] = poison;
1084 next[2] = poison;
1085 }
1086 table.get_or_insert (e->dest).safe_push (next);
1087 }
1088 xi += increment;
1089 if (increment)
1090 rhs = build_int_cst (gcov_type_node, 1ULL << xi);
1091 }
1092
1093 gcc_assert (xi == bitmap_count_bits (core));
1094
1095 const tree relaxed = build_int_cst (integer_type_node, MEMMODEL_RELAXED);
1096 const bool atomic = flag_profile_update == PROFILE_UPDATE_ATOMIC;
1097 const tree atomic_ior = builtin_decl_explicit
1098 (TYPE_PRECISION (gcov_type_node) > 32
1099 ? BUILT_IN_ATOMIC_FETCH_OR_8
1100 : BUILT_IN_ATOMIC_FETCH_OR_4);
1101
1102 /* Flush to the gcov accumulators. */
1103 for (const basic_block b : expr)
1104 {
1105 if (!bitmap_bit_p (core, b->index))
1106 continue;
1107
1108 for (edge e : b->succs)
1109 {
1110 /* Flush the accumulators on leaving the Boolean function. The
1111 destination may be inside the function only when it returns to
1112 the loop header, such as do { ... } while (x); */
1113 if (bitmap_bit_p (allg, e->dest->index)) {
1114 if (!(e->flags & EDGE_DFS_BACK))
1115 continue;
1116 if (e->dest != expr[0])
1117 continue;
1118 }
1119
1120 vec<counters> *cands = table.get (e->dest);
1121 gcc_assert (cands);
1122 counters *prevp = find_counters (*cands, e);
1123 gcc_assert (prevp);
1124 counters prev = *prevp;
1125
1126 /* _true &= ~mask, _false &= ~mask */
1127 counters next;
1128 next[2] = emit_bitwise_op (e, prev[2], BIT_NOT_EXPR);
1129 next[0] = emit_bitwise_op (e, prev[0], BIT_AND_EXPR, next[2]);
1130 next[1] = emit_bitwise_op (e, prev[1], BIT_AND_EXPR, next[2]);
1131
1132 /* _global_true |= _true, _global_false |= _false */
1133 for (size_t k = 0; k != 2; ++k)
1134 {
1135 tree ref = tree_coverage_counter_ref (GCOV_COUNTER_CONDS,
1136 2*condno + k);
1137 if (atomic)
1138 {
1139 ref = unshare_expr (ref);
1140 gcall *flush = gimple_build_call (atomic_ior, 3,
1141 build_addr (ref),
1142 next[k], relaxed);
1143 gsi_insert_on_edge (e, flush);
1144 }
1145 else
1146 {
1147 tree get = emit_assign (e, ref);
1148 tree put = emit_bitwise_op (e, next[k], BIT_IOR_EXPR, get);
1149 emit_assign (e, unshare_expr (ref), put);
1150 }
1151 }
1152 }
1153 }
1154
1155 return xi;
1156 }
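
/* Illustrative note: for condition number CONDNO the flushes above populate
   the counter pair at offsets 2*CONDNO and 2*CONDNO + 1 in the
   GCOV_COUNTER_CONDS section -- the accumulated "seen true" and "seen false"
   bit-sets described in find_conditions.  */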
1157
1158 #undef CONDITIONS_MAX_TERMS
1159 #undef EDGE_CONDITION
1160
1161 /* Do initialization work for the edge profiler. */
1162
1163 /* Add code:
1164 __thread gcov* __gcov_indirect_call.counters; // pointer to actual counter
1165 __thread void* __gcov_indirect_call.callee; // actual callee address
1166 __thread int __gcov_function_counter; // time profiler function counter
1167 */
1168 static void
1169 init_ic_make_global_vars (void)
1170 {
1171 tree gcov_type_ptr;
1172
1173 gcov_type_ptr = build_pointer_type (get_gcov_type ());
1174
1175 tree tuple_type = lang_hooks.types.make_type (RECORD_TYPE);
1176
1177 /* callee */
1178 ic_tuple_callee_field = build_decl (BUILTINS_LOCATION, FIELD_DECL, NULL_TREE,
1179 ptr_type_node);
1180
1181 /* counters */
1182 ic_tuple_counters_field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
1183 NULL_TREE, gcov_type_ptr);
1184 DECL_CHAIN (ic_tuple_counters_field) = ic_tuple_callee_field;
1185
1186 finish_builtin_struct (tuple_type, "indirect_call_tuple",
1187 ic_tuple_counters_field, NULL_TREE);
1188
1189 ic_tuple_var
1190 = build_decl (UNKNOWN_LOCATION, VAR_DECL,
1191 get_identifier ("__gcov_indirect_call"), tuple_type);
1192 TREE_PUBLIC (ic_tuple_var) = 1;
1193 DECL_ARTIFICIAL (ic_tuple_var) = 1;
1194 DECL_INITIAL (ic_tuple_var) = NULL;
1195 DECL_EXTERNAL (ic_tuple_var) = 1;
1196 if (targetm.have_tls)
1197 set_decl_tls_model (ic_tuple_var, decl_default_tls_model (ic_tuple_var));
1198 }
1199
1200 /* Create the type and function decls for the interface with gcov. */
1201
1202 void
1203 gimple_init_gcov_profiler (void)
1204 {
1205 tree interval_profiler_fn_type;
1206 tree pow2_profiler_fn_type;
1207 tree topn_values_profiler_fn_type;
1208 tree gcov_type_ptr;
1209 tree ic_profiler_fn_type;
1210 tree average_profiler_fn_type;
1211 const char *fn_name;
1212
1213 if (!gcov_type_node)
1214 {
1215 const char *fn_suffix
1216 = flag_profile_update == PROFILE_UPDATE_ATOMIC ? "_atomic" : "";
1217
1218 gcov_type_node = get_gcov_type ();
1219 gcov_type_ptr = build_pointer_type (gcov_type_node);
1220
1221 /* void (*) (gcov_type *, gcov_type, int, unsigned) */
1222 interval_profiler_fn_type
1223 = build_function_type_list (void_type_node,
1224 gcov_type_ptr, gcov_type_node,
1225 integer_type_node,
1226 unsigned_type_node, NULL_TREE);
1227 fn_name = concat ("__gcov_interval_profiler", fn_suffix, NULL);
1228 tree_interval_profiler_fn = build_fn_decl (fn_name,
1229 interval_profiler_fn_type);
1230 free (CONST_CAST (char *, fn_name));
1231 TREE_NOTHROW (tree_interval_profiler_fn) = 1;
1232 DECL_ATTRIBUTES (tree_interval_profiler_fn)
1233 = tree_cons (get_identifier ("leaf"), NULL,
1234 DECL_ATTRIBUTES (tree_interval_profiler_fn));
1235
1236 /* void (*) (gcov_type *, gcov_type) */
1237 pow2_profiler_fn_type
1238 = build_function_type_list (void_type_node,
1239 gcov_type_ptr, gcov_type_node,
1240 NULL_TREE);
1241 fn_name = concat ("__gcov_pow2_profiler", fn_suffix, NULL);
1242 tree_pow2_profiler_fn = build_fn_decl (fn_name, pow2_profiler_fn_type);
1243 free (CONST_CAST (char *, fn_name));
1244 TREE_NOTHROW (tree_pow2_profiler_fn) = 1;
1245 DECL_ATTRIBUTES (tree_pow2_profiler_fn)
1246 = tree_cons (get_identifier ("leaf"), NULL,
1247 DECL_ATTRIBUTES (tree_pow2_profiler_fn));
1248
1249 /* void (*) (gcov_type *, gcov_type) */
1250 topn_values_profiler_fn_type
1251 = build_function_type_list (void_type_node,
1252 gcov_type_ptr, gcov_type_node,
1253 NULL_TREE);
1254 fn_name = concat ("__gcov_topn_values_profiler", fn_suffix, NULL);
1255 tree_topn_values_profiler_fn
1256 = build_fn_decl (fn_name, topn_values_profiler_fn_type);
1257 free (CONST_CAST (char *, fn_name));
1258
1259 TREE_NOTHROW (tree_topn_values_profiler_fn) = 1;
1260 DECL_ATTRIBUTES (tree_topn_values_profiler_fn)
1261 = tree_cons (get_identifier ("leaf"), NULL,
1262 DECL_ATTRIBUTES (tree_topn_values_profiler_fn));
1263
1264 init_ic_make_global_vars ();
1265
1266 /* void (*) (gcov_type, void *) */
1267 ic_profiler_fn_type
1268 = build_function_type_list (void_type_node,
1269 gcov_type_node,
1270 ptr_type_node,
1271 NULL_TREE);
1272 fn_name = concat ("__gcov_indirect_call_profiler_v4", fn_suffix, NULL);
1273 tree_indirect_call_profiler_fn
1274 = build_fn_decl (fn_name, ic_profiler_fn_type);
1275 free (CONST_CAST (char *, fn_name));
1276
1277 TREE_NOTHROW (tree_indirect_call_profiler_fn) = 1;
1278 DECL_ATTRIBUTES (tree_indirect_call_profiler_fn)
1279 = tree_cons (get_identifier ("leaf"), NULL,
1280 DECL_ATTRIBUTES (tree_indirect_call_profiler_fn));
1281
1282 tree_time_profiler_counter
1283 = build_decl (UNKNOWN_LOCATION, VAR_DECL,
1284 get_identifier ("__gcov_time_profiler_counter"),
1285 get_gcov_type ());
1286 TREE_PUBLIC (tree_time_profiler_counter) = 1;
1287 DECL_EXTERNAL (tree_time_profiler_counter) = 1;
1288 TREE_STATIC (tree_time_profiler_counter) = 1;
1289 DECL_ARTIFICIAL (tree_time_profiler_counter) = 1;
1290 DECL_INITIAL (tree_time_profiler_counter) = NULL;
1291
1292 /* void (*) (gcov_type *, gcov_type) */
1293 average_profiler_fn_type
1294 = build_function_type_list (void_type_node,
1295 gcov_type_ptr, gcov_type_node, NULL_TREE);
1296 fn_name = concat ("__gcov_average_profiler", fn_suffix, NULL);
1297 tree_average_profiler_fn = build_fn_decl (fn_name,
1298 average_profiler_fn_type);
1299 free (CONST_CAST (char *, fn_name));
1300 TREE_NOTHROW (tree_average_profiler_fn) = 1;
1301 DECL_ATTRIBUTES (tree_average_profiler_fn)
1302 = tree_cons (get_identifier ("leaf"), NULL,
1303 DECL_ATTRIBUTES (tree_average_profiler_fn));
1304 fn_name = concat ("__gcov_ior_profiler", fn_suffix, NULL);
1305 tree_ior_profiler_fn = build_fn_decl (fn_name, average_profiler_fn_type);
1306 free (CONST_CAST (char *, fn_name));
1307 TREE_NOTHROW (tree_ior_profiler_fn) = 1;
1308 DECL_ATTRIBUTES (tree_ior_profiler_fn)
1309 = tree_cons (get_identifier ("leaf"), NULL,
1310 DECL_ATTRIBUTES (tree_ior_profiler_fn));
1311
1312 /* LTO streamer needs assembler names. Because we create these decls
1313 late, we need to initialize them by hand. */
1314 DECL_ASSEMBLER_NAME (tree_interval_profiler_fn);
1315 DECL_ASSEMBLER_NAME (tree_pow2_profiler_fn);
1316 DECL_ASSEMBLER_NAME (tree_topn_values_profiler_fn);
1317 DECL_ASSEMBLER_NAME (tree_indirect_call_profiler_fn);
1318 DECL_ASSEMBLER_NAME (tree_average_profiler_fn);
1319 DECL_ASSEMBLER_NAME (tree_ior_profiler_fn);
1320 }
1321 }
1322
1323 /* If RESULT is not null, then output instructions as GIMPLE trees to assign
1324 the updated counter from CALL of FUNC to RESULT. Insert the CALL and the
1325 optional assignment instructions to GSI. Use NAME for temporary values. */
1326
1327 static inline void
1328 gen_assign_counter_update (gimple_stmt_iterator *gsi, gcall *call, tree func,
1329 tree result, const char *name)
1330 {
1331 if (result)
1332 {
1333 tree result_type = TREE_TYPE (TREE_TYPE (func));
1334 tree tmp1 = make_temp_ssa_name (result_type, NULL, name);
1335 gimple_set_lhs (call, tmp1);
1336 gsi_insert_after (gsi, call, GSI_NEW_STMT);
1337 tree tmp2 = make_temp_ssa_name (TREE_TYPE (result), NULL, name);
1338 gassign *assign = gimple_build_assign (tmp2, NOP_EXPR, tmp1);
1339 gsi_insert_after (gsi, assign, GSI_NEW_STMT);
1340 assign = gimple_build_assign (result, tmp2);
1341 gsi_insert_after (gsi, assign, GSI_NEW_STMT);
1342 }
1343 else
1344 gsi_insert_after (gsi, call, GSI_NEW_STMT);
1345 }
1346
1347 /* Output instructions as GIMPLE trees to increment the COUNTER. If RESULT is
1348 not null, then assign the updated counter value to RESULT. Insert the
1349 instructions to GSI. Use NAME for temporary values. */
1350
1351 static inline void
1352 gen_counter_update (gimple_stmt_iterator *gsi, tree counter, tree result,
1353 const char *name)
1354 {
1355 tree type = gcov_type_node;
1356 tree addr = build_fold_addr_expr (counter);
1357 tree one = build_int_cst (type, 1);
1358 tree relaxed = build_int_cst (integer_type_node, MEMMODEL_RELAXED);
1359
1360 if (counter_update == COUNTER_UPDATE_ATOMIC_BUILTIN
1361 || (result && counter_update == COUNTER_UPDATE_ATOMIC_SPLIT))
1362 {
1363 /* __atomic_fetch_add (&counter, 1, MEMMODEL_RELAXED); */
1364 tree f = builtin_decl_explicit (TYPE_PRECISION (type) > 32
1365 ? BUILT_IN_ATOMIC_ADD_FETCH_8
1366 : BUILT_IN_ATOMIC_ADD_FETCH_4);
1367 gcall *call = gimple_build_call (f, 3, addr, one, relaxed);
1368 gen_assign_counter_update (gsi, call, f, result, name);
1369 }
1370 else if (!result && (counter_update == COUNTER_UPDATE_ATOMIC_SPLIT
1371 || counter_update == COUNTER_UPDATE_ATOMIC_PARTIAL))
1372 {
1373 /* low = __atomic_add_fetch_4 (addr, 1, MEMMODEL_RELAXED);
1374 high_inc = low == 0 ? 1 : 0;
1375 __atomic_add_fetch_4 (addr_high, high_inc, MEMMODEL_RELAXED); */
1376 tree zero32 = build_zero_cst (uint32_type_node);
1377 tree one32 = build_one_cst (uint32_type_node);
1378 tree addr_high = make_temp_ssa_name (TREE_TYPE (addr), NULL, name);
1379 tree four = build_int_cst (size_type_node, 4);
1380 gassign *assign1 = gimple_build_assign (addr_high, POINTER_PLUS_EXPR,
1381 addr, four);
1382 gsi_insert_after (gsi, assign1, GSI_NEW_STMT);
1383 if (WORDS_BIG_ENDIAN)
1384 std::swap (addr, addr_high);
1385 tree f = builtin_decl_explicit (BUILT_IN_ATOMIC_ADD_FETCH_4);
1386 gcall *call1 = gimple_build_call (f, 3, addr, one, relaxed);
1387 tree low = make_temp_ssa_name (uint32_type_node, NULL, name);
1388 gimple_call_set_lhs (call1, low);
1389 gsi_insert_after (gsi, call1, GSI_NEW_STMT);
1390 tree is_zero = make_temp_ssa_name (boolean_type_node, NULL, name);
1391 gassign *assign2 = gimple_build_assign (is_zero, EQ_EXPR, low,
1392 zero32);
1393 gsi_insert_after (gsi, assign2, GSI_NEW_STMT);
1394 tree high_inc = make_temp_ssa_name (uint32_type_node, NULL, name);
1395 gassign *assign3 = gimple_build_assign (high_inc, COND_EXPR,
1396 is_zero, one32, zero32);
1397 gsi_insert_after (gsi, assign3, GSI_NEW_STMT);
1398 gcall *call2 = gimple_build_call (f, 3, addr_high, high_inc,
1399 relaxed);
1400 gsi_insert_after (gsi, call2, GSI_NEW_STMT);
1401 }
1402 else
1403 {
1404 tree tmp1 = make_temp_ssa_name (type, NULL, name);
1405 gassign *assign1 = gimple_build_assign (tmp1, counter);
1406 gsi_insert_after (gsi, assign1, GSI_NEW_STMT);
1407 tree tmp2 = make_temp_ssa_name (type, NULL, name);
1408 gassign *assign2 = gimple_build_assign (tmp2, PLUS_EXPR, tmp1, one);
1409 gsi_insert_after (gsi, assign2, GSI_NEW_STMT);
1410 gassign *assign3 = gimple_build_assign (unshare_expr (counter), tmp2);
1411 gsi_insert_after (gsi, assign3, GSI_NEW_STMT);
1412 if (result)
1413 {
1414 gassign *assign4 = gimple_build_assign (result, tmp2);
1415 gsi_insert_after (gsi, assign4, GSI_NEW_STMT);
1416 }
1417 }
1418 }
1419
1420 /* Output instructions as GIMPLE trees to increment the edge
1421 execution count, and insert them on E. */
1422
1423 void
1424 gimple_gen_edge_profiler (int edgeno, edge e)
1425 {
1426 gimple_stmt_iterator gsi = gsi_last (PENDING_STMT (e));
1427 tree counter = tree_coverage_counter_ref (GCOV_COUNTER_ARCS, edgeno);
1428 gen_counter_update (&gsi, counter, NULL_TREE, "PROF_edge_counter");
1429 }
1430
1431 /* Emits code to get VALUE to instrument at GSI, and returns the
1432 variable containing the value. */
1433
1434 static tree
1435 prepare_instrumented_value (gimple_stmt_iterator *gsi, histogram_value value)
1436 {
1437 tree val = value->hvalue.value;
1438 if (POINTER_TYPE_P (TREE_TYPE (val)))
1439 val = fold_convert (build_nonstandard_integer_type
1440 (TYPE_PRECISION (TREE_TYPE (val)), 1), val);
1441 return force_gimple_operand_gsi (gsi, fold_convert (gcov_type_node, val),
1442 true, NULL_TREE, true, GSI_SAME_STMT);
1443 }
1444
1445 /* Output instructions as GIMPLE trees to increment the interval histogram
1446 counter. VALUE is the expression whose value is profiled. TAG is the
1447 tag of the section for counters, BASE is offset of the counter position. */
1448
1449 void
1450 gimple_gen_interval_profiler (histogram_value value, unsigned tag)
1451 {
1452 gimple *stmt = value->hvalue.stmt;
1453 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1454 tree ref = tree_coverage_counter_ref (tag, 0), ref_ptr;
1455 gcall *call;
1456 tree val;
1457 tree start = build_int_cst_type (integer_type_node,
1458 value->hdata.intvl.int_start);
1459 tree steps = build_int_cst_type (unsigned_type_node,
1460 value->hdata.intvl.steps);
1461
1462 ref_ptr = force_gimple_operand_gsi (&gsi,
1463 build_addr (ref),
1464 true, NULL_TREE, true, GSI_SAME_STMT);
1465 val = prepare_instrumented_value (&gsi, value);
1466 call = gimple_build_call (tree_interval_profiler_fn, 4,
1467 ref_ptr, val, start, steps);
1468 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1469 }
1470
1471 /* Output instructions as GIMPLE trees to increment the power of two histogram
1472 counter. VALUE is the expression whose value is profiled. TAG is the tag
1473 of the section for counters. */
1474
1475 void
1476 gimple_gen_pow2_profiler (histogram_value value, unsigned tag)
1477 {
1478 gimple *stmt = value->hvalue.stmt;
1479 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1480 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1481 gcall *call;
1482 tree val;
1483
1484 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1485 true, NULL_TREE, true, GSI_SAME_STMT);
1486 val = prepare_instrumented_value (&gsi, value);
1487 call = gimple_build_call (tree_pow2_profiler_fn, 2, ref_ptr, val);
1488 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1489 }
1490
1491 /* Output instructions as GIMPLE trees for code to find the N most common
1492 values. VALUE is the expression whose value is profiled. TAG is the tag
1493 of the section for counters. */
1494
1495 void
1496 gimple_gen_topn_values_profiler (histogram_value value, unsigned tag)
1497 {
1498 gimple *stmt = value->hvalue.stmt;
1499 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1500 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1501 gcall *call;
1502 tree val;
1503
1504 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1505 true, NULL_TREE, true, GSI_SAME_STMT);
1506 val = prepare_instrumented_value (&gsi, value);
1507 call = gimple_build_call (tree_topn_values_profiler_fn, 2, ref_ptr, val);
1508 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1509 }
1510
1511
1512 /* Output instructions as GIMPLE trees for code to find the most
1513 common called function in indirect call.
1514 VALUE is the call expression whose indirect callee is profiled.
1515 TAG is the tag of the section for counters. */
1516
1517 void
1518 gimple_gen_ic_profiler (histogram_value value, unsigned tag)
1519 {
1520 tree tmp1;
1521 gassign *stmt1, *stmt2, *stmt3;
1522 gimple *stmt = value->hvalue.stmt;
1523 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1524 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1525
1526 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1527 true, NULL_TREE, true, GSI_SAME_STMT);
1528
1529 /* Insert code:
1530
1531 stmt1: __gcov_indirect_call.counters = get_relevant_counter_ptr ();
1532 stmt2: tmp1 = (void *) (indirect call argument value)
1533 stmt3: __gcov_indirect_call.callee = tmp1;
1534
1535 Example:
1536 f_1 = foo;
1537 __gcov_indirect_call.counters = &__gcov4.main[0];
1538 PROF_fn_9 = f_1;
1539 __gcov_indirect_call.callee = PROF_fn_9;
1540 _4 = f_1 ();
1541 */
1542
1543 tree gcov_type_ptr = build_pointer_type (get_gcov_type ());
1544
1545 tree counter_ref = build3 (COMPONENT_REF, gcov_type_ptr,
1546 ic_tuple_var, ic_tuple_counters_field, NULL_TREE);
1547
1548 stmt1 = gimple_build_assign (counter_ref, ref_ptr);
1549 tmp1 = make_temp_ssa_name (ptr_type_node, NULL, "PROF_fn");
1550 stmt2 = gimple_build_assign (tmp1, unshare_expr (value->hvalue.value));
1551 tree callee_ref = build3 (COMPONENT_REF, ptr_type_node,
1552 ic_tuple_var, ic_tuple_callee_field, NULL_TREE);
1553 stmt3 = gimple_build_assign (callee_ref, tmp1);
1554
1555 gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
1556 gsi_insert_before (&gsi, stmt2, GSI_SAME_STMT);
1557 gsi_insert_before (&gsi, stmt3, GSI_SAME_STMT);
1558 }
1559
1560
1561 /* Output instructions as GIMPLE trees for code to find the most
1562 common called function of an indirect call. Insert instructions at the
1563 beginning of every possible called function.
1564 */
1565
1566 void
1567 gimple_gen_ic_func_profiler (void)
1568 {
1569 struct cgraph_node * c_node = cgraph_node::get (current_function_decl);
1570 gcall *stmt1;
1571 tree tree_uid, cur_func, void0;
1572
1573 /* Disable indirect call profiling for an IFUNC resolver and its
1574 callees since it requires TLS which hasn't been set up yet when
1575 the dynamic linker is resolving IFUNC symbols. See
1576 https://gcc.gnu.org/bugzilla/show_bug.cgi?id=114115
1577 */
1578 if (c_node->only_called_directly_p ()
1579 || c_node->called_by_ifunc_resolver)
1580 return;
1581
1582 gimple_init_gcov_profiler ();
1583
1584 basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
1585 basic_block cond_bb = split_edge (single_succ_edge (entry));
1586 basic_block update_bb = split_edge (single_succ_edge (cond_bb));
1587
1588 /* We need to do an extra split in order to not create an input
1589 for a possible PHI node. */
1590 split_edge (single_succ_edge (update_bb));
1591
1592 edge true_edge = single_succ_edge (cond_bb);
1593 true_edge->flags = EDGE_TRUE_VALUE;
1594
1595 profile_probability probability;
1596 if (DECL_VIRTUAL_P (current_function_decl))
1597 probability = profile_probability::very_likely ();
1598 else
1599 probability = profile_probability::unlikely ();
1600
1601 true_edge->probability = probability;
1602 edge e = make_edge (cond_bb, single_succ_edge (update_bb)->dest,
1603 EDGE_FALSE_VALUE);
1604 e->probability = true_edge->probability.invert ();
1605
1606 /* Insert code:
1607
1608 if (__gcov_indirect_call.callee != NULL)
1609 __gcov_indirect_call_profiler_v3 (profile_id, &current_function_decl);
1610
1611 The function __gcov_indirect_call_profiler_v3 is responsible for
1612 resetting __gcov_indirect_call.callee to NULL. */
1613
1614 gimple_stmt_iterator gsi = gsi_start_bb (cond_bb);
1615 void0 = build_int_cst (ptr_type_node, 0);
1616
1617 tree callee_ref = build3 (COMPONENT_REF, ptr_type_node,
1618 ic_tuple_var, ic_tuple_callee_field, NULL_TREE);
1619
1620 tree ref = force_gimple_operand_gsi (&gsi, callee_ref, true, NULL_TREE,
1621 true, GSI_SAME_STMT);
1622
1623 gcond *cond = gimple_build_cond (NE_EXPR, ref,
1624 void0, NULL, NULL);
1625 gsi_insert_before (&gsi, cond, GSI_NEW_STMT);
1626
1627 gsi = gsi_after_labels (update_bb);
1628
1629 cur_func = force_gimple_operand_gsi (&gsi,
1630 build_addr (current_function_decl),
1631 true, NULL_TREE,
1632 true, GSI_SAME_STMT);
1633 tree_uid = build_int_cst
1634 (gcov_type_node,
1635 cgraph_node::get (current_function_decl)->profile_id);
1636 stmt1 = gimple_build_call (tree_indirect_call_profiler_fn, 2,
1637 tree_uid, cur_func);
1638 gsi_insert_before (&gsi, stmt1, GSI_SAME_STMT);
1639 }
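/* Sketch of the control flow created above (block names are illustrative):

     entry -> cond_bb -> update_bb -> rest of the function
                  \________________________^

   cond_bb tests __gcov_indirect_call.callee != NULL and its true edge into
   update_bb is marked unlikely (very likely for virtual functions);
   update_bb holds the call to tree_indirect_call_profiler_fn.  */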
1640
1641 /* Output instructions as GIMPLE trees at the beginning of each function
1642 to update the function's time-profile counter. TAG is the tag of the
1643 section for counters. */
1644
1645 void
1646 gimple_gen_time_profiler (unsigned tag)
1647 {
1648 tree type = get_gcov_type ();
1649 basic_block entry = ENTRY_BLOCK_PTR_FOR_FN (cfun);
1650 basic_block cond_bb = split_edge (single_succ_edge (entry));
1651 basic_block update_bb = split_edge (single_succ_edge (cond_bb));
1652
1653 /* We need to do an extra split in order to not create an input
1654 for a possible PHI node. */
1655 split_edge (single_succ_edge (update_bb));
1656
1657 edge true_edge = single_succ_edge (cond_bb);
1658 true_edge->flags = EDGE_TRUE_VALUE;
1659 true_edge->probability = profile_probability::unlikely ();
1660 edge e
1661 = make_edge (cond_bb, single_succ_edge (update_bb)->dest, EDGE_FALSE_VALUE);
1662 e->probability = true_edge->probability.invert ();
1663
1664 gimple_stmt_iterator gsi = gsi_start_bb (cond_bb);
1665 tree original_ref = tree_coverage_counter_ref (tag, 0);
1666 tree ref = force_gimple_operand_gsi (&gsi, original_ref, true, NULL_TREE,
1667 true, GSI_SAME_STMT);
1668
1669 /* Emit: if (counters[0] == 0). */
1670 gcond *cond = gimple_build_cond (EQ_EXPR, ref, build_int_cst (type, 0),
1671 NULL, NULL);
1672 gsi_insert_before (&gsi, cond, GSI_NEW_STMT);
1673
1674 /* Emit: counters[0] = ++__gcov_time_profiler_counter. */
1675 gsi = gsi_start_bb (update_bb);
1676 gen_counter_update (&gsi, tree_time_profiler_counter, original_ref,
1677 "PROF_time_profile");
1678 }
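/* What the code built above amounts to at the source level (a sketch; the
   actual update is whatever gen_counter_update produces):

     if (counters[0] == 0)
       counters[0] = ++__gcov_time_profiler_counter;

   so only the first execution of the function records its position in the
   global invocation order.  */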
1679
1680 /* Output instructions as GIMPLE trees to increment the average histogram
1681 counter. VALUE is the expression whose value is profiled. TAG is the
1682 tag of the section for counters. */
1683
1684 void
1685 gimple_gen_average_profiler (histogram_value value, unsigned tag)
1686 {
1687 gimple *stmt = value->hvalue.stmt;
1688 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1689 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1690 gcall *call;
1691 tree val;
1692
1693 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1694 true, NULL_TREE,
1695 true, GSI_SAME_STMT);
1696 val = prepare_instrumented_value (&gsi, value);
1697 call = gimple_build_call (tree_average_profiler_fn, 2, ref_ptr, val);
1698 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1699 }
1700
1701 /* Output instructions as GIMPLE trees to increment the ior histogram
1702 counter. VALUE is the expression whose value is profiled. TAG is the
1703 tag of the section for counters. */
1704
1705 void
1706 gimple_gen_ior_profiler (histogram_value value, unsigned tag)
1707 {
1708 gimple *stmt = value->hvalue.stmt;
1709 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1710 tree ref_ptr = tree_coverage_counter_addr (tag, 0);
1711 gcall *call;
1712 tree val;
1713
1714 ref_ptr = force_gimple_operand_gsi (&gsi, ref_ptr,
1715 true, NULL_TREE, true, GSI_SAME_STMT);
1716 val = prepare_instrumented_value (&gsi, value);
1717 call = gimple_build_call (tree_ior_profiler_fn, 2, ref_ptr, val);
1718 gsi_insert_before (&gsi, call, GSI_NEW_STMT);
1719 }
1720
1721 static vec<regex_t> profile_filter_files;
1722 static vec<regex_t> profile_exclude_files;
1723
1724 /* Parse the list of regular expressions in REGEX (separated by semicolons),
1725 compile them into regex_t objects and save them into the vector V.
1726 If a regular expression fails to compile, an error message mentioning
1727 FLAG_NAME is printed. */
1728
1729 static void
1730 parse_profile_filter (const char *regex, vec<regex_t> *v,
1731 const char *flag_name)
1732 {
1733 v->create (4);
1734 if (regex != NULL)
1735 {
1736 char *str = xstrdup (regex);
1737 for (char *p = strtok (str, ";"); p != NULL; p = strtok (NULL, ";"))
1738 {
1739 regex_t r;
1740 if (regcomp (&r, p, REG_EXTENDED | REG_NOSUB) != 0)
1741 {
1742 error ("invalid regular expression %qs in %qs",
1743 p, flag_name);
1744 return;
1745 }
1746
1747 v->safe_push (r);
1748 }
1749 }
1750 }
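/* Example (hypothetical flag value): given
   -fprofile-filter-files=.*\.c$;util/.* the string is split at the
   semicolon and two REG_EXTENDED | REG_NOSUB patterns, ".*\.c$" and
   "util/.*", are compiled and pushed into V.  */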
1751
1752 /* Parse values of -fprofile-filter-files and -fprofile-exclude-files
1753 options. */
1754
1755 static void
1756 parse_profile_file_filtering ()
1757 {
1758 parse_profile_filter (flag_profile_filter_files, &profile_filter_files,
1759 "-fprofile-filter-files");
1760 parse_profile_filter (flag_profile_exclude_files, &profile_exclude_files,
1761 "-fprofile-exclude-files");
1762 }
1763
1764 /* Release the vectors of compiled regular expressions. */
1765
1766 static void
1767 release_profile_file_filtering ()
1768 {
1769 profile_filter_files.release ();
1770 profile_exclude_files.release ();
1771 }
1772
1773 /* Return true when FILENAME should be instrumented based on
1774 -fprofile-filter-files and -fprofile-exclude-files options. */
1775
1776 static bool
1777 include_source_file_for_profile (const char *filename)
1778 {
1779 /* First check whether file is included in flag_profile_exclude_files. */
1780 for (unsigned i = 0; i < profile_exclude_files.length (); i++)
1781 if (regexec (&profile_exclude_files[i],
1782 filename, 0, NULL, 0) == REG_NOERROR)
1783 return false;
1784
1785 /* For non-empty flag_profile_filter_files include only files matching a
1786 regex in the flag. */
1787 if (profile_filter_files.is_empty ())
1788 return true;
1789
1790 for (unsigned i = 0; i < profile_filter_files.length (); i++)
1791 if (regexec (&profile_filter_files[i], filename, 0, NULL, 0) == REG_NOERROR)
1792 return true;
1793
1794 return false;
1795 }
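/* Worked example (hypothetical options): with
   -fprofile-exclude-files=.*_test\.c$ -fprofile-filter-files=src/.*

     src/foo.c      -> instrumented (matches a filter regex, not excluded)
     src/foo_test.c -> skipped      (exclusion wins over filtering)
     lib/bar.c      -> skipped      (matches no filter regex)

   With no -fprofile-filter-files patterns, every file that is not excluded
   is instrumented.  */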
1796
1797 #ifndef HAVE_sync_compare_and_swapsi
1798 #define HAVE_sync_compare_and_swapsi 0
1799 #endif
1800 #ifndef HAVE_atomic_compare_and_swapsi
1801 #define HAVE_atomic_compare_and_swapsi 0
1802 #endif
1803
1804 #ifndef HAVE_sync_compare_and_swapdi
1805 #define HAVE_sync_compare_and_swapdi 0
1806 #endif
1807 #ifndef HAVE_atomic_compare_and_swapdi
1808 #define HAVE_atomic_compare_and_swapdi 0
1809 #endif
1810
1811 /* Profile all functions in the callgraph. */
1812
1813 static unsigned int
1814 tree_profiling (void)
1815 {
1816 struct cgraph_node *node;
1817
1818 /* Verify whether we can utilize atomic update operations. */
1819 bool can_support_atomic = targetm.have_libatomic;
1820 unsigned HOST_WIDE_INT gcov_type_size
1821 = tree_to_uhwi (TYPE_SIZE_UNIT (get_gcov_type ()));
1822 bool have_atomic_4
1823 = HAVE_sync_compare_and_swapsi || HAVE_atomic_compare_and_swapsi;
1824 bool have_atomic_8
1825 = HAVE_sync_compare_and_swapdi || HAVE_atomic_compare_and_swapdi;
1826 bool needs_split = gcov_type_size == 8 && !have_atomic_8 && have_atomic_4;
1827 if (!can_support_atomic)
1828 {
1829 if (gcov_type_size == 4)
1830 can_support_atomic = have_atomic_4;
1831 else if (gcov_type_size == 8)
1832 can_support_atomic = have_atomic_8;
1833 }
1834
1835 if (flag_profile_update != PROFILE_UPDATE_SINGLE && needs_split)
1836 counter_update = COUNTER_UPDATE_ATOMIC_PARTIAL;
1837
1838 if (flag_profile_update == PROFILE_UPDATE_ATOMIC
1839 && !can_support_atomic)
1840 {
1841 warning (0, "target does not support atomic profile update, "
1842 "single mode is selected");
1843 flag_profile_update = PROFILE_UPDATE_SINGLE;
1844 }
1845 else if (flag_profile_update == PROFILE_UPDATE_PREFER_ATOMIC)
1846 flag_profile_update
1847 = can_support_atomic ? PROFILE_UPDATE_ATOMIC : PROFILE_UPDATE_SINGLE;
1848
1849 if (flag_profile_update == PROFILE_UPDATE_ATOMIC)
1850 {
1851 if (needs_split)
1852 counter_update = COUNTER_UPDATE_ATOMIC_SPLIT;
1853 else
1854 counter_update = COUNTER_UPDATE_ATOMIC_BUILTIN;
1855 }
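/* Illustrative decision trace of the logic above, assuming 8-byte gcov
   counters: -fprofile-update=atomic with an 8-byte compare-and-swap gives
   COUNTER_UPDATE_ATOMIC_BUILTIN; with only a 4-byte compare-and-swap it
   gives COUNTER_UPDATE_ATOMIC_SPLIT when libatomic keeps atomic updates
   usable, and COUNTER_UPDATE_ATOMIC_PARTIAL when flag_profile_update has
   to fall back to PROFILE_UPDATE_SINGLE.  */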
1856
1857 /* This is a small-ipa pass that gets called only once, from
1858 cgraphunit.cc:ipa_passes(). */
1859 gcc_assert (symtab->state == IPA_SSA);
1860
1861 init_node_map (true);
1862 parse_profile_file_filtering ();
1863
1864 FOR_EACH_DEFINED_FUNCTION (node)
1865 {
1866 bool thunk = false;
1867 if (!gimple_has_body_p (node->decl) && !node->thunk)
1868 continue;
1869
1870 /* Don't profile functions produced for builtin stuff. */
1871 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1872 continue;
1873
1874 if (lookup_attribute ("no_profile_instrument_function",
1875 DECL_ATTRIBUTES (node->decl)))
1876 continue;
1877 /* Do not instrument extern inline functions when testing coverage.
1878 While this is not perfectly consistent (early inlined extern inlines
1879 will get accounted), the testsuite expects that. */
1880 if (DECL_EXTERNAL (node->decl)
1881 && flag_test_coverage)
1882 continue;
1883
1884 const char *file = LOCATION_FILE (DECL_SOURCE_LOCATION (node->decl));
1885 if (!include_source_file_for_profile (file))
1886 continue;
1887
1888 if (node->thunk)
1889 {
1890 /* We cannot expand variadic thunks to Gimple. */
1891 if (stdarg_p (TREE_TYPE (node->decl)))
1892 continue;
1893 thunk = true;
1894 /* When generating a profile, expand the thunk to GIMPLE so it can be
1895 instrumented the same way as other functions. */
1896 if (profile_arc_flag || condition_coverage_flag)
1897 expand_thunk (node, false, true);
1898 /* Read cgraph profile but keep function as thunk at profile-use
1899 time. */
1900 else
1901 {
1902 read_thunk_profile (node);
1903 continue;
1904 }
1905 }
1906
1907 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1908
1909 if (dump_file)
1910 dump_function_header (dump_file, cfun->decl, dump_flags);
1911
1912 /* Local pure-const may imply the need to fix up the CFG. */
1913 if (gimple_has_body_p (node->decl)
1914 && (execute_fixup_cfg () & TODO_cleanup_cfg))
1915 cleanup_tree_cfg ();
1916
1917 branch_prob (thunk);
1918
1919 if (! flag_branch_probabilities
1920 && flag_profile_values)
1921 gimple_gen_ic_func_profiler ();
1922
1923 if (flag_branch_probabilities
1924 && !thunk
1925 && flag_profile_values
1926 && flag_value_profile_transformations
1927 && profile_status_for_fn (cfun) == PROFILE_READ)
1928 gimple_value_profile_transformations ();
1929
1930 /* The above could hose dominator info. Currently there is
1931 none coming in; this is just a safety valve. It should be
1932 easy to adjust if and when there is some. */
1933 free_dominance_info (CDI_DOMINATORS);
1934 free_dominance_info (CDI_POST_DOMINATORS);
1935 pop_cfun ();
1936 }
1937
1938 release_profile_file_filtering ();
1939
1940 /* Drop pure/const flags from instrumented functions. */
1941 if (profile_arc_flag || condition_coverage_flag || flag_test_coverage)
1942 FOR_EACH_DEFINED_FUNCTION (node)
1943 {
1944 if (!gimple_has_body_p (node->decl)
1945 || !(!node->clone_of
1946 || node->decl != node->clone_of->decl))
1947 continue;
1948
1949 /* Don't profile functions produced for builtin stuff. */
1950 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1951 continue;
1952
1953 node->set_const_flag (false, false);
1954 node->set_pure_flag (false, false);
1955 }
1956
1957 /* Update call statements and rebuild the cgraph. */
1958 FOR_EACH_DEFINED_FUNCTION (node)
1959 {
1960 basic_block bb;
1961
1962 if (!gimple_has_body_p (node->decl)
1963 || !(!node->clone_of
1964 || node->decl != node->clone_of->decl))
1965 continue;
1966
1967 /* Don't profile functions produced for builtin stuff. */
1968 if (DECL_SOURCE_LOCATION (node->decl) == BUILTINS_LOCATION)
1969 continue;
1970
1971 push_cfun (DECL_STRUCT_FUNCTION (node->decl));
1972
1973 if (profile_arc_flag || condition_coverage_flag || flag_test_coverage)
1974 FOR_EACH_BB_FN (bb, cfun)
1975 {
1976 gimple_stmt_iterator gsi;
1977 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1978 {
1979 gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
1980 if (!call || gimple_call_internal_p (call))
1981 continue;
1982
1983 /* We do not clear pure/const on decls without body. */
1984 tree fndecl = gimple_call_fndecl (call);
1985 cgraph_node *callee;
1986 if (fndecl
1987 && (callee = cgraph_node::get (fndecl))
1988 && callee->get_availability (node) == AVAIL_NOT_AVAILABLE)
1989 continue;
1990
1991 /* Drop the const attribute from the call type (the pure
1992 attribute is not available on types). */
1993 tree fntype = gimple_call_fntype (call);
1994 if (fntype && TYPE_READONLY (fntype))
1995 {
1996 int quals = TYPE_QUALS (fntype) & ~TYPE_QUAL_CONST;
1997 fntype = build_qualified_type (fntype, quals);
1998 gimple_call_set_fntype (call, fntype);
1999 }
2000
2001 /* Update virtual operands of calls to no longer const/pure
2002 functions. */
2003 update_stmt (call);
2004 }
2005 }
2006
2007 /* Re-merge split blocks. */
2008 cleanup_tree_cfg ();
2009 update_ssa (TODO_update_ssa);
2010
2011 cgraph_edge::rebuild_edges ();
2012
2013 pop_cfun ();
2014 }
2015
2016 handle_missing_profiles ();
2017
2018 del_node_map ();
2019 return 0;
2020 }
2021
2022 namespace {
2023
2024 const pass_data pass_data_ipa_tree_profile =
2025 {
2026 SIMPLE_IPA_PASS, /* type */
2027 "profile", /* name */
2028 OPTGROUP_NONE, /* optinfo_flags */
2029 TV_IPA_PROFILE, /* tv_id */
2030 0, /* properties_required */
2031 0, /* properties_provided */
2032 0, /* properties_destroyed */
2033 0, /* todo_flags_start */
2034 TODO_dump_symtab, /* todo_flags_finish */
2035 };
2036
2037 class pass_ipa_tree_profile : public simple_ipa_opt_pass
2038 {
2039 public:
2040 pass_ipa_tree_profile (gcc::context *ctxt)
2041 : simple_ipa_opt_pass (pass_data_ipa_tree_profile, ctxt)
2042 {}
2043
2044 /* opt_pass methods: */
2045 bool gate (function *) final override;
2046 unsigned int execute (function *) final override { return tree_profiling (); }
2047
2048 }; // class pass_ipa_tree_profile
2049
2050 bool
2051 pass_ipa_tree_profile::gate (function *)
2052 {
2053 /* Run when profile instrumentation, profile use or test coverage is to be
2054 performed. For AutoFDO there is no instrumentation, so this pass is
2055 disabled. */
2056 return (!in_lto_p && !flag_auto_profile
2057 && (flag_branch_probabilities || flag_test_coverage
2058 || profile_arc_flag || condition_coverage_flag));
2059 }
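/* In command-line terms (an informal mapping of the flags above): the pass
   runs for -fprofile-arcs, -fcondition-coverage, -ftest-coverage or
   -fbranch-probabilities (also set by -fprofile-use), and is skipped under
   -fauto-profile and when processing LTO bytecode.  */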
2060
2061 } // anon namespace
2062
2063 simple_ipa_opt_pass *
2064 make_pass_ipa_tree_profile (gcc::context *ctxt)
2065 {
2066 return new pass_ipa_tree_profile (ctxt);
2067 }
2068
2069 #include "gt-tree-profile.h"