1 /* Lowering and expansion of OpenMP directives for HSA GPU agents.
2
3 Copyright (C) 2013-2019 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "backend.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "cgraph.h"
30 #include "pretty-print.h"
31 #include "fold-const.h"
32 #include "gimplify.h"
33 #include "gimple-iterator.h"
34 #include "gimple-walk.h"
35 #include "tree-inline.h"
36 #include "langhooks.h"
37 #include "omp-general.h"
38 #include "omp-low.h"
39 #include "omp-grid.h"
40 #include "gimple-pretty-print.h"
41
42 /* Return the lastprivate predicate for a given gridified loop described by
43    FD.  */
44
45 tree
46 omp_grid_lastprivate_predicate (struct omp_for_data *fd)
47 {
48 /* When dealing with a gridified loop, we need to check up to three collapsed
49    iteration variables, but they are not actually captured in FD.
50 Fortunately, we can easily rely on HSA builtins to get this
51 information. */
52
53 tree id, size;
54 if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
55 && gimple_omp_for_grid_intra_group (fd->for_stmt))
56 {
57 id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID);
58 size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE);
59 }
60 else
61 {
62 id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID);
63 size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE);
64 }
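  /* The predicate built below holds for the work-item executing the last
     iteration, i.e. the one for which id + 1 == size in every collapsed
     dimension, with ID and SIZE being the HSA builtins selected above.  */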
65 tree cond = NULL;
66 for (int dim = 0; dim < fd->collapse; dim++)
67 {
68 tree dim_tree = build_int_cstu (unsigned_type_node, dim);
69 tree u1 = build_int_cstu (unsigned_type_node, 1);
70 tree c2
71 = build2 (EQ_EXPR, boolean_type_node,
72 build2 (PLUS_EXPR, unsigned_type_node,
73 build_call_expr (id, 1, dim_tree), u1),
74 build_call_expr (size, 1, dim_tree));
75 if (cond)
76 cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2);
77 else
78 cond = c2;
79 }
80 return cond;
81 }
82
83 /* Structure describing the basic properties of the loop we are analyzing,
84    both when deciding whether it can be gridified and when it is gridified. */
85
86 struct grid_prop
87 {
88 /* True when we are doing tiling gridification, i.e. when there is a distinct
89 distribute loop over groups and a loop construct over work-items. False
90 when distribute and parallel for loops form a combined construct. */
91 bool tiling;
92 /* Location of the target construct for optimization information
93 messages. */
94 dump_user_location_t target_loc;
95 /* The collapse clause of the involved loops. Collapse value of all of them
96 must be the same for gridification to take place. */
97 size_t collapse;
98 /* Group sizes if requested by the user, or NULL if not requested. */
99 tree group_sizes[3];
100 };
101
102 #define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \
103 "gridified HSA kernel because "
104
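/* For orientation: gridification looks for target regions that, apart from
   local register-type assignments, roughly follow this shape (a sketch of the
   accepted pattern, not an exhaustive description):

     #pragma omp target
     #pragma omp teams [thread_limit(E)]
     #pragma omp distribute [collapse(C)]	(C <= 3)
     #pragma omp parallel for [collapse(C)]
     for (...)
       ...

   either with the distribute and the loop forming one combined construct (the
   simple pattern) or with a separate distribute loop over groups whose body
   contains parallel loops over the work-items of each group (the tiling
   pattern).  */
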
105 /* Return true if STMT is an assignment of a register-type into a local
106 VAR_DECL. If GRID is non-NULL, the assignment additionally must not be to
107 any of the trees specifying group sizes there. */
108
109 static bool
110 grid_safe_assignment_p (gimple *stmt, grid_prop *grid)
111 {
112 gassign *assign = dyn_cast <gassign *> (stmt);
113 if (!assign)
114 return false;
115 if (gimple_clobber_p (assign))
116 return true;
117 tree lhs = gimple_assign_lhs (assign);
118 if (!VAR_P (lhs)
119 || !is_gimple_reg_type (TREE_TYPE (lhs))
120 || is_global_var (lhs))
121 return false;
122 if (grid)
123 for (unsigned i = 0; i < grid->collapse; i++)
124 if (lhs == grid->group_sizes[i])
125 return false;
126 return true;
127 }
128
129 /* Return true if all statements in SEQ are assignments to local register-type
130 variables that do not hold group size information. */
131
132 static bool
133 grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid)
134 {
135 if (!seq)
136 return true;
137
138 gimple_stmt_iterator gsi;
139 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
140 if (!grid_safe_assignment_p (gsi_stmt (gsi), grid))
141 return false;
142 return true;
143 }
144
145 /* Scan statements in SEQ and call itself recursively on any bind. GRID
146    describes hitherto discovered properties of the loop that is evaluated for
147    possible gridification. If during the whole search only assignments to
148    register-type local variables (that do not overwrite group size information)
149    and a single OMP statement are encountered, return true, otherwise return
150    false. RET is where we store any OMP statement encountered. */
151
152 static bool
153 grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid,
154 const char *name, gimple **ret)
155 {
156 gimple_stmt_iterator gsi;
157 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
158 {
159 gimple *stmt = gsi_stmt (gsi);
160
161 if (grid_safe_assignment_p (stmt, grid))
162 continue;
163 if (gbind *bind = dyn_cast <gbind *> (stmt))
164 {
165 gimple_seq bind_body = gimple_bind_body (bind);
166 if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name,
167 ret))
168 return false;
169 }
170 else if (is_gimple_omp (stmt))
171 {
172 if (*ret)
173 {
174 if (dump_enabled_p ())
175 {
176 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
177 GRID_MISSED_MSG_PREFIX "%s construct "
178 "contains multiple OpenMP constructs\n",
179 name);
180 dump_printf_loc (MSG_NOTE, *ret,
181 "The first OpenMP construct within "
182 "a parallel\n");
183 dump_printf_loc (MSG_NOTE, stmt,
184 "The second OpenMP construct within "
185 "a parallel\n");
186 }
187 return false;
188 }
189 *ret = stmt;
190 }
191 else
192 {
193 if (dump_enabled_p ())
194 {
195 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
196 GRID_MISSED_MSG_PREFIX "%s construct contains "
197 "a complex statement\n", name);
198 dump_printf_loc (MSG_NOTE, stmt,
199 "This statement cannot be analyzed for "
200 "gridification\n");
201 }
202 return false;
203 }
204 }
205 return true;
206 }
207
208 /* Scan statements in SEQ and make sure that it and any binds in it contain
209 only assignments to local register-type variables (that do not overwrite
210 group size information) and one OMP construct. If so, return that
211 construct, otherwise return NULL. GRID describes hitherto discovered
212 properties of the loop that is evaluated for possible gridification. If
213    dumping is enabled and the function fails, use NAME to dump a note with the
214 reason for failure. */
215
216 static gimple *
217 grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid,
218 const char *name)
219 {
220 if (!seq)
221 {
222 if (dump_enabled_p ())
223 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
224 GRID_MISSED_MSG_PREFIX "%s construct has empty body\n",
225 name);
226 return NULL;
227 }
228
229 gimple *ret = NULL;
230 if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret))
231 {
232 if (!ret && dump_enabled_p ())
233 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
234 GRID_MISSED_MSG_PREFIX "%s construct does not contain"
235 " any other OpenMP construct\n", name);
236 return ret;
237 }
238 else
239 return NULL;
240 }
241
242 /* Walker function looking for statements which there is no point in gridifying
243    (and for noreturn function calls which we cannot handle). Return non-NULL
244    if such a statement is found. */
245
246 static tree
247 grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
248 bool *handled_ops_p,
249 struct walk_stmt_info *wi)
250 {
251 *handled_ops_p = false;
252 gimple *stmt = gsi_stmt (*gsi);
253 switch (gimple_code (stmt))
254 {
255 case GIMPLE_CALL:
256 if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
257 {
258 *handled_ops_p = true;
259 wi->info = stmt;
260 return error_mark_node;
261 }
262 break;
263
264 /* We may reduce the following list if we find a way to implement the
265    clauses, but for now there is no point in trying further. */
266 case GIMPLE_OMP_CRITICAL:
267 case GIMPLE_OMP_TASKGROUP:
268 case GIMPLE_OMP_TASK:
269 case GIMPLE_OMP_SECTION:
270 case GIMPLE_OMP_SECTIONS:
271 case GIMPLE_OMP_SECTIONS_SWITCH:
272 case GIMPLE_OMP_TARGET:
273 case GIMPLE_OMP_ORDERED:
274 *handled_ops_p = true;
275 wi->info = stmt;
276 return error_mark_node;
277 default:
278 break;
279 }
280 return NULL;
281 }
282
283 /* Examine clauses of omp parallel statement PAR and if any of them prevents
284    gridification, issue a missed-optimization diagnostic and return false,
285    otherwise return true. TLOC is the location of the target construct, used
286    for optimization-note messages. */
287
288 static bool
289 grid_parallel_clauses_gridifiable (gomp_parallel *par, dump_user_location_t tloc)
290 {
291 tree clauses = gimple_omp_parallel_clauses (par);
292 while (clauses)
293 {
294 switch (OMP_CLAUSE_CODE (clauses))
295 {
296 case OMP_CLAUSE_NUM_THREADS:
297 if (dump_enabled_p ())
298 {
299 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
300 GRID_MISSED_MSG_PREFIX "there is "
301 "a num_threads clause of the parallel "
302 "construct\n");
303 dump_printf_loc (MSG_NOTE, par,
304 "Parallel construct has a num_threads clause\n");
305 }
306 return false;
307
308 case OMP_CLAUSE_REDUCTION:
309 if (dump_enabled_p ())
310 {
311 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
312 GRID_MISSED_MSG_PREFIX "a reduction clause "
313 "is present\n ");
314 dump_printf_loc (MSG_NOTE, par,
315 "Parallel construct has a reduction clause\n");
316 }
317 return false;
318
319 default:
320 break;
321 }
322 clauses = OMP_CLAUSE_CHAIN (clauses);
323 }
324 return true;
325 }
326
327 /* Examine clauses and the body of omp loop statement GFOR and if something
328    prevents gridification, issue a missed-optimization diagnostic and return
329 false, otherwise return true. GRID describes hitherto discovered properties
330 of the loop that is evaluated for possible gridification. */
331
332 static bool
333 grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid)
334 {
335 if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor),
336 grid))
337 {
338 if (dump_enabled_p ())
339 {
340 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
341 GRID_MISSED_MSG_PREFIX "the inner loop "
342 "bounds computation contains a complex "
343 "statement\n");
344 dump_printf_loc (MSG_NOTE, gfor,
345 "Loop construct cannot be analyzed for "
346 "gridification\n");
347 }
348 return false;
349 }
350
351 tree clauses = gimple_omp_for_clauses (gfor);
352 while (clauses)
353 {
354 switch (OMP_CLAUSE_CODE (clauses))
355 {
356 case OMP_CLAUSE_SCHEDULE:
357 if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO)
358 {
359 if (dump_enabled_p ())
360 {
361 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
362 GRID_MISSED_MSG_PREFIX "the inner loop "
363 "has a non-automatic schedule clause\n");
364 dump_printf_loc (MSG_NOTE, gfor,
365 "Loop construct has a non automatic "
366 "schedule clause\n");
367 }
368 return false;
369 }
370 break;
371
372 case OMP_CLAUSE_REDUCTION:
373 if (dump_enabled_p ())
374 {
375 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
376 GRID_MISSED_MSG_PREFIX "a reduction "
377 "clause is present\n ");
378 dump_printf_loc (MSG_NOTE, gfor,
379 "Loop construct has a reduction schedule "
380 "clause\n");
381 }
382 return false;
383
384 default:
385 break;
386 }
387 clauses = OMP_CLAUSE_CHAIN (clauses);
388 }
389 struct walk_stmt_info wi;
390 memset (&wi, 0, sizeof (wi));
391 if (walk_gimple_seq (gimple_omp_body (gfor),
392 grid_find_ungridifiable_statement,
393 NULL, &wi))
394 {
395 gimple *bad = (gimple *) wi.info;
396 if (dump_enabled_p ())
397 {
398 if (is_gimple_call (bad))
399 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
400 GRID_MISSED_MSG_PREFIX "the inner loop contains "
401 "call to a noreturn function\n");
402 else
403 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
404 GRID_MISSED_MSG_PREFIX "the inner loop contains "
405 "statement %s which cannot be transformed\n",
406 gimple_code_name[(int) gimple_code (bad)]);
407 dump_printf_loc (MSG_NOTE, bad,
408 "This statement cannot be analyzed for "
409 "gridification\n");
410 }
411 return false;
412 }
413 return true;
414 }
415
416 /* Given a distribute omp construct represented by DIST, which in the original
417 source forms a compound construct with a looping construct, return true if it
418 can be turned into a gridified HSA kernel. Otherwise return false. GRID
419 describes hitherto discovered properties of the loop that is evaluated for
420 possible gridification. */
421
422 static bool
423 grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid)
424 {
425 dump_user_location_t tloc = grid->target_loc;
426 gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist),
427 grid, "distribute");
428 gomp_parallel *par;
429 if (!stmt
430 || !(par = dyn_cast <gomp_parallel *> (stmt))
431 || !grid_parallel_clauses_gridifiable (par, tloc))
432 return false;
433
434 stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid,
435 "parallel");
436 gomp_for *gfor;
437 if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
438 return false;
439
440 if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
441 {
442 if (dump_enabled_p ())
443 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
444 GRID_MISSED_MSG_PREFIX "the inner loop is not "
445 "a simple for loop\n");
446 return false;
447 }
448 gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse);
449
450 if (!grid_inner_loop_gridifiable_p (gfor, grid))
451 return false;
452
453 return true;
454 }
455
456 /* Given an omp loop statement GFOR, return true if it can participate in
457 tiling gridification, i.e. in one where the distribute and parallel for
458 loops do not form a compound statement. GRID describes hitherto discovered
459 properties of the loop that is evaluated for possible gridification. */
460
461 static bool
462 grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid)
463 {
464 if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
465 {
466 if (dump_enabled_p ())
467 {
468 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
469 GRID_MISSED_MSG_PREFIX "an inner loop is not "
470 "a simple for loop\n");
471 dump_printf_loc (MSG_NOTE, gfor,
472 "This statement is not a simple for loop\n");
473 }
474 return false;
475 }
476
477 if (!grid_inner_loop_gridifiable_p (gfor, grid))
478 return false;
479
480 if (gimple_omp_for_collapse (gfor) != grid->collapse)
481 {
482 if (dump_enabled_p ())
483 {
484 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
485 GRID_MISSED_MSG_PREFIX "an inner loop does not "
486 "have use the same collapse clause\n");
487 dump_printf_loc (MSG_NOTE, gfor,
488 "Loop construct uses a different collapse clause\n");
489 }
490 return false;
491 }
492
493 struct omp_for_data fd;
494 struct omp_for_data_loop *loops
495 = (struct omp_for_data_loop *)alloca (grid->collapse
496 * sizeof (struct omp_for_data_loop));
497 omp_extract_for_data (gfor, &fd, loops);
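  /* For each collapsed dimension, compute the number of iterations of the
     inner loop and require that it matches the step of the corresponding
     distribute loop, i.e. the tile size stored in GRID->group_sizes.  */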
498 for (unsigned i = 0; i < grid->collapse; i++)
499 {
500 tree itype, type = TREE_TYPE (fd.loops[i].v);
501 if (POINTER_TYPE_P (type))
502 itype = signed_type_for (type);
503 else
504 itype = type;
505
506 tree n1 = fold_convert (itype, fd.loops[i].n1);
507 tree n2 = fold_convert (itype, fd.loops[i].n2);
508 tree t = build_int_cst (itype,
509 (fd.loops[i].cond_code == LT_EXPR ? -1 : 1));
510 t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t);
511 t = fold_build2 (PLUS_EXPR, itype, t, n2);
512 t = fold_build2 (MINUS_EXPR, itype, t, n1);
513 if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR)
514 t = fold_build2 (TRUNC_DIV_EXPR, itype,
515 fold_build1 (NEGATE_EXPR, itype, t),
516 fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step));
517 else
518 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step);
519
520 if (!operand_equal_p (grid->group_sizes[i], t, 0))
521 {
522 if (dump_enabled_p ())
523 {
524 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
525 GRID_MISSED_MSG_PREFIX "the distribute and "
526 "an internal loop do not agree on tile size\n");
527 dump_printf_loc (MSG_NOTE, gfor,
528 "Loop construct does not seem to loop over "
529 "a tile size\n");
530 }
531 return false;
532 }
533 }
534 return true;
535 }
536
537 /* Facing a call to FNDECL in the body of a distribute construct, return true
538 if we can handle it or false if it precludes gridification. */
539
540 static bool
541 grid_call_permissible_in_distribute_p (tree fndecl)
542 {
543 if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
544 return true;
545
546 const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
547 if (strstr (name, "omp_") != name)
548 return false;
549
550 if ((strcmp (name, "omp_get_thread_num") == 0)
551 || (strcmp (name, "omp_get_num_threads") == 0)
552 || (strcmp (name, "omp_get_num_teams") == 0)
553 || (strcmp (name, "omp_get_team_num") == 0)
554 || (strcmp (name, "omp_get_level") == 0)
555 || (strcmp (name, "omp_get_active_level") == 0)
556 || (strcmp (name, "omp_in_parallel") == 0))
557 return true;
558
559 return false;
560 }
561
562 /* Facing a call satisfying grid_call_permissible_in_distribute_p in the body
563 of a distribute construct that is pointed at by GSI, modify it as necessary
564 for gridification. If the statement itself got removed, return true. */
565
566 static bool
567 grid_handle_call_in_distribute (gimple_stmt_iterator *gsi)
568 {
569 gimple *stmt = gsi_stmt (*gsi);
570 tree fndecl = gimple_call_fndecl (stmt);
571 gcc_checking_assert (fndecl);
572 if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
573 return false;
574
575 const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
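  /* Calls querying the current thread, nesting level or parallel state can be
     replaced by a constant zero in a gridified kernel; do so and remove the
     call.  */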
576 if ((strcmp (name, "omp_get_thread_num") == 0)
577 || (strcmp (name, "omp_get_level") == 0)
578 || (strcmp (name, "omp_get_active_level") == 0)
579 || (strcmp (name, "omp_in_parallel") == 0))
580 {
581 tree lhs = gimple_call_lhs (stmt);
582 if (lhs)
583 {
584 gassign *assign
585 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
586 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
587 }
588 gsi_remove (gsi, true);
589 return true;
590 }
591
592 /* The rest of the omp functions can stay as they are, HSA back-end will
593 handle them correctly. */
594 gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0)
595 || (strcmp (name, "omp_get_num_teams") == 0)
596 || (strcmp (name, "omp_get_team_num") == 0));
597 return false;
598 }
599
600 /* Given a sequence of statements within a distribute omp construct or a
601 parallel construct, which in the original source does not form a compound
602 construct with a looping construct, return true if it does not prevent us
603 from turning it into a gridified HSA kernel. Otherwise return false. GRID
604 describes hitherto discovered properties of the loop that is evaluated for
605    possible gridification. IN_PARALLEL must be true if SEQ is within a
606    parallel construct and false if it is only within a distribute
607 construct. */
608
609 static bool
610 grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid,
611 bool in_parallel)
612 {
613 gimple_stmt_iterator gsi;
614 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
615 {
616 gimple *stmt = gsi_stmt (gsi);
617
618 if (grid_safe_assignment_p (stmt, grid)
619 || gimple_code (stmt) == GIMPLE_GOTO
620 || gimple_code (stmt) == GIMPLE_LABEL
621 || gimple_code (stmt) == GIMPLE_COND)
622 continue;
623 else if (gbind *bind = dyn_cast <gbind *> (stmt))
624 {
625 if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind),
626 grid, in_parallel))
627 return false;
628 continue;
629 }
630 else if (gtry *try_stmt = dyn_cast <gtry *> (stmt))
631 {
632 if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH)
633 {
634 if (dump_enabled_p ())
635 {
636 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
637 GRID_MISSED_MSG_PREFIX "the distribute "
638 "construct contains a try..catch region\n");
639 dump_printf_loc (MSG_NOTE, try_stmt,
640 "This statement cannot be analyzed for "
641 "tiled gridification\n");
642 }
643 return false;
644 }
645 if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt),
646 grid, in_parallel))
647 return false;
648 if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt),
649 grid, in_parallel))
650 return false;
651 continue;
652 }
653 else if (is_gimple_call (stmt))
654 {
655 tree fndecl = gimple_call_fndecl (stmt);
656 if (fndecl && grid_call_permissible_in_distribute_p (fndecl))
657 continue;
658
659 if (dump_enabled_p ())
660 {
661 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
662 GRID_MISSED_MSG_PREFIX "the distribute "
663 "construct contains a call\n");
664 dump_printf_loc (MSG_NOTE, stmt,
665 "This statement cannot be analyzed for "
666 "tiled gridification\n");
667 }
668 return false;
669 }
670 else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt))
671 {
672 if (in_parallel)
673 {
674 if (dump_enabled_p ())
675 {
676 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
677 GRID_MISSED_MSG_PREFIX "a parallel "
678 "construct contains another parallel "
679 "construct\n");
680 dump_printf_loc (MSG_NOTE, stmt,
681 "This parallel construct is nested in "
682 "another one\n");
683 }
684 return false;
685 }
686 if (!grid_parallel_clauses_gridifiable (par, grid->target_loc)
687 || !grid_dist_follows_tiling_pattern (gimple_omp_body (par),
688 grid, true))
689 return false;
690 }
691 else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt))
692 {
693 if (!in_parallel)
694 {
695 if (dump_enabled_p ())
696 {
697 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
698 GRID_MISSED_MSG_PREFIX "a loop "
699 "construct is not nested within a parallel "
700 "construct\n");
701 dump_printf_loc (MSG_NOTE, stmt,
702 "This loop construct is not nested in "
703 "a parallel construct\n");
704 }
705 return false;
706 }
707 if (!grid_gfor_follows_tiling_pattern (gfor, grid))
708 return false;
709 }
710 else
711 {
712 if (dump_enabled_p ())
713 {
714 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
715 GRID_MISSED_MSG_PREFIX "the distribute "
716 "construct contains a complex statement\n");
717 dump_printf_loc (MSG_NOTE, stmt,
718 "This statement cannot be analyzed for "
719 "tiled gridification\n");
720 }
721 return false;
722 }
723 }
724 return true;
725 }
726
727 /* If TARGET follows a pattern that can be turned into a gridified HSA kernel,
728 return true, otherwise return false. In the case of success, also fill in
729 GRID with information describing the kernel grid. */
730
731 static bool
732 grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid)
733 {
734 if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION)
735 return false;
736
737 dump_user_location_t tloc = target;
738 grid->target_loc = tloc;
739 gimple *stmt
740 = grid_find_single_omp_among_assignments (gimple_omp_body (target),
741 grid, "target");
742 if (!stmt)
743 return false;
744 gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
745 tree group_size = NULL;
746 if (!teams)
747 {
748 if (dump_enabled_p ())
749 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
750 GRID_MISSED_MSG_PREFIX "it does not have a sole "
751 "teams construct in it.\n");
752 return false;
753 }
754
755 tree clauses = gimple_omp_teams_clauses (teams);
756 while (clauses)
757 {
758 switch (OMP_CLAUSE_CODE (clauses))
759 {
760 case OMP_CLAUSE_NUM_TEAMS:
761 if (dump_enabled_p ())
762 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
763 GRID_MISSED_MSG_PREFIX "the teams construct "
764 "contains a num_teams clause\n ");
765 return false;
766
767 case OMP_CLAUSE_REDUCTION:
768 if (dump_enabled_p ())
769 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
770 GRID_MISSED_MSG_PREFIX "a reduction "
771 "clause is present\n ");
772 return false;
773
774 case OMP_CLAUSE_THREAD_LIMIT:
775 if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0)))
776 group_size = OMP_CLAUSE_OPERAND (clauses, 0);
777 break;
778
779 default:
780 break;
781 }
782 clauses = OMP_CLAUSE_CHAIN (clauses);
783 }
784
785 stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid,
786 "teams");
787 if (!stmt)
788 return false;
789 gomp_for *dist = dyn_cast <gomp_for *> (stmt);
790 if (!dist)
791 {
792 if (dump_enabled_p ())
793 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
794 GRID_MISSED_MSG_PREFIX "the teams construct does not "
795 "have a single distribute construct in it.\n");
796 return false;
797 }
798
799 gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE);
800
801 grid->collapse = gimple_omp_for_collapse (dist);
802 if (grid->collapse > 3)
803 {
804 if (dump_enabled_p ())
805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
806 GRID_MISSED_MSG_PREFIX "the distribute construct "
807 "contains collapse clause with parameter greater "
808 "than 3\n");
809 return false;
810 }
811
812 struct omp_for_data fd;
813 struct omp_for_data_loop *dist_loops
814 = (struct omp_for_data_loop *)alloca (grid->collapse
815 * sizeof (struct omp_for_data_loop));
816 omp_extract_for_data (dist, &fd, dist_loops);
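  /* A dist_schedule chunk size, if present, must agree with any thread_limit
     clause and then acts as the requested group size.  */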
817 if (fd.chunk_size)
818 {
819 if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0))
820 {
821 if (dump_enabled_p ())
822 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
823 GRID_MISSED_MSG_PREFIX "the teams "
824 "thread limit is different from distribute "
825 "schedule chunk\n");
826 return false;
827 }
828 group_size = fd.chunk_size;
829 }
830 if (group_size && grid->collapse > 1)
831 {
832 if (dump_enabled_p ())
833 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
834 GRID_MISSED_MSG_PREFIX "group size cannot be "
835 "set using thread_limit or schedule clauses "
836 "when also using a collapse clause greater than 1\n");
837 return false;
838 }
839
840 if (gimple_omp_for_combined_p (dist))
841 {
842 grid->tiling = false;
843 grid->group_sizes[0] = group_size;
844 for (unsigned i = 1; i < grid->collapse; i++)
845 grid->group_sizes[i] = NULL;
846 return grid_dist_follows_simple_pattern (dist, grid);
847 }
848 else
849 {
850 grid->tiling = true;
851 if (group_size)
852 {
853 if (dump_enabled_p ())
854 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
855 GRID_MISSED_MSG_PREFIX "group size cannot be set "
856 "using thread_limit or schedule clauses when "
857 "distribute and loop constructs do not form "
858 "one combined construct\n");
859 return false;
860 }
861 for (unsigned i = 0; i < grid->collapse; i++)
862 {
863 if (fd.loops[i].cond_code == GT_EXPR)
864 grid->group_sizes[i] = fold_build1 (NEGATE_EXPR,
865 TREE_TYPE (fd.loops[i].step),
866 fd.loops[i].step);
867 else
868 grid->group_sizes[i] = fd.loops[i].step;
869 }
870 return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid,
871 false);
872 }
873 }
874
875 /* Operand walker, used to remap pre-body declarations according to a hash map
876 provided in DATA. */
877
878 static tree
879 grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
880 {
881 tree t = *tp;
882
883 if (DECL_P (t) || TYPE_P (t))
884 *walk_subtrees = 0;
885 else
886 *walk_subtrees = 1;
887
888 if (VAR_P (t))
889 {
890 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
891 hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
892 tree *repl = declmap->get (t);
893 if (repl)
894 *tp = *repl;
895 }
896 return NULL_TREE;
897 }
898
899 /* Identifiers of segments into which a particular variable should be placed
900 when gridifying. */
901
902 enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP,
903 GRID_SEGMENT_GLOBAL};
904
905 /* Mark VAR so that it is eventually placed into SEGMENT by attaching the
906    appropriate attribute to its declaration and, if necessary, making the
907    variable static and finalizing its declaration. */
908
909 static void
910 grid_mark_variable_segment (tree var, enum grid_var_segment segment)
911 {
912 /* Placing a non-addressable variable into a segment would require that we
913    re-gimplify all its uses. Fortunately, we do not have to do this because
914    if a variable is not addressable, it is not used in atomic or parallel
915    statements and so the relaxed GPU consistency rules mean we can just keep
916    it private. */
917 if (!TREE_ADDRESSABLE (var))
918 return;
919
920 switch (segment)
921 {
922 case GRID_SEGMENT_GROUP:
923 DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"),
924 NULL, DECL_ATTRIBUTES (var));
925 break;
926 case GRID_SEGMENT_GLOBAL:
927 DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"),
928 NULL, DECL_ATTRIBUTES (var));
929 break;
930 default:
931 gcc_unreachable ();
932 }
933
934 if (!TREE_STATIC (var))
935 {
936 TREE_STATIC (var) = 1;
937 const char *prefix = IDENTIFIER_POINTER (DECL_NAME (var));
938 SET_DECL_ASSEMBLER_NAME (var, create_tmp_var_name (prefix));
939 varpool_node::finalize_decl (var);
940 }
941
942 }
943
944 /* Copy leading register-type assignments to local variables in SRC to just
945    before DST, creating temporaries, adjusting the mapping of operands in WI and
946 remapping operands as necessary. Add any new temporaries to TGT_BIND.
947 Return the first statement that does not conform to grid_safe_assignment_p
948 or NULL. If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all
949 variables in traversed bind statements so that they are put into the
950 appropriate segment. */
951
952 static gimple *
953 grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
954 gbind *tgt_bind,
955 enum grid_var_segment var_segment,
956 struct walk_stmt_info *wi)
957 {
958 hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
959 gimple_stmt_iterator gsi;
960 for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
961 {
962 gimple *stmt = gsi_stmt (gsi);
963 if (gbind *bind = dyn_cast <gbind *> (stmt))
964 {
965 gimple *r = grid_copy_leading_local_assignments
966 (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi);
967
968 if (var_segment != GRID_SEGMENT_PRIVATE)
969 for (tree var = gimple_bind_vars (bind);
970 var;
971 var = DECL_CHAIN (var))
972 grid_mark_variable_segment (var, var_segment);
973 if (r)
974 return r;
975 else
976 continue;
977 }
978 if (!grid_safe_assignment_p (stmt, NULL))
979 return stmt;
980 tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
981 tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
982 TREE_TYPE (lhs));
983 DECL_CONTEXT (repl) = current_function_decl;
984 gimple_bind_append_vars (tgt_bind, repl);
985
986 declmap->put (lhs, repl);
987 gassign *copy = as_a <gassign *> (gimple_copy (stmt));
988 walk_gimple_op (copy, grid_remap_prebody_decls, wi);
989 gsi_insert_before (dst, copy, GSI_SAME_STMT);
990 }
991 return NULL;
992 }
993
994 /* Statement walker function to make adjustments to statements within the
995    gridified kernel copy. */
996
997 static tree
998 grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p,
999 struct walk_stmt_info *)
1000 {
1001 *handled_ops_p = false;
1002 gimple *stmt = gsi_stmt (*gsi);
1003 if (gimple_code (stmt) == GIMPLE_OMP_FOR
1004 && (gimple_omp_for_kind (stmt) & GF_OMP_FOR_SIMD))
1005 {
1006 gomp_for *loop = as_a <gomp_for *> (stmt);
1007 tree clauses = gimple_omp_for_clauses (loop);
1008 tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN);
1009 if (cl)
1010 OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node;
1011 else
1012 {
1013 tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
1014 OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node;
1015 OMP_CLAUSE_CHAIN (c) = clauses;
1016 gimple_omp_for_set_clauses (loop, c);
1017 }
1018 }
1019 return NULL_TREE;
1020 }
1021
1022 /* Given a PARLOOP that is a normal for looping construct but also a part of a
1023 combined construct with a simd loop, eliminate the simd loop. */
1024
1025 static void
1026 grid_eliminate_combined_simd_part (gomp_for *parloop)
1027 {
1028 struct walk_stmt_info wi;
1029
1030 memset (&wi, 0, sizeof (wi));
1031 wi.val_only = true;
1032 enum gf_mask msk = GF_OMP_FOR_SIMD;
1033 wi.info = (void *) &msk;
1034 walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi);
1035 gimple *stmt = (gimple *) wi.info;
1036 /* We expect that the SIMD loop is the only statement in the parallel loop. */
1037 gcc_assert (stmt
1038 && gimple_code (stmt) == GIMPLE_OMP_FOR
1039 && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_SIMD)
1040 && gimple_omp_for_combined_into_p (stmt)
1041 && !gimple_omp_for_combined_p (stmt));
1042 gomp_for *simd = as_a <gomp_for *> (stmt);
1043
1044 /* Copy over the iteration properties because the body refers to the index in
1045    the bottom-most loop. */
1046 unsigned i, collapse = gimple_omp_for_collapse (parloop);
1047 gcc_checking_assert (collapse == gimple_omp_for_collapse (simd));
1048 for (i = 0; i < collapse; i++)
1049 {
1050 gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i));
1051 gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i));
1052 gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i));
1053 gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i));
1054 }
1055
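  /* Find the end of the clause chain of the parallel loop so that clauses
     taken over from the simd construct can be appended to it.  */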
1056 tree *tgt = gimple_omp_for_clauses_ptr (parloop);
1057 while (*tgt)
1058 tgt = &OMP_CLAUSE_CHAIN (*tgt);
1059
1060 /* Copy over all clauses, except for linear clauses, which are turned into
1061 private clauses, and all other simd-specific clauses, which are
1062 ignored. */
1063 tree *pc = gimple_omp_for_clauses_ptr (simd);
1064 while (*pc)
1065 {
1066 tree c = *pc;
1067 switch (TREE_CODE (c))
1068 {
1069 case OMP_CLAUSE_LINEAR:
1070 {
1071 tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE);
1072 OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c);
1073 OMP_CLAUSE_CHAIN (priv) = NULL;
1074 *tgt = priv;
1075 tgt = &OMP_CLAUSE_CHAIN (priv);
1076 pc = &OMP_CLAUSE_CHAIN (c);
1077 break;
1078 }
1079
1080 case OMP_CLAUSE_SAFELEN:
1081 case OMP_CLAUSE_SIMDLEN:
1082 case OMP_CLAUSE_ALIGNED:
1083 pc = &OMP_CLAUSE_CHAIN (c);
1084 break;
1085
1086 default:
1087 *pc = OMP_CLAUSE_CHAIN (c);
1088 OMP_CLAUSE_CHAIN (c) = NULL;
1089 *tgt = c;
1090 tgt = &OMP_CLAUSE_CHAIN (c);
1091 break;
1092 }
1093 }
1094
1095 /* Finally, throw away the simd and mark the parallel loop as not
1096 combined. */
1097 gimple_omp_set_body (parloop, gimple_omp_body (simd));
1098 gimple_omp_for_set_combined_p (parloop, false);
1099 }
1100
1101 /* Statement walker function marking loops as grid loops representing threads
1102    of a particular thread group and handling their lastprivate clauses. */
1103
1104 static tree
1105 grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p,
1106 struct walk_stmt_info *wi_in)
1107 {
1108 *handled_ops_p = false;
1109 if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi)))
1110 {
1111 *handled_ops_p = true;
1112 gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP);
1113 gimple_omp_for_set_grid_intra_group (loop, true);
1114 if (gimple_omp_for_combined_p (loop))
1115 grid_eliminate_combined_simd_part (loop);
1116
1117 struct walk_stmt_info body_wi;
1118 memset (&body_wi, 0, sizeof (body_wi));
1119 walk_gimple_seq_mod (gimple_omp_body_ptr (loop),
1120 grid_process_grid_body, NULL, &body_wi);
1121
1122 gbind *bind = (gbind *) wi_in->info;
1123 tree c;
1124 for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c))
1125 if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
1126 {
1127 push_gimplify_context ();
1128 tree ov = OMP_CLAUSE_DECL (c);
1129 tree gv = copy_var_decl (ov, create_tmp_var_name (NULL),
1130 TREE_TYPE (ov));
1131
1132 grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP);
1133 DECL_CONTEXT (gv) = current_function_decl;
1134 gimple_bind_append_vars (bind, gv);
1135 tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov);
1136 gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
1137 x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv);
1138 gimple_seq l = NULL;
1139 gimplify_and_add (x, &l);
1140 gsi_insert_seq_after (gsi, l, GSI_SAME_STMT);
1141 pop_gimplify_context (bind);
1142 }
1143 }
1144 return NULL_TREE;
1145 }
1146
1147 /* Statement walker function marking all parallels as grid_phony and loops as
1148 grid ones representing threads of a particular thread group. */
1149
1150 static tree
1151 grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi,
1152 bool *handled_ops_p,
1153 struct walk_stmt_info *wi_in)
1154 {
1155 *handled_ops_p = false;
1156 wi_in->removed_stmt = false;
1157 gimple *stmt = gsi_stmt (*gsi);
1158 if (gbind *bind = dyn_cast <gbind *> (stmt))
1159 {
1160 for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var))
1161 grid_mark_variable_segment (var, GRID_SEGMENT_GROUP);
1162 }
1163 else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt))
1164 {
1165 *handled_ops_p = true;
1166 gimple_omp_parallel_set_grid_phony (parallel, true);
1167
1168 gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
1169 gimple_bind_set_body (new_bind, gimple_omp_body (parallel));
1170 gimple_seq s = NULL;
1171 gimple_seq_add_stmt (&s, new_bind);
1172 gimple_omp_set_body (parallel, s);
1173
1174 struct walk_stmt_info wi_par;
1175 memset (&wi_par, 0, sizeof (wi_par));
1176 wi_par.info = new_bind;
1177 walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind),
1178 grid_mark_tiling_loops, NULL, &wi_par);
1179 }
1180 else if (is_a <gcall *> (stmt))
1181 wi_in->removed_stmt = grid_handle_call_in_distribute (gsi);
1182 return NULL_TREE;
1183 }
1184
1185 /* Given freshly copied top level kernel SEQ, identify the individual OMP
1186    components, mark them as part of the kernel, copy assignments leading to
1187    them to just before DST, remapping them using WI and adding new temporaries
1188    to TGT_BIND, and return the loop that will be used for kernel dispatch. */
1189
1190 static gomp_for *
1191 grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq,
1192 gimple_stmt_iterator *dst,
1193 gbind *tgt_bind, struct walk_stmt_info *wi)
1194 {
1195 gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind,
1196 GRID_SEGMENT_GLOBAL, wi);
1197 gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
1198 gcc_assert (teams);
1199 gimple_omp_teams_set_grid_phony (teams, true);
1200 stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
1201 tgt_bind, GRID_SEGMENT_GLOBAL,
1202 wi);
1203 gcc_checking_assert (stmt);
1204 gomp_for *dist = dyn_cast <gomp_for *> (stmt);
1205 gcc_assert (dist);
1206 gimple_seq prebody = gimple_omp_for_pre_body (dist);
1207 if (prebody)
1208 grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
1209 GRID_SEGMENT_GROUP, wi);
1210
1211 if (grid->tiling)
1212 {
1213 gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP);
1214 gimple_omp_for_set_grid_group_iter (dist, true);
1215
1216 struct walk_stmt_info wi_tiled;
1217 memset (&wi_tiled, 0, sizeof (wi_tiled));
1218 walk_gimple_seq_mod (gimple_omp_body_ptr (dist),
1219 grid_mark_tiling_parallels_and_loops, NULL,
1220 &wi_tiled);
1221 return dist;
1222 }
1223 else
1224 {
1225 gimple_omp_for_set_grid_phony (dist, true);
1226 stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
1227 tgt_bind,
1228 GRID_SEGMENT_PRIVATE, wi);
1229 gcc_checking_assert (stmt);
1230 gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
1231 gimple_omp_parallel_set_grid_phony (parallel, true);
1232 stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel),
1233 dst, tgt_bind,
1234 GRID_SEGMENT_PRIVATE, wi);
1235 gomp_for *inner_loop = as_a <gomp_for *> (stmt);
1236 gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
1237 prebody = gimple_omp_for_pre_body (inner_loop);
1238 if (prebody)
1239 grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
1240 GRID_SEGMENT_PRIVATE, wi);
1241
1242 if (gimple_omp_for_combined_p (inner_loop))
1243 grid_eliminate_combined_simd_part (inner_loop);
1244 struct walk_stmt_info body_wi;
1245 memset (&body_wi, 0, sizeof (body_wi));
1246 walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop),
1247 grid_process_grid_body, NULL, &body_wi);
1248
1249 return inner_loop;
1250 }
1251 }
1252
1253 /* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
1254 create a GPU kernel for it. GSI must point to the same statement, TGT_BIND
1255 is the bind into which temporaries inserted before TARGET should be
1256 added. */
1257
1258 static void
1259 grid_attempt_target_gridification (gomp_target *target,
1260 gimple_stmt_iterator *gsi,
1261 gbind *tgt_bind)
1262 {
1263 /* GRID starts out zero-initialized and is filled in by the pattern check. */
1264 grid_prop grid = {};
1265 if (!target || !grid_target_follows_gridifiable_pattern (target, &grid))
1266 return;
1267
1268 location_t loc = gimple_location (target);
1269 if (dump_enabled_p ())
1270 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, target,
1271 "Target construct will be turned into a gridified HSA "
1272 "kernel\n");
1273
1274 /* Copy target body to a GPUKERNEL construct: */
1275 gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
1276 (gimple_omp_body (target));
1277
1278 hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
1279 struct walk_stmt_info wi;
1280 memset (&wi, 0, sizeof (struct walk_stmt_info));
1281 wi.info = declmap;
1282
1283 /* Copy assignments in between OMP statements before target, mark OMP
1284 statements within copy appropriately. */
1285 gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi,
1286 tgt_bind, &wi);
1287
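  /* Splice the block of the kernel copy into the block tree next to the
     original target body and append the new grid body statement to the
     target's bind.  */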
1288 gbind *old_bind
1289 = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
1290 gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
1291 tree new_block = gimple_bind_block (new_bind);
1292 tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
1293 BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
1294 BLOCK_SUBBLOCKS (enc_block) = new_block;
1295 BLOCK_SUPERCONTEXT (new_block) = enc_block;
1296 gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
1297 gimple_seq_add_stmt
1298 (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
1299 gpukernel);
1300
1301 for (size_t i = 0; i < grid.collapse; i++)
1302 walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL);
1303 push_gimplify_context ();
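  /* For each collapsed dimension, compute the iteration count of the kernel
     dispatch loop (scaled by the step when tiling, since the dispatch loop
     then counts tiles rather than individual work-items) and record it,
     together with the requested group size, in an artificial _griddim_ clause
     on the target construct.  */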
1304 for (size_t i = 0; i < grid.collapse; i++)
1305 {
1306 tree index_var = gimple_omp_for_index (inner_loop, i);
1307 tree itype, type = TREE_TYPE (index_var);
1308 if (POINTER_TYPE_P (type))
1309 itype = signed_type_for (type);
1310 else
1311 itype = type;
1312
1313 enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
1314 tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
1315 walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
1316 tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
1317 walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
1318 tree step
1319 = omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));
1320 omp_adjust_for_condition (loc, &cond_code, &n2, index_var, step);
1321 n1 = fold_convert (itype, n1);
1322 n2 = fold_convert (itype, n2);
1323
1324 tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2);
1325
1326 tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
1327 t = fold_build2 (PLUS_EXPR, itype, step, t);
1328 t = fold_build2 (PLUS_EXPR, itype, t, n2);
1329 t = fold_build2 (MINUS_EXPR, itype, t, n1);
1330 if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
1331 t = fold_build2 (TRUNC_DIV_EXPR, itype,
1332 fold_build1 (NEGATE_EXPR, itype, t),
1333 fold_build1 (NEGATE_EXPR, itype, step));
1334 else
1335 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
1336 t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype));
1337 if (grid.tiling)
1338 {
1339 if (cond_code == GT_EXPR)
1340 step = fold_build1 (NEGATE_EXPR, itype, step);
1341 t = fold_build2 (MULT_EXPR, itype, t, step);
1342 }
1343
1344 tree gs = fold_convert (uint32_type_node, t);
1345 gimple_seq tmpseq = NULL;
1346 gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
1347 if (!gimple_seq_empty_p (tmpseq))
1348 gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
1349
1350 tree ws;
1351 if (grid.group_sizes[i])
1352 {
1353 ws = fold_convert (uint32_type_node, grid.group_sizes[i]);
1354 tmpseq = NULL;
1355 gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
1356 if (!gimple_seq_empty_p (tmpseq))
1357 gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
1358 }
1359 else
1360 ws = build_zero_cst (uint32_type_node);
1361
1362 tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
1363 OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
1364 OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
1365 OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
1366 OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
1367 gimple_omp_target_set_clauses (target, c);
1368 }
1369 pop_gimplify_context (tgt_bind);
1370 delete declmap;
1371 return;
1372 }
1373
1374 /* Walker function doing all the work for create_target_kernels. */
1375
1376 static tree
1377 grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
1378 bool *handled_ops_p,
1379 struct walk_stmt_info *incoming)
1380 {
1381 *handled_ops_p = false;
1382
1383 gimple *stmt = gsi_stmt (*gsi);
1384 gomp_target *target = dyn_cast <gomp_target *> (stmt);
1385 if (target)
1386 {
1387 gbind *tgt_bind = (gbind *) incoming->info;
1388 gcc_checking_assert (tgt_bind);
1389 grid_attempt_target_gridification (target, gsi, tgt_bind);
1390 return NULL_TREE;
1391 }
1392 gbind *bind = dyn_cast <gbind *> (stmt);
1393 if (bind)
1394 {
1395 *handled_ops_p = true;
1396 struct walk_stmt_info wi;
1397 memset (&wi, 0, sizeof (wi));
1398 wi.info = bind;
1399 walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
1400 grid_gridify_all_targets_stmt, NULL, &wi);
1401 }
1402 return NULL_TREE;
1403 }
1404
1405 /* Attempt to gridify all target constructs in BODY_P. All such targets will
1406 have their bodies duplicated, with the new copy being put into a
1407    gimple_omp_grid_body statement. All kernel-related constructs within the
1408 grid_body will be marked with phony flags or kernel kinds. Moreover, some
1409 re-structuring is often needed, such as copying pre-bodies before the target
1410 construct so that kernel grid sizes can be computed. */
1411
1412 void
1413 omp_grid_gridify_all_targets (gimple_seq *body_p)
1414 {
1415 struct walk_stmt_info wi;
1416 memset (&wi, 0, sizeof (wi));
1417 walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);
1418 }