Correct a function pre/postcondition [PR102403].
index 2361520e60b73765c0c4ed0564b9159744d4750c..159ae0e1647baf318e85366069f3b119a1872f38 100644
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -2,7 +2,7 @@
    directives to separate functions, converts others into explicit calls to the
    runtime library (libgomp) and so forth
 
-Copyright (C) 2005-2018 Free Software Foundation, Inc.
+Copyright (C) 2005-2021 Free Software Foundation, Inc.
 
 This file is part of GCC.
 
@@ -52,13 +52,13 @@ along with GCC; see the file COPYING3.  If not see
 #include "omp-general.h"
 #include "omp-offload.h"
 #include "tree-cfgcleanup.h"
+#include "alloc-pool.h"
 #include "symbol-summary.h"
 #include "gomp-constants.h"
 #include "gimple-pretty-print.h"
-#include "hsa-common.h"
-#include "debug.h"
 #include "stringpool.h"
 #include "attribs.h"
+#include "tree-eh.h"
 
 /* OMP region information.  Every parallel and workshare
    directive is enclosed between two markers, the OMP_* directive
@@ -101,6 +101,9 @@ struct omp_region
   /* True if this is a combined parallel+workshare region.  */
   bool is_combined_parallel;
 
+  /* Copy of fd.lastprivate_conditional != 0.  */
+  bool has_lastprivate_conditional;
+
   /* The ordered stmt if type is GIMPLE_OMP_ORDERED and it has
      a depend clause.  */
   gomp_ordered *ord_stmt;
@@ -346,12 +349,16 @@ determine_parallel_type (struct omp_region *region)
              || ((OMP_CLAUSE_SCHEDULE_KIND (c) & OMP_CLAUSE_SCHEDULE_MASK)
                  == OMP_CLAUSE_SCHEDULE_STATIC)
              || omp_find_clause (clauses, OMP_CLAUSE_ORDERED)
-             || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_))
+             || omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_)
+             || ((c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_))
+                 && POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (c)))))
            return;
        }
       else if (region->inner->type == GIMPLE_OMP_SECTIONS
-              && omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
-                                  OMP_CLAUSE__REDUCTEMP_))
+              && (omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
+                                   OMP_CLAUSE__REDUCTEMP_)
+                  || omp_find_clause (gimple_omp_sections_clauses (ws_stmt),
+                                      OMP_CLAUSE__CONDTEMP_)))
        return;
 
       region->is_combined_parallel = true;
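
For illustration, the kind of combined construct determine_parallel_type fuses into a single GOMP_parallel_loop_* call when none of the clauses above disqualify it (a sketch; n and work are placeholders):

    #pragma omp parallel for schedule(dynamic)
    for (int i = 0; i < n; i++)
      work (i);

With a pointer-typed _condtemp_ (from lastprivate(conditional:)) or a _reductemp_ on the inner workshare, the new checks leave the region unfused instead.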
@@ -477,37 +484,6 @@ gimple_build_cond_empty (tree cond)
   return gimple_build_cond (pred_code, lhs, rhs, NULL_TREE, NULL_TREE);
 }
 
-/* Return true if a parallel REGION is within a declare target function or
-   within a target region and is not a part of a gridified target.  */
-
-static bool
-parallel_needs_hsa_kernel_p (struct omp_region *region)
-{
-  bool indirect = false;
-  for (region = region->outer; region; region = region->outer)
-    {
-      if (region->type == GIMPLE_OMP_PARALLEL)
-       indirect = true;
-      else if (region->type == GIMPLE_OMP_TARGET)
-       {
-         gomp_target *tgt_stmt
-           = as_a <gomp_target *> (last_stmt (region->entry));
-
-         if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
-                              OMP_CLAUSE__GRIDDIM_))
-           return indirect;
-         else
-           return true;
-       }
-    }
-
-  if (lookup_attribute ("omp declare target",
-                       DECL_ATTRIBUTES (current_function_decl)))
-    return true;
-
-  return false;
-}
-
 /* Change DECL_CONTEXT of CHILD_FNDECL to that of the parent function.
    Add CHILD_FNDECL to decl chain of the supercontext of the block
    ENTRY_BLOCK - this is the block which originally contained the
@@ -517,27 +493,44 @@ parallel_needs_hsa_kernel_p (struct omp_region *region)
    function will be emitted with the correct lexical scope.  */
 
 static void
-adjust_context_and_scope (tree entry_block, tree child_fndecl)
+adjust_context_and_scope (struct omp_region *region, tree entry_block,
+                         tree child_fndecl)
 {
+  tree parent_fndecl = NULL_TREE;
+  gimple *entry_stmt;
+  /* OMP expansion expands inner regions before outer ones, so if
+     we e.g. have an explicit task region nested in a parallel region, when
+     expanding the task region current_function_decl will be the original
+     source function, but we actually want to use as context the child
+     function of the parallel.  */
+  for (region = region->outer;
+       region && parent_fndecl == NULL_TREE; region = region->outer)
+    switch (region->type)
+      {
+      case GIMPLE_OMP_PARALLEL:
+      case GIMPLE_OMP_TASK:
+      case GIMPLE_OMP_TEAMS:
+       entry_stmt = last_stmt (region->entry);
+       parent_fndecl = gimple_omp_taskreg_child_fn (entry_stmt);
+       break;
+      case GIMPLE_OMP_TARGET:
+       entry_stmt = last_stmt (region->entry);
+       parent_fndecl
+         = gimple_omp_target_child_fn (as_a <gomp_target *> (entry_stmt));
+       break;
+      default:
+       break;
+      }
+
+  if (parent_fndecl == NULL_TREE)
+    parent_fndecl = current_function_decl;
+  DECL_CONTEXT (child_fndecl) = parent_fndecl;
+
   if (entry_block != NULL_TREE && TREE_CODE (entry_block) == BLOCK)
     {
       tree b = BLOCK_SUPERCONTEXT (entry_block);
-
       if (TREE_CODE (b) == BLOCK)
         {
-         tree parent_fndecl;
-
-         /* Follow supercontext chain until the parent fndecl
-            is found.  */
-         for (parent_fndecl = BLOCK_SUPERCONTEXT (b);
-              TREE_CODE (parent_fndecl) == BLOCK;
-              parent_fndecl = BLOCK_SUPERCONTEXT (parent_fndecl))
-           ;
-
-         gcc_assert (TREE_CODE (parent_fndecl) == FUNCTION_DECL);
-
-         DECL_CONTEXT (child_fndecl) = parent_fndecl;
-
          DECL_CHAIN (child_fndecl) = BLOCK_VARS (b);
          BLOCK_VARS (b) = child_fndecl;
        }
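
The situation the new comment in adjust_context_and_scope describes, for illustration (the outlined names follow GCC's usual *._omp_fn.N scheme; g is a placeholder):

    void f (void)
    {
    #pragma omp parallel	/* body outlined into f._omp_fn.0 */
      {
    #pragma omp task		/* body outlined into f._omp_fn.1 */
	g ();
      }
    }

The task region is expanded while current_function_decl is still f, so the walk over region->outer is what finds the parallel and makes f._omp_fn.0 the DECL_CONTEXT of f._omp_fn.1.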
@@ -581,8 +574,12 @@ expand_parallel_call (struct omp_region *region, basic_block bb,
          switch (region->inner->sched_kind)
            {
            case OMP_CLAUSE_SCHEDULE_RUNTIME:
-             if ((region->inner->sched_modifiers
-                  & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
+             /* For lastprivate(conditional:), our implementation
+                requires monotonic behavior.  */
+             if (region->inner->has_lastprivate_conditional != 0)
+               start_ix2 = 3;
+             else if ((region->inner->sched_modifiers
+                      & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
                start_ix2 = 6;
              else if ((region->inner->sched_modifiers
                        & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
@@ -593,7 +590,8 @@ expand_parallel_call (struct omp_region *region, basic_block bb,
            case OMP_CLAUSE_SCHEDULE_DYNAMIC:
            case OMP_CLAUSE_SCHEDULE_GUIDED:
              if ((region->inner->sched_modifiers
-                  & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0)
+                  & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
+                 && !region->inner->has_lastprivate_conditional)
                {
                  start_ix2 = 3 + region->inner->sched_kind;
                  break;
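
For illustration, a construct that takes the new has_lastprivate_conditional branches (a sketch; n, p and last are placeholders): with schedule(runtime), schedule(dynamic) or schedule(guided) the library could otherwise pick a nonmonotonic schedule, which the lastprivate(conditional:) bookkeeping does not support, so the monotonic entrypoints are forced:

    int last = 0;
    #pragma omp parallel for schedule(runtime) lastprivate(conditional: last)
    for (int i = 0; i < n; i++)
      if (p[i])
	last = i;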
@@ -723,8 +721,6 @@ expand_parallel_call (struct omp_region *region, basic_block bb,
   tree child_fndecl = gimple_omp_parallel_child_fn (entry_stmt);
   t2 = build_fold_addr_expr (child_fndecl);
 
-  adjust_context_and_scope (gimple_block (entry_stmt), child_fndecl);
-
   vec_alloc (args, 4 + vec_safe_length (ws_args));
   args->quick_push (t2);
   args->quick_push (t1);
@@ -745,13 +741,6 @@ expand_parallel_call (struct omp_region *region, basic_block bb,
     }
   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                            false, GSI_CONTINUE_LINKING);
-
-  if (hsa_gen_requested_p ()
-      && parallel_needs_hsa_kernel_p (region))
-    {
-      cgraph_node *child_cnode = cgraph_node::get (child_fndecl);
-      hsa_register_kernel (child_cnode);
-    }
 }
 
 /* Build the function call to GOMP_task to actually
@@ -773,6 +762,7 @@ expand_task_call (struct omp_region *region, basic_block bb,
   tree depend = omp_find_clause (clauses, OMP_CLAUSE_DEPEND);
   tree finalc = omp_find_clause (clauses, OMP_CLAUSE_FINAL);
   tree priority = omp_find_clause (clauses, OMP_CLAUSE_PRIORITY);
+  tree detach = omp_find_clause (clauses, OMP_CLAUSE_DETACH);
 
   unsigned int iflags
     = (untied ? GOMP_TASK_FLAG_UNTIED : 0)
@@ -801,13 +791,19 @@ expand_task_call (struct omp_region *region, basic_block bb,
       tree tclauses = gimple_omp_for_clauses (g);
       num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
       if (num_tasks)
-       num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
+       {
+         if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
+           iflags |= GOMP_TASK_FLAG_STRICT;
+         num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
+       }
       else
        {
          num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
          if (num_tasks)
            {
              iflags |= GOMP_TASK_FLAG_GRAINSIZE;
+             if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
+               iflags |= GOMP_TASK_FLAG_STRICT;
              num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
            }
          else
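
These hunks map the OpenMP 5.1 strict modifier on num_tasks and grainsize to GOMP_TASK_FLAG_STRICT; for illustration (a sketch, with n and work as placeholders):

    #pragma omp taskloop grainsize(strict: 8)
    for (int i = 0; i < n; i++)
      work (i);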
@@ -822,8 +818,13 @@ expand_task_call (struct omp_region *region, basic_block bb,
       if (omp_find_clause (clauses, OMP_CLAUSE_REDUCTION))
        iflags |= GOMP_TASK_FLAG_REDUCTION;
     }
-  else if (priority)
-    iflags |= GOMP_TASK_FLAG_PRIORITY;
+  else
+    {
+      if (priority)
+       iflags |= GOMP_TASK_FLAG_PRIORITY;
+      if (detach)
+       iflags |= GOMP_TASK_FLAG_DETACH;
+    }
 
   tree flags = build_int_cst (unsigned_type_node, iflags);
 
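
The detach clause is flagged with GOMP_TASK_FLAG_DETACH here, and its event handle is passed as a tenth argument to GOMP_task below; for illustration (a sketch; start_async and buf are placeholders, omp_fulfill_event is the standard OpenMP API):

    omp_event_handle_t ev;
    #pragma omp task detach(ev)
      start_async (buf, ev);	/* the task only completes once some thread
				   calls omp_fulfill_event (ev) */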
@@ -864,6 +865,11 @@ expand_task_call (struct omp_region *region, basic_block bb,
     priority = integer_zero_node;
 
   gsi = gsi_last_nondebug_bb (bb);
+
+  detach = (detach
+           ? build_fold_addr_expr (OMP_CLAUSE_DECL (detach))
+           : null_pointer_node);
+
   tree t = gimple_omp_task_data_arg (entry_stmt);
   if (t == NULL)
     t2 = null_pointer_node;
@@ -886,10 +892,10 @@ expand_task_call (struct omp_region *region, basic_block bb,
                         num_tasks, priority, startvar, endvar, step);
   else
     t = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_TASK),
-                        9, t1, t2, t3,
+                        10, t1, t2, t3,
                         gimple_omp_task_arg_size (entry_stmt),
                         gimple_omp_task_arg_align (entry_stmt), cond, flags,
-                        depend, priority);
+                        depend, priority, detach);
 
   force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                            false, GSI_CONTINUE_LINKING);
@@ -952,8 +958,6 @@ expand_teams_call (basic_block bb, gomp_teams *entry_stmt)
   tree child_fndecl = gimple_omp_teams_child_fn (entry_stmt);
   tree t2 = build_fold_addr_expr (child_fndecl);
 
-  adjust_context_and_scope (gimple_block (entry_stmt), child_fndecl);
-
   vec<tree, va_gc> *args;
   vec_alloc (args, 5);
   args->quick_push (t2);
@@ -1412,11 +1416,6 @@ expand_omp_taskreg (struct omp_region *region)
       else
        block = gimple_block (entry_stmt);
 
-      /* Make sure to generate early debug for the function before
-         outlining anything.  */
-      if (! gimple_in_ssa_p (cfun))
-       (*debug_hooks->early_global_decl) (cfun->decl);
-
       new_bb = move_sese_region_to_fn (child_cfun, entry_bb, exit_bb, block);
       if (exit_bb)
        single_succ_edge (new_bb)->flags = EDGE_FALLTHRU;
@@ -1497,6 +1496,8 @@ expand_omp_taskreg (struct omp_region *region)
        }
     }
 
+  adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
+
   if (gimple_code (entry_stmt) == GIMPLE_OMP_PARALLEL)
     expand_parallel_call (region, new_bb,
                          as_a <gomp_parallel *> (entry_stmt), ws_args);
@@ -1526,8 +1527,8 @@ struct oacc_collapse
 static tree
 expand_oacc_collapse_init (const struct omp_for_data *fd,
                           gimple_stmt_iterator *gsi,
-                          oacc_collapse *counts, tree bound_type,
-                          location_t loc)
+                          oacc_collapse *counts, tree diff_type,
+                          tree bound_type, location_t loc)
 {
   tree tiling = fd->tiling;
   tree total = build_int_cst (bound_type, 1);
@@ -1544,17 +1545,12 @@ expand_oacc_collapse_init (const struct omp_for_data *fd,
       const omp_for_data_loop *loop = &fd->loops[ix];
 
       tree iter_type = TREE_TYPE (loop->v);
-      tree diff_type = iter_type;
       tree plus_type = iter_type;
 
-      gcc_assert (loop->cond_code == fd->loop.cond_code);
+      gcc_assert (loop->cond_code == LT_EXPR || loop->cond_code == GT_EXPR);
 
       if (POINTER_TYPE_P (iter_type))
        plus_type = sizetype;
-      if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
-       diff_type = signed_type_for (diff_type);
-      if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
-       diff_type = integer_type_node;
 
       if (tiling)
        {
@@ -1642,7 +1638,8 @@ expand_oacc_collapse_init (const struct omp_for_data *fd,
 static void
 expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
                           gimple_stmt_iterator *gsi,
-                          const oacc_collapse *counts, tree ivar)
+                          const oacc_collapse *counts, tree ivar,
+                          tree diff_type)
 {
   tree ivar_type = TREE_TYPE (ivar);
 
@@ -1654,7 +1651,6 @@ expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
       const oacc_collapse *collapse = &counts[ix];
       tree v = inner ? loop->v : collapse->outer;
       tree iter_type = TREE_TYPE (v);
-      tree diff_type = TREE_TYPE (collapse->step);
       tree plus_type = iter_type;
       enum tree_code plus_code = PLUS_EXPR;
       tree expr;
@@ -1676,7 +1672,7 @@ expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
        }
 
       expr = fold_build2 (MULT_EXPR, diff_type, fold_convert (diff_type, expr),
-                         collapse->step);
+                         fold_convert (diff_type, collapse->step));
       expr = fold_build2 (plus_code, iter_type,
                          inner ? collapse->outer : collapse->base,
                          fold_convert (plus_type, expr));
@@ -1712,7 +1708,39 @@ expand_oacc_collapse_vars (const struct omp_for_data *fd, bool inner,
        count = 0;
    and set ZERO_ITER_BB to that bb.  If this isn't the outermost
    of the combined loop constructs, just initialize COUNTS array
-   from the _looptemp_ clauses.  */
+   from the _looptemp_ clauses.  For loop nests with non-rectangular
+   loops, do this only for the rectangular loops.  Then pick
+   the loops which reference outer vars in their bound expressions
+   and the loops which they refer to, and for this sub-nest compute
+   the number of iterations.  For triangular loops use Faulhaber's formula;
+   otherwise, as a fallback, compute by iterating the loops.
+   If e.g. the sub-nest is
+       for (I = N11; I COND1 N12; I += STEP1)
+       for (J = M21 * I + N21; J COND2 M22 * I + N22; J += STEP2)
+       for (K = M31 * J + N31; K COND3 M32 * J + N32; K += STEP3)
+   do:
+       COUNT = 0;
+       for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
+       for (tmpj = M21 * tmpi + N21;
+            tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
+         {
+           int tmpk1 = M31 * tmpj + N31;
+           int tmpk2 = M32 * tmpj + N32;
+           if (tmpk1 COND3 tmpk2)
+             {
+               if (COND3 is <)
+                 adj = STEP3 - 1;
+               else
+                 adj = STEP3 + 1;
+               COUNT += (adj + tmpk2 - tmpk1) / STEP3;
+             }
+         }
+   and finally multiply the counts of the rectangular loops not
+   in the sub-nest with COUNT.  Also, as counts[fd->last_nonrect]
+   store the number of iterations of the loops from fd->first_nonrect
+   to fd->last_nonrect inclusive, i.e. the above COUNT multiplied
+   by the counts of rectangular loops not referenced in any non-rectangular
+   loops sandwiched in between those.  */
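
A minimal concrete instance of the triangular case (unit steps, M22 == 1, M21 == N21 == N22 == 0):

    for (i = 0; i < n; i++)
      for (j = 0; j < i; j++)
	body (i, j);

Here Faulhaber's formula collapses COUNT to 0 + 1 + ... + (n - 1) = n * (n - 1) / 2, so no runtime loop over tmpi is needed.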
 
 /* NOTE: It *could* be better to moosh all of the BBs together,
    creating one larger BB with all the computation and the unexpected
@@ -1774,6 +1802,23 @@ expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
          else
            counts[0] = NULL_TREE;
        }
+      if (fd->non_rect
+         && fd->last_nonrect == fd->first_nonrect + 1
+         && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
+       {
+         tree c[4];
+         for (i = 0; i < 4; i++)
+           {
+             innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
+                                       OMP_CLAUSE__LOOPTEMP_);
+             gcc_assert (innerc);
+             c[i] = OMP_CLAUSE_DECL (innerc);
+           }
+         counts[0] = c[0];
+         fd->first_inner_iterations = c[1];
+         fd->factor = c[2];
+         fd->adjn1 = c[3];
+       }
       return;
     }
 
@@ -1791,12 +1836,23 @@ expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
          break;
        }
     }
+  bool rect_count_seen = false;
   for (i = 0; i < (fd->ordered ? fd->ordered : fd->collapse); i++)
     {
       tree itype = TREE_TYPE (fd->loops[i].v);
 
       if (i >= fd->collapse && counts[i])
        continue;
+      if (fd->non_rect)
+       {
+         /* Skip loops that use outer iterators in their expressions
+            during this phase.  */
+         if (fd->loops[i].m1 || fd->loops[i].m2)
+           {
+             counts[i] = build_zero_cst (type);
+             continue;
+           }
+       }
       if ((SSA_VAR_P (fd->loop.n2) || i >= fd->collapse)
          && ((t = fold_binary (fd->loops[i].cond_code, boolean_type_node,
                                fold_convert (itype, fd->loops[i].n1),
@@ -1892,13 +1948,455 @@ expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
        }
       if (SSA_VAR_P (fd->loop.n2) && i < fd->collapse)
        {
-         if (i == 0)
-           t = counts[0];
+         if (fd->non_rect && i >= fd->first_nonrect && i <= fd->last_nonrect)
+           continue;
+         if (!rect_count_seen)
+           {
+             t = counts[i];
+             rect_count_seen = true;
+           }
          else
            t = fold_build2 (MULT_EXPR, type, fd->loop.n2, counts[i]);
          expand_omp_build_assign (gsi, fd->loop.n2, t);
        }
     }
+  if (fd->non_rect && SSA_VAR_P (fd->loop.n2))
+    {
+      gcc_assert (fd->last_nonrect != -1);
+
+      counts[fd->last_nonrect] = create_tmp_reg (type, ".count");
+      expand_omp_build_assign (gsi, counts[fd->last_nonrect],
+                              build_zero_cst (type));
+      for (i = fd->first_nonrect + 1; i < fd->last_nonrect; i++)
+       if (fd->loops[i].m1
+           || fd->loops[i].m2
+           || fd->loops[i].non_rect_referenced)
+         break;
+      if (i == fd->last_nonrect
+         && fd->loops[i].outer == fd->last_nonrect - fd->first_nonrect
+         && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[i].v)))
+       {
+         int o = fd->first_nonrect;
+         tree itype = TREE_TYPE (fd->loops[o].v);
+         tree n1o = create_tmp_reg (itype, ".n1o");
+         t = fold_convert (itype, unshare_expr (fd->loops[o].n1));
+         expand_omp_build_assign (gsi, n1o, t);
+         tree n2o = create_tmp_reg (itype, ".n2o");
+         t = fold_convert (itype, unshare_expr (fd->loops[o].n2));
+         expand_omp_build_assign (gsi, n2o, t);
+         if (fd->loops[i].m1 && fd->loops[i].m2)
+           t = fold_build2 (MINUS_EXPR, itype, unshare_expr (fd->loops[i].m2),
+                            unshare_expr (fd->loops[i].m1));
+         else if (fd->loops[i].m1)
+           t = fold_unary (NEGATE_EXPR, itype,
+                           unshare_expr (fd->loops[i].m1));
+         else
+           t = unshare_expr (fd->loops[i].m2);
+         tree m2minusm1
+           = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
+                                       true, GSI_SAME_STMT);
+
+         gimple_stmt_iterator gsi2 = *gsi;
+         gsi_prev (&gsi2);
+         e = split_block (entry_bb, gsi_stmt (gsi2));
+         e = split_block (e->dest, (gimple *) NULL);
+         basic_block bb1 = e->src;
+         entry_bb = e->dest;
+         *gsi = gsi_after_labels (entry_bb);
+
+         gsi2 = gsi_after_labels (bb1);
+         tree ostep = fold_convert (itype, fd->loops[o].step);
+         t = build_int_cst (itype, (fd->loops[o].cond_code
+                                    == LT_EXPR ? -1 : 1));
+         t = fold_build2 (PLUS_EXPR, itype, ostep, t);
+         t = fold_build2 (PLUS_EXPR, itype, t, n2o);
+         t = fold_build2 (MINUS_EXPR, itype, t, n1o);
+         if (TYPE_UNSIGNED (itype)
+             && fd->loops[o].cond_code == GT_EXPR)
+           t = fold_build2 (TRUNC_DIV_EXPR, itype,
+                            fold_build1 (NEGATE_EXPR, itype, t),
+                            fold_build1 (NEGATE_EXPR, itype, ostep));
+         else
+           t = fold_build2 (TRUNC_DIV_EXPR, itype, t, ostep);
+         tree outer_niters
+           = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+                                       true, GSI_SAME_STMT);
+         t = fold_build2 (MINUS_EXPR, itype, outer_niters,
+                          build_one_cst (itype));
+         t = fold_build2 (MULT_EXPR, itype, t, ostep);
+         t = fold_build2 (PLUS_EXPR, itype, n1o, t);
+         tree last = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+                                               true, GSI_SAME_STMT);
+         tree n1, n2, n1e, n2e;
+         t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
+         if (fd->loops[i].m1)
+           {
+             n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
+             n1 = fold_build2 (MULT_EXPR, itype, n1o, n1);
+             n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
+           }
+         else
+           n1 = t;
+         n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
+                                        true, GSI_SAME_STMT);
+         t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
+         if (fd->loops[i].m2)
+           {
+             n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
+             n2 = fold_build2 (MULT_EXPR, itype, n1o, n2);
+             n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
+           }
+         else
+           n2 = t;
+         n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
+                                        true, GSI_SAME_STMT);
+         t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
+         if (fd->loops[i].m1)
+           {
+             n1e = fold_convert (itype, unshare_expr (fd->loops[i].m1));
+             n1e = fold_build2 (MULT_EXPR, itype, last, n1e);
+             n1e = fold_build2 (PLUS_EXPR, itype, n1e, t);
+           }
+         else
+           n1e = t;
+         n1e = force_gimple_operand_gsi (&gsi2, n1e, true, NULL_TREE,
+                                         true, GSI_SAME_STMT);
+         t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
+         if (fd->loops[i].m2)
+           {
+             n2e = fold_convert (itype, unshare_expr (fd->loops[i].m2));
+             n2e = fold_build2 (MULT_EXPR, itype, last, n2e);
+             n2e = fold_build2 (PLUS_EXPR, itype, n2e, t);
+           }
+         else
+           n2e = t;
+         n2e = force_gimple_operand_gsi (&gsi2, n2e, true, NULL_TREE,
+                                         true, GSI_SAME_STMT);
+         gcond *cond_stmt
+           = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
+                                NULL_TREE, NULL_TREE);
+         gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
+         e = split_block (bb1, cond_stmt);
+         e->flags = EDGE_TRUE_VALUE;
+         e->probability = profile_probability::likely ().guessed ();
+         basic_block bb2 = e->dest;
+         gsi2 = gsi_after_labels (bb2);
+
+         cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
+                                        NULL_TREE, NULL_TREE);
+         gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
+         e = split_block (bb2, cond_stmt);
+         e->flags = EDGE_TRUE_VALUE;
+         e->probability = profile_probability::likely ().guessed ();
+         gsi2 = gsi_after_labels (e->dest);
+
+         tree step = fold_convert (itype, fd->loops[i].step);
+         t = build_int_cst (itype, (fd->loops[i].cond_code
+                                    == LT_EXPR ? -1 : 1));
+         t = fold_build2 (PLUS_EXPR, itype, step, t);
+         t = fold_build2 (PLUS_EXPR, itype, t, n2);
+         t = fold_build2 (MINUS_EXPR, itype, t, n1);
+         if (TYPE_UNSIGNED (itype)
+             && fd->loops[i].cond_code == GT_EXPR)
+           t = fold_build2 (TRUNC_DIV_EXPR, itype,
+                            fold_build1 (NEGATE_EXPR, itype, t),
+                            fold_build1 (NEGATE_EXPR, itype, step));
+         else
+           t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
+         tree first_inner_iterations
+           = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+                                       true, GSI_SAME_STMT);
+         t = fold_build2 (MULT_EXPR, itype, m2minusm1, ostep);
+         if (TYPE_UNSIGNED (itype)
+             && fd->loops[i].cond_code == GT_EXPR)
+           t = fold_build2 (TRUNC_DIV_EXPR, itype,
+                            fold_build1 (NEGATE_EXPR, itype, t),
+                            fold_build1 (NEGATE_EXPR, itype, step));
+         else
+           t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
+         tree factor
+           = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+                                       true, GSI_SAME_STMT);
+         t = fold_build2 (MINUS_EXPR, itype, outer_niters,
+                          build_one_cst (itype));
+         t = fold_build2 (MULT_EXPR, itype, t, outer_niters);
+         t = fold_build2 (RSHIFT_EXPR, itype, t, integer_one_node);
+         t = fold_build2 (MULT_EXPR, itype, factor, t);
+         t = fold_build2 (PLUS_EXPR, itype,
+                          fold_build2 (MULT_EXPR, itype, outer_niters,
+                                       first_inner_iterations), t);
+         expand_omp_build_assign (&gsi2, counts[fd->last_nonrect],
+                                  fold_convert (type, t));
+
+         basic_block bb3 = create_empty_bb (bb1);
+         add_bb_to_loop (bb3, bb1->loop_father);
+
+         e = make_edge (bb1, bb3, EDGE_FALSE_VALUE);
+         e->probability = profile_probability::unlikely ().guessed ();
+
+         gsi2 = gsi_after_labels (bb3);
+         cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1e, n2e,
+                                        NULL_TREE, NULL_TREE);
+         gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
+         e = split_block (bb3, cond_stmt);
+         e->flags = EDGE_TRUE_VALUE;
+         e->probability = profile_probability::likely ().guessed ();
+         basic_block bb4 = e->dest;
+
+         ne = make_edge (bb3, entry_bb, EDGE_FALSE_VALUE);
+         ne->probability = e->probability.invert ();
+
+         basic_block bb5 = create_empty_bb (bb2);
+         add_bb_to_loop (bb5, bb2->loop_father);
+
+         ne = make_edge (bb2, bb5, EDGE_FALSE_VALUE);
+         ne->probability = profile_probability::unlikely ().guessed ();
+
+         for (int j = 0; j < 2; j++)
+           {
+             gsi2 = gsi_after_labels (j ? bb5 : bb4);
+             t = fold_build2 (MINUS_EXPR, itype,
+                              unshare_expr (fd->loops[i].n1),
+                              unshare_expr (fd->loops[i].n2));
+             t = fold_build2 (TRUNC_DIV_EXPR, itype, t, m2minusm1);
+             tree tem
+               = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+                                           true, GSI_SAME_STMT);
+             t = fold_build2 (MINUS_EXPR, itype, tem, n1o);
+             t = fold_build2 (TRUNC_MOD_EXPR, itype, t, ostep);
+             t = fold_build2 (MINUS_EXPR, itype, tem, t);
+             tem = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+                                             true, GSI_SAME_STMT);
+             t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
+             if (fd->loops[i].m1)
+               {
+                 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
+                 n1 = fold_build2 (MULT_EXPR, itype, tem, n1);
+                 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
+               }
+             else
+               n1 = t;
+             n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
+                                            true, GSI_SAME_STMT);
+             t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
+             if (fd->loops[i].m2)
+               {
+                 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
+                 n2 = fold_build2 (MULT_EXPR, itype, tem, n2);
+                 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
+               }
+             else
+               n2 = t;
+             n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
+                                            true, GSI_SAME_STMT);
+             expand_omp_build_assign (&gsi2, j ? n2o : n1o, tem);
+
+             cond_stmt = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
+                                            NULL_TREE, NULL_TREE);
+             gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
+             e = split_block (gsi_bb (gsi2), cond_stmt);
+             e->flags = j ? EDGE_TRUE_VALUE : EDGE_FALSE_VALUE;
+             e->probability = profile_probability::unlikely ().guessed ();
+             ne = make_edge (e->src, bb1,
+                             j ? EDGE_FALSE_VALUE : EDGE_TRUE_VALUE);
+             ne->probability = e->probability.invert ();
+             gsi2 = gsi_after_labels (e->dest);
+
+             t = fold_build2 (PLUS_EXPR, itype, tem, ostep);
+             expand_omp_build_assign (&gsi2, j ? n2o : n1o, t);
+
+             make_edge (e->dest, bb1, EDGE_FALLTHRU);
+           }
+
+         set_immediate_dominator (CDI_DOMINATORS, bb3, bb1);
+         set_immediate_dominator (CDI_DOMINATORS, bb5, bb2);
+         set_immediate_dominator (CDI_DOMINATORS, entry_bb, bb1);
+
+         if (fd->first_nonrect + 1 == fd->last_nonrect)
+           {
+             fd->first_inner_iterations = first_inner_iterations;
+             fd->factor = factor;
+             fd->adjn1 = n1o;
+           }
+       }
+      else
+       {
+         /* Fallback implementation.  Evaluate the loops with m1/m2
+            non-NULL as well as their outer loops at runtime using temporaries
+            instead of the original iteration variables, and in the
+            body just bump the counter.  */
+         gimple_stmt_iterator gsi2 = *gsi;
+         gsi_prev (&gsi2);
+         e = split_block (entry_bb, gsi_stmt (gsi2));
+         e = split_block (e->dest, (gimple *) NULL);
+         basic_block cur_bb = e->src;
+         basic_block next_bb = e->dest;
+         entry_bb = e->dest;
+         *gsi = gsi_after_labels (entry_bb);
+
+         tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
+         memset (vs, 0, fd->last_nonrect * sizeof (tree));
+
+         for (i = 0; i <= fd->last_nonrect; i++)
+           {
+             if (fd->loops[i].m1 == NULL_TREE
+                 && fd->loops[i].m2 == NULL_TREE
+                 && !fd->loops[i].non_rect_referenced)
+               continue;
+
+             tree itype = TREE_TYPE (fd->loops[i].v);
+
+             gsi2 = gsi_after_labels (cur_bb);
+             tree n1, n2;
+             t = fold_convert (itype, unshare_expr (fd->loops[i].n1));
+             if (fd->loops[i].m1)
+               {
+                 n1 = fold_convert (itype, unshare_expr (fd->loops[i].m1));
+                 n1 = fold_build2 (MULT_EXPR, itype,
+                                   vs[i - fd->loops[i].outer], n1);
+                 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
+               }
+             else
+               n1 = t;
+             n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
+                                            true, GSI_SAME_STMT);
+             if (i < fd->last_nonrect)
+               {
+                 vs[i] = create_tmp_reg (itype, ".it");
+                 expand_omp_build_assign (&gsi2, vs[i], n1);
+               }
+             t = fold_convert (itype, unshare_expr (fd->loops[i].n2));
+             if (fd->loops[i].m2)
+               {
+                 n2 = fold_convert (itype, unshare_expr (fd->loops[i].m2));
+                 n2 = fold_build2 (MULT_EXPR, itype,
+                                   vs[i - fd->loops[i].outer], n2);
+                 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
+               }
+             else
+               n2 = t;
+             n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
+                                            true, GSI_SAME_STMT);
+             if (i == fd->last_nonrect)
+               {
+                 gcond *cond_stmt
+                   = gimple_build_cond (fd->loops[i].cond_code, n1, n2,
+                                        NULL_TREE, NULL_TREE);
+                 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
+                 e = split_block (cur_bb, cond_stmt);
+                 e->flags = EDGE_TRUE_VALUE;
+                 ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
+                 e->probability = profile_probability::likely ().guessed ();
+                 ne->probability = e->probability.invert ();
+                 gsi2 = gsi_after_labels (e->dest);
+
+                 t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
+                                            ? -1 : 1));
+                 t = fold_build2 (PLUS_EXPR, itype,
+                                  fold_convert (itype, fd->loops[i].step), t);
+                 t = fold_build2 (PLUS_EXPR, itype, t, n2);
+                 t = fold_build2 (MINUS_EXPR, itype, t, n1);
+                 tree step = fold_convert (itype, fd->loops[i].step);
+                 if (TYPE_UNSIGNED (itype)
+                     && fd->loops[i].cond_code == GT_EXPR)
+                   t = fold_build2 (TRUNC_DIV_EXPR, itype,
+                                    fold_build1 (NEGATE_EXPR, itype, t),
+                                    fold_build1 (NEGATE_EXPR, itype, step));
+                 else
+                   t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
+                 t = fold_convert (type, t);
+                 t = fold_build2 (PLUS_EXPR, type,
+                                  counts[fd->last_nonrect], t);
+                 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+                                               true, GSI_SAME_STMT);
+                 expand_omp_build_assign (&gsi2, counts[fd->last_nonrect], t);
+                 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
+                 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
+                 break;
+               }
+             e = split_block (cur_bb, last_stmt (cur_bb));
+
+             basic_block new_cur_bb = create_empty_bb (cur_bb);
+             add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
+
+             gsi2 = gsi_after_labels (e->dest);
+             tree step = fold_convert (itype,
+                                       unshare_expr (fd->loops[i].step));
+             t = fold_build2 (PLUS_EXPR, itype, vs[i], step);
+             t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+                                           true, GSI_SAME_STMT);
+             expand_omp_build_assign (&gsi2, vs[i], t);
+
+             ne = split_block (e->dest, last_stmt (e->dest));
+             gsi2 = gsi_after_labels (ne->dest);
+
+             gcond *cond_stmt
+               = gimple_build_cond (fd->loops[i].cond_code, vs[i], n2,
+                                    NULL_TREE, NULL_TREE);
+             gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
+             edge e3, e4;
+             if (next_bb == entry_bb)
+               {
+                 e3 = find_edge (ne->dest, next_bb);
+                 e3->flags = EDGE_FALSE_VALUE;
+               }
+             else
+               e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
+             e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
+             e4->probability = profile_probability::likely ().guessed ();
+             e3->probability = e4->probability.invert ();
+             basic_block esrc = e->src;
+             make_edge (e->src, ne->dest, EDGE_FALLTHRU);
+             cur_bb = new_cur_bb;
+             basic_block latch_bb = next_bb;
+             next_bb = e->dest;
+             remove_edge (e);
+             set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
+             set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
+             set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
+           }
+       }
+      t = NULL_TREE;
+      for (i = fd->first_nonrect; i < fd->last_nonrect; i++)
+       if (!fd->loops[i].non_rect_referenced
+           && fd->loops[i].m1 == NULL_TREE
+           && fd->loops[i].m2 == NULL_TREE)
+         {
+           if (t == NULL_TREE)
+             t = counts[i];
+           else
+             t = fold_build2 (MULT_EXPR, type, t, counts[i]);
+         }
+      if (t)
+       {
+         t = fold_build2 (MULT_EXPR, type, counts[fd->last_nonrect], t);
+         expand_omp_build_assign (gsi, counts[fd->last_nonrect], t);
+       }
+      if (!rect_count_seen)
+       t = counts[fd->last_nonrect];
+      else
+       t = fold_build2 (MULT_EXPR, type, fd->loop.n2,
+                        counts[fd->last_nonrect]);
+      expand_omp_build_assign (gsi, fd->loop.n2, t);
+    }
+  else if (fd->non_rect)
+    {
+      tree t = fd->loop.n2;
+      gcc_assert (TREE_CODE (t) == INTEGER_CST);
+      int non_rect_referenced = 0, non_rect = 0;
+      for (i = 0; i < fd->collapse; i++)
+       {
+         if ((i < fd->first_nonrect || i > fd->last_nonrect)
+             && !integer_zerop (counts[i]))
+           t = fold_build2 (TRUNC_DIV_EXPR, type, t, counts[i]);
+         if (fd->loops[i].non_rect_referenced)
+           non_rect_referenced++;
+         if (fd->loops[i].m1 || fd->loops[i].m2)
+           non_rect++;
+       }
+      gcc_assert (non_rect == 1 && non_rect_referenced == 1);
+      counts[fd->last_nonrect] = t;
+    }
 }
 
 /* Helper function for expand_omp_{for_*,simd}.  Generate code like:
@@ -1911,11 +2409,43 @@ expand_omp_for_init_counts (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
    if this loop doesn't have an inner loop construct combined with it.
    If it does have an inner loop construct combined with it and the
    iteration count isn't known constant, store values from counts array
-   into its _looptemp_ temporaries instead.  */
+   into its _looptemp_ temporaries instead.
+   For non-rectangular loops (between fd->first_nonrect and fd->last_nonrect
+   inclusive), use the count of all those loops together, and either
+   find quadratic etc. equation roots, or, as a fallback, do:
+       COUNT = 0;
+       for (tmpi = N11; tmpi COND1 N12; tmpi += STEP1)
+       for (tmpj = M21 * tmpi + N21;
+            tmpj COND2 M22 * tmpi + N22; tmpj += STEP2)
+         {
+           int tmpk1 = M31 * tmpj + N31;
+           int tmpk2 = M32 * tmpj + N32;
+           if (tmpk1 COND3 tmpk2)
+             {
+               if (COND3 is <)
+                 adj = STEP3 - 1;
+               else
+                 adj = STEP3 + 1;
+               int temp = (adj + tmpk2 - tmpk1) / STEP3;
+               if (COUNT + temp > T)
+                 {
+                   V1 = tmpi;
+                   V2 = tmpj;
+                   V3 = tmpk1 + (T - COUNT) * STEP3;
+                   goto done;
+                 }
+               else
+                 COUNT += temp;
+             }
+         }
+       done:;
+   but for optional innermost or outermost rectangular loops that aren't
+   referenced by other loop expressions keep doing the division/modulo.  */
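
In the notation of the code below (a sketch of the arithmetic only): after the first C outer iterations the triangular sub-nest has executed

    g (C) = FIRST_INNER_ITERATIONS * C + FACTOR * C * (C - 1) / 2

logical iterations, so mapping a logical iteration number T back to the induction variables means finding the largest C with g (C) <= T, i.e. the positive root

    C = (sqrt ((I0 - F/2) * (I0 - F/2) + 2 * F * T) - (I0 - F/2)) / F

with I0 = FIRST_INNER_ITERATIONS and F = FACTOR, truncated to an integer.  The IFN_SQRT path below evaluates exactly this in double precision; the g (C) <= T < g (C + 1) re-checks guard against rounding error and fall through to the iterative scheme when they fail.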
 
 static void
 expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
-                         tree *counts, gimple *inner_stmt, tree startvar)
+                         tree *counts, tree *nonrect_bounds,
+                         gimple *inner_stmt, tree startvar)
 {
   int i;
   if (gimple_omp_for_combined_p (fd->for_stmt))
@@ -1933,7 +2463,12 @@ expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
         use it.  */
       tree innerc = omp_find_clause (clauses, OMP_CLAUSE__LOOPTEMP_);
       gcc_assert (innerc);
-      for (i = 0; i < fd->collapse; i++)
+      int count = 0;
+      if (fd->non_rect
+         && fd->last_nonrect == fd->first_nonrect + 1
+         && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
+       count = 4;
+      for (i = 0; i < fd->collapse + count; i++)
        {
          innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
                                    OMP_CLAUSE__LOOPTEMP_);
@@ -1941,7 +2476,19 @@ expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
          if (i)
            {
              tree tem = OMP_CLAUSE_DECL (innerc);
-             tree t = fold_convert (TREE_TYPE (tem), counts[i]);
+             tree t;
+             if (i < fd->collapse)
+               t = counts[i];
+             else
+               switch (i - fd->collapse)
+                 {
+                 case 0: t = counts[0]; break;
+                 case 1: t = fd->first_inner_iterations; break;
+                 case 2: t = fd->factor; break;
+                 case 3: t = fd->adjn1; break;
+                 default: gcc_unreachable ();
+                 }
+             t = fold_convert (TREE_TYPE (tem), t);
              t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
                                            false, GSI_CONTINUE_LINKING);
              gassign *stmt = gimple_build_assign (tem, t);
@@ -1962,82 +2509,574 @@ expand_omp_for_init_vars (struct omp_for_data *fd, gimple_stmt_iterator *gsi,
       itype = vtype;
       if (POINTER_TYPE_P (vtype))
        itype = signed_type_for (vtype);
-      if (i != 0)
+      if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
        t = fold_build2 (TRUNC_MOD_EXPR, type, tem, counts[i]);
       else
        t = tem;
-      t = fold_convert (itype, t);
-      t = fold_build2 (MULT_EXPR, itype, t,
-                      fold_convert (itype, fd->loops[i].step));
-      if (POINTER_TYPE_P (vtype))
-       t = fold_build_pointer_plus (fd->loops[i].n1, t);
-      else
-       t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
-      t = force_gimple_operand_gsi (gsi, t,
-                                   DECL_P (fd->loops[i].v)
-                                   && TREE_ADDRESSABLE (fd->loops[i].v),
-                                   NULL_TREE, false,
-                                   GSI_CONTINUE_LINKING);
-      stmt = gimple_build_assign (fd->loops[i].v, t);
-      gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
-      if (i != 0)
+      if (i == fd->last_nonrect)
        {
-         t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
-         t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
+         t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
                                        false, GSI_CONTINUE_LINKING);
-         stmt = gimple_build_assign (tem, t);
-         gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
-       }
-    }
-}
-
-/* Helper function for expand_omp_for_*.  Generate code like:
-    L10:
-       V3 += STEP3;
-       if (V3 cond3 N32) goto BODY_BB; else goto L11;
-    L11:
-       V3 = N31;
-       V2 += STEP2;
-       if (V2 cond2 N22) goto BODY_BB; else goto L12;
-    L12:
-       V2 = N21;
-       V1 += STEP1;
-       goto BODY_BB;  */
-
-static basic_block
-extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
-                            basic_block body_bb)
-{
-  basic_block last_bb, bb, collapse_bb = NULL;
-  int i;
-  gimple_stmt_iterator gsi;
-  edge e;
-  tree t;
-  gimple *stmt;
-
-  last_bb = cont_bb;
-  for (i = fd->collapse - 1; i >= 0; i--)
-    {
-      tree vtype = TREE_TYPE (fd->loops[i].v);
+         tree stopval = t;
+         tree idx = create_tmp_reg (type, ".count");
+         expand_omp_build_assign (gsi, idx,
+                                  build_zero_cst (type), true);
+         basic_block bb_triang = NULL, bb_triang_dom = NULL;
+         if (fd->first_nonrect + 1 == fd->last_nonrect
+             && (TREE_CODE (fd->loop.n2) == INTEGER_CST
+                 || fd->first_inner_iterations)
+             && (optab_handler (sqrt_optab, TYPE_MODE (double_type_node))
+                 != CODE_FOR_nothing)
+             && !integer_zerop (fd->loop.n2))
+           {
+             tree outer_n1 = fd->adjn1 ? fd->adjn1 : fd->loops[i - 1].n1;
+             tree itype = TREE_TYPE (fd->loops[i].v);
+             tree first_inner_iterations = fd->first_inner_iterations;
+             tree factor = fd->factor;
+             gcond *cond_stmt
+               = gimple_build_cond (NE_EXPR, factor,
+                                    build_zero_cst (TREE_TYPE (factor)),
+                                    NULL_TREE, NULL_TREE);
+             gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
+             edge e = split_block (gsi_bb (*gsi), cond_stmt);
+             basic_block bb0 = e->src;
+             e->flags = EDGE_TRUE_VALUE;
+             e->probability = profile_probability::likely ();
+             bb_triang_dom = bb0;
+             *gsi = gsi_after_labels (e->dest);
+             tree slltype = long_long_integer_type_node;
+             tree ulltype = long_long_unsigned_type_node;
+             tree stopvalull = fold_convert (ulltype, stopval);
+             stopvalull
+               = force_gimple_operand_gsi (gsi, stopvalull, true, NULL_TREE,
+                                           false, GSI_CONTINUE_LINKING);
+             first_inner_iterations
+               = fold_convert (slltype, first_inner_iterations);
+             first_inner_iterations
+               = force_gimple_operand_gsi (gsi, first_inner_iterations, true,
+                                           NULL_TREE, false,
+                                           GSI_CONTINUE_LINKING);
+             factor = fold_convert (slltype, factor);
+             factor
+               = force_gimple_operand_gsi (gsi, factor, true, NULL_TREE,
+                                           false, GSI_CONTINUE_LINKING);
+             tree first_inner_iterationsd
+               = fold_build1 (FLOAT_EXPR, double_type_node,
+                              first_inner_iterations);
+             first_inner_iterationsd
+               = force_gimple_operand_gsi (gsi, first_inner_iterationsd, true,
+                                           NULL_TREE, false,
+                                           GSI_CONTINUE_LINKING);
+             tree factord = fold_build1 (FLOAT_EXPR, double_type_node,
+                                         factor);
+             factord = force_gimple_operand_gsi (gsi, factord, true,
+                                                 NULL_TREE, false,
+                                                 GSI_CONTINUE_LINKING);
+             tree stopvald = fold_build1 (FLOAT_EXPR, double_type_node,
+                                          stopvalull);
+             stopvald = force_gimple_operand_gsi (gsi, stopvald, true,
+                                                  NULL_TREE, false,
+                                                  GSI_CONTINUE_LINKING);
+             /* Temporarily disable flag_rounding_math, values will be
+                decimal numbers divided by 2 and worst case imprecisions
+                due to too large values ought to be caught later by the
+                checks for fallback.  */
+             int save_flag_rounding_math = flag_rounding_math;
+             flag_rounding_math = 0;
+             t = fold_build2 (RDIV_EXPR, double_type_node, factord,
+                              build_real (double_type_node, dconst2));
+             tree t3 = fold_build2 (MINUS_EXPR, double_type_node,
+                                    first_inner_iterationsd, t);
+             t3 = force_gimple_operand_gsi (gsi, t3, true, NULL_TREE, false,
+                                            GSI_CONTINUE_LINKING);
+             t = fold_build2 (MULT_EXPR, double_type_node, factord,
+                              build_real (double_type_node, dconst2));
+             t = fold_build2 (MULT_EXPR, double_type_node, t, stopvald);
+             t = fold_build2 (PLUS_EXPR, double_type_node, t,
+                              fold_build2 (MULT_EXPR, double_type_node,
+                                           t3, t3));
+             flag_rounding_math = save_flag_rounding_math;
+             t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
+                                           GSI_CONTINUE_LINKING);
+             if (flag_exceptions
+                 && cfun->can_throw_non_call_exceptions
+                 && operation_could_trap_p (LT_EXPR, true, false, NULL_TREE))
+               {
+                 tree tem = fold_build2 (LT_EXPR, boolean_type_node, t,
+                                         build_zero_cst (double_type_node));
+                 tem = force_gimple_operand_gsi (gsi, tem, true, NULL_TREE,
+                                                 false, GSI_CONTINUE_LINKING);
+                 cond_stmt = gimple_build_cond (NE_EXPR, tem,
+                                                boolean_false_node,
+                                                NULL_TREE, NULL_TREE);
+               }
+             else
+               cond_stmt
+                 = gimple_build_cond (LT_EXPR, t,
+                                      build_zero_cst (double_type_node),
+                                      NULL_TREE, NULL_TREE);
+             gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
+             e = split_block (gsi_bb (*gsi), cond_stmt);
+             basic_block bb1 = e->src;
+             e->flags = EDGE_FALSE_VALUE;
+             e->probability = profile_probability::very_likely ();
+             *gsi = gsi_after_labels (e->dest);
+             gcall *call = gimple_build_call_internal (IFN_SQRT, 1, t);
+             tree sqrtr = create_tmp_var (double_type_node);
+             gimple_call_set_lhs (call, sqrtr);
+             gsi_insert_after (gsi, call, GSI_CONTINUE_LINKING);
+             t = fold_build2 (MINUS_EXPR, double_type_node, sqrtr, t3);
+             t = fold_build2 (RDIV_EXPR, double_type_node, t, factord);
+             t = fold_build1 (FIX_TRUNC_EXPR, ulltype, t);
+             tree c = create_tmp_var (ulltype);
+             tree d = create_tmp_var (ulltype);
+             expand_omp_build_assign (gsi, c, t, true);
+             t = fold_build2 (MINUS_EXPR, ulltype, c,
+                              build_one_cst (ulltype));
+             t = fold_build2 (MULT_EXPR, ulltype, c, t);
+             t = fold_build2 (RSHIFT_EXPR, ulltype, t, integer_one_node);
+             t = fold_build2 (MULT_EXPR, ulltype,
+                              fold_convert (ulltype, fd->factor), t);
+             tree t2
+               = fold_build2 (MULT_EXPR, ulltype, c,
+                              fold_convert (ulltype,
+                                            fd->first_inner_iterations));
+             t = fold_build2 (PLUS_EXPR, ulltype, t, t2);
+             expand_omp_build_assign (gsi, d, t, true);
+             t = fold_build2 (MULT_EXPR, ulltype,
+                              fold_convert (ulltype, fd->factor), c);
+             t = fold_build2 (PLUS_EXPR, ulltype,
+                              t, fold_convert (ulltype,
+                                               fd->first_inner_iterations));
+             t2 = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
+                                            GSI_CONTINUE_LINKING);
+             cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, d,
+                                            NULL_TREE, NULL_TREE);
+             gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
+             e = split_block (gsi_bb (*gsi), cond_stmt);
+             basic_block bb2 = e->src;
+             e->flags = EDGE_TRUE_VALUE;
+             e->probability = profile_probability::very_likely ();
+             *gsi = gsi_after_labels (e->dest);
+             t = fold_build2 (PLUS_EXPR, ulltype, d, t2);
+             t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
+                                           GSI_CONTINUE_LINKING);
+             cond_stmt = gimple_build_cond (GE_EXPR, stopvalull, t,
+                                            NULL_TREE, NULL_TREE);
+             gsi_insert_after (gsi, cond_stmt, GSI_CONTINUE_LINKING);
+             e = split_block (gsi_bb (*gsi), cond_stmt);
+             basic_block bb3 = e->src;
+             e->flags = EDGE_FALSE_VALUE;
+             e->probability = profile_probability::very_likely ();
+             *gsi = gsi_after_labels (e->dest);
+             t = fold_convert (itype, c);
+             t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i - 1].step);
+             t = fold_build2 (PLUS_EXPR, itype, outer_n1, t);
+             t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, false,
+                                           GSI_CONTINUE_LINKING);
+             expand_omp_build_assign (gsi, fd->loops[i - 1].v, t, true);
+             t2 = fold_build2 (MINUS_EXPR, ulltype, stopvalull, d);
+             t2 = fold_convert (itype, t2);
+             t2 = fold_build2 (MULT_EXPR, itype, t2, fd->loops[i].step);
+             t2 = fold_build2 (PLUS_EXPR, itype, t2, fd->loops[i].n1);
+             if (fd->loops[i].m1)
+               {
+                 t = fold_build2 (MULT_EXPR, itype, t, fd->loops[i].m1);
+                 t2 = fold_build2 (PLUS_EXPR, itype, t2, t);
+               }
+             expand_omp_build_assign (gsi, fd->loops[i].v, t2, true);
+             e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
+             bb_triang = e->src;
+             *gsi = gsi_after_labels (e->dest);
+             remove_edge (e);
+             e = make_edge (bb1, gsi_bb (*gsi), EDGE_TRUE_VALUE);
+             e->probability = profile_probability::very_unlikely ();
+             e = make_edge (bb2, gsi_bb (*gsi), EDGE_FALSE_VALUE);
+             e->probability = profile_probability::very_unlikely ();
+             e = make_edge (bb3, gsi_bb (*gsi), EDGE_TRUE_VALUE);
+             e->probability = profile_probability::very_unlikely ();
+
+             basic_block bb4 = create_empty_bb (bb0);
+             add_bb_to_loop (bb4, bb0->loop_father);
+             e = make_edge (bb0, bb4, EDGE_FALSE_VALUE);
+             e->probability = profile_probability::unlikely ();
+             make_edge (bb4, gsi_bb (*gsi), EDGE_FALLTHRU);
+             set_immediate_dominator (CDI_DOMINATORS, bb4, bb0);
+             set_immediate_dominator (CDI_DOMINATORS, gsi_bb (*gsi), bb0);
+             gimple_stmt_iterator gsi2 = gsi_after_labels (bb4);
+             t2 = fold_build2 (TRUNC_DIV_EXPR, type,
+                               counts[i], counts[i - 1]);
+             t2 = force_gimple_operand_gsi (&gsi2, t2, true, NULL_TREE, false,
+                                            GSI_CONTINUE_LINKING);
+             t = fold_build2 (TRUNC_MOD_EXPR, type, stopval, t2);
+             t2 = fold_build2 (TRUNC_DIV_EXPR, type, stopval, t2);
+             t = fold_convert (itype, t);
+             t2 = fold_convert (itype, t2);
+             t = fold_build2 (MULT_EXPR, itype, t,
+                              fold_convert (itype, fd->loops[i].step));
+             t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
+             t2 = fold_build2 (MULT_EXPR, itype, t2,
+                               fold_convert (itype, fd->loops[i - 1].step));
+             t2 = fold_build2 (PLUS_EXPR, itype, fd->loops[i - 1].n1, t2);
+             t2 = force_gimple_operand_gsi (&gsi2, t2, false, NULL_TREE,
+                                            false, GSI_CONTINUE_LINKING);
+             stmt = gimple_build_assign (fd->loops[i - 1].v, t2);
+             gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
+             if (fd->loops[i].m1)
+               {
+                 t2 = fold_build2 (MULT_EXPR, itype, fd->loops[i].m1,
+                                   fd->loops[i - 1].v);
+                 t = fold_build2 (PLUS_EXPR, itype, t, t2);
+               }
+             t = force_gimple_operand_gsi (&gsi2, t, false, NULL_TREE,
+                                           false, GSI_CONTINUE_LINKING);
+             stmt = gimple_build_assign (fd->loops[i].v, t);
+             gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
+           }
+         /* Fallback implementation.  Evaluate the loops between
+            fd->first_nonrect and fd->last_nonrect (inclusive) at
+            runtime using temporaries instead of the original iteration
+            variables; in the body just bump the counter and compare
+            it with the desired value.  */
+         gimple_stmt_iterator gsi2 = *gsi;
+         basic_block entry_bb = gsi_bb (gsi2);
+         edge e = split_block (entry_bb, gsi_stmt (gsi2));
+         e = split_block (e->dest, (gimple *) NULL);
+         basic_block dom_bb = NULL;
+         basic_block cur_bb = e->src;
+         basic_block next_bb = e->dest;
+         entry_bb = e->dest;
+         *gsi = gsi_after_labels (entry_bb);
 
-      bb = create_empty_bb (last_bb);
-      add_bb_to_loop (bb, last_bb->loop_father);
-      gsi = gsi_start_bb (bb);
+         tree *vs = XALLOCAVEC (tree, fd->last_nonrect);
+         tree n1 = NULL_TREE, n2 = NULL_TREE;
+         memset (vs, 0, fd->last_nonrect * sizeof (tree));
 
-      if (i < fd->collapse - 1)
-       {
-         e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
-         e->probability = profile_probability::guessed_always ().apply_scale (1, 8);
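+         /* The blocks built below perform a runtime search: re-execute
+            the loops from fd->first_nonrect to fd->last_nonrect using
+            the temporary iterators vs[], summing each innermost loop's
+            iteration count into IDX until adding another one would
+            exceed STOPVAL.  */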
+         for (int j = fd->first_nonrect; j <= fd->last_nonrect; j++)
+           {
+             tree itype = TREE_TYPE (fd->loops[j].v);
+             bool rect_p = (fd->loops[j].m1 == NULL_TREE
+                            && fd->loops[j].m2 == NULL_TREE
+                            && !fd->loops[j].non_rect_referenced);
+             gsi2 = gsi_after_labels (cur_bb);
+             t = fold_convert (itype, unshare_expr (fd->loops[j].n1));
+             if (fd->loops[j].m1)
+               {
+                 n1 = fold_convert (itype, unshare_expr (fd->loops[j].m1));
+                 n1 = fold_build2 (MULT_EXPR, itype,
+                                   vs[j - fd->loops[j].outer], n1);
+                 n1 = fold_build2 (PLUS_EXPR, itype, n1, t);
+               }
+             else if (rect_p)
+               n1 = build_zero_cst (type);
+             else
+               n1 = t;
+             n1 = force_gimple_operand_gsi (&gsi2, n1, true, NULL_TREE,
+                                            true, GSI_SAME_STMT);
+             if (j < fd->last_nonrect)
+               {
+                 vs[j] = create_tmp_reg (rect_p ? type : itype, ".it");
+                 expand_omp_build_assign (&gsi2, vs[j], n1);
+               }
+             t = fold_convert (itype, unshare_expr (fd->loops[j].n2));
+             if (fd->loops[j].m2)
+               {
+                 n2 = fold_convert (itype, unshare_expr (fd->loops[j].m2));
+                 n2 = fold_build2 (MULT_EXPR, itype,
+                                   vs[j - fd->loops[j].outer], n2);
+                 n2 = fold_build2 (PLUS_EXPR, itype, n2, t);
+               }
+             else if (rect_p)
+               n2 = counts[j];
+             else
+               n2 = t;
+             n2 = force_gimple_operand_gsi (&gsi2, n2, true, NULL_TREE,
+                                            true, GSI_SAME_STMT);
+             if (j == fd->last_nonrect)
+               {
+                 gcond *cond_stmt
+                   = gimple_build_cond (fd->loops[j].cond_code, n1, n2,
+                                        NULL_TREE, NULL_TREE);
+                 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
+                 e = split_block (cur_bb, cond_stmt);
+                 e->flags = EDGE_TRUE_VALUE;
+                 edge ne = make_edge (cur_bb, next_bb, EDGE_FALSE_VALUE);
+                 e->probability = profile_probability::likely ().guessed ();
+                 ne->probability = e->probability.invert ();
+                 gsi2 = gsi_after_labels (e->dest);
+
+                 t = build_int_cst (itype, (fd->loops[j].cond_code == LT_EXPR
+                                            ? -1 : 1));
+                 t = fold_build2 (PLUS_EXPR, itype,
+                                  fold_convert (itype, fd->loops[j].step), t);
+                 t = fold_build2 (PLUS_EXPR, itype, t, n2);
+                 t = fold_build2 (MINUS_EXPR, itype, t, n1);
+                 tree step = fold_convert (itype, fd->loops[j].step);
+                 if (TYPE_UNSIGNED (itype)
+                     && fd->loops[j].cond_code == GT_EXPR)
+                   t = fold_build2 (TRUNC_DIV_EXPR, itype,
+                                    fold_build1 (NEGATE_EXPR, itype, t),
+                                    fold_build1 (NEGATE_EXPR, itype, step));
+                 else
+                   t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
+                 t = fold_convert (type, t);
+                 t = fold_build2 (PLUS_EXPR, type, idx, t);
+                 t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+                                               true, GSI_SAME_STMT);
+                 e = make_edge (e->dest, next_bb, EDGE_FALLTHRU);
+                 set_immediate_dominator (CDI_DOMINATORS, next_bb, cur_bb);
+                 cond_stmt
+                   = gimple_build_cond (LE_EXPR, t, stopval, NULL_TREE,
+                                        NULL_TREE);
+                 gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
+                 e = split_block (gsi_bb (gsi2), cond_stmt);
+                 e->flags = EDGE_TRUE_VALUE;
+                 e->probability = profile_probability::likely ().guessed ();
+                 ne = make_edge (e->src, entry_bb, EDGE_FALSE_VALUE);
+                 ne->probability = e->probability.invert ();
+                 gsi2 = gsi_after_labels (e->dest);
+                 expand_omp_build_assign (&gsi2, idx, t);
+                 set_immediate_dominator (CDI_DOMINATORS, entry_bb, dom_bb);
+                 break;
+               }
+             e = split_block (cur_bb, last_stmt (cur_bb));
 
-         t = fd->loops[i + 1].n1;
-         t = force_gimple_operand_gsi (&gsi, t,
-                                       DECL_P (fd->loops[i + 1].v)
-                                       && TREE_ADDRESSABLE (fd->loops[i
-                                                                      + 1].v),
+             basic_block new_cur_bb = create_empty_bb (cur_bb);
+             add_bb_to_loop (new_cur_bb, cur_bb->loop_father);
+
+             gsi2 = gsi_after_labels (e->dest);
+             if (rect_p)
+               t = fold_build2 (PLUS_EXPR, type, vs[j],
+                                build_one_cst (type));
+             else
+               {
+                 tree step
+                   = fold_convert (itype, unshare_expr (fd->loops[j].step));
+                 t = fold_build2 (PLUS_EXPR, itype, vs[j], step);
+               }
+             t = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+                                           true, GSI_SAME_STMT);
+             expand_omp_build_assign (&gsi2, vs[j], t);
+
+             edge ne = split_block (e->dest, last_stmt (e->dest));
+             gsi2 = gsi_after_labels (ne->dest);
+
+             gcond *cond_stmt;
+             if (next_bb == entry_bb)
+               /* No need to actually check the outermost condition.  */
+               cond_stmt
+                 = gimple_build_cond (EQ_EXPR, boolean_true_node,
+                                      boolean_true_node,
+                                      NULL_TREE, NULL_TREE);
+             else
+               cond_stmt
+                 = gimple_build_cond (rect_p ? LT_EXPR
+                                             : fd->loops[j].cond_code,
+                                      vs[j], n2, NULL_TREE, NULL_TREE);
+             gsi_insert_before (&gsi2, cond_stmt, GSI_SAME_STMT);
+             edge e3, e4;
+             if (next_bb == entry_bb)
+               {
+                 e3 = find_edge (ne->dest, next_bb);
+                 e3->flags = EDGE_FALSE_VALUE;
+                 dom_bb = ne->dest;
+               }
+             else
+               e3 = make_edge (ne->dest, next_bb, EDGE_FALSE_VALUE);
+             e4 = make_edge (ne->dest, new_cur_bb, EDGE_TRUE_VALUE);
+             e4->probability = profile_probability::likely ().guessed ();
+             e3->probability = e4->probability.invert ();
+             basic_block esrc = e->src;
+             make_edge (e->src, ne->dest, EDGE_FALLTHRU);
+             cur_bb = new_cur_bb;
+             basic_block latch_bb = next_bb;
+             next_bb = e->dest;
+             remove_edge (e);
+             set_immediate_dominator (CDI_DOMINATORS, ne->dest, esrc);
+             set_immediate_dominator (CDI_DOMINATORS, latch_bb, ne->dest);
+             set_immediate_dominator (CDI_DOMINATORS, cur_bb, ne->dest);
+           }
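+         /* The search above left the temporary iterator values in vs[];
+            translate them back into the user iteration variables, and
+            compute the innermost one from STOPVAL - IDX.  */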
+         for (int j = fd->last_nonrect; j >= fd->first_nonrect; j--)
+           {
+             tree itype = TREE_TYPE (fd->loops[j].v);
+             bool rect_p = (fd->loops[j].m1 == NULL_TREE
+                            && fd->loops[j].m2 == NULL_TREE
+                            && !fd->loops[j].non_rect_referenced);
+             if (j == fd->last_nonrect)
+               {
+                 t = fold_build2 (MINUS_EXPR, type, stopval, idx);
+                 t = fold_convert (itype, t);
+                 tree t2
+                   = fold_convert (itype, unshare_expr (fd->loops[j].step));
+                 t = fold_build2 (MULT_EXPR, itype, t, t2);
+                 t = fold_build2 (PLUS_EXPR, itype, n1, t);
+               }
+             else if (rect_p)
+               {
+                 t = fold_convert (itype, vs[j]);
+                 t = fold_build2 (MULT_EXPR, itype, t,
+                                  fold_convert (itype, fd->loops[j].step));
+                 if (POINTER_TYPE_P (vtype))
+                   t = fold_build_pointer_plus (fd->loops[j].n1, t);
+                 else
+                   t = fold_build2 (PLUS_EXPR, itype, fd->loops[j].n1, t);
+               }
+             else
+               t = vs[j];
+             t = force_gimple_operand_gsi (gsi, t, false,
+                                           NULL_TREE, true,
+                                           GSI_SAME_STMT);
+             stmt = gimple_build_assign (fd->loops[j].v, t);
+             gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
+           }
+         if (gsi_end_p (*gsi))
+           *gsi = gsi_last_bb (gsi_bb (*gsi));
+         else
+           gsi_prev (gsi);
+         if (bb_triang)
+           {
+             e = split_block (gsi_bb (*gsi), gsi_stmt (*gsi));
+             make_edge (bb_triang, e->dest, EDGE_FALLTHRU);
+             *gsi = gsi_after_labels (e->dest);
+             if (!gsi_end_p (*gsi))
+               gsi_insert_before (gsi, gimple_build_nop (), GSI_NEW_STMT);
+             set_immediate_dominator (CDI_DOMINATORS, e->dest, bb_triang_dom);
+           }
+       }
+      else
+       {
+         t = fold_convert (itype, t);
+         t = fold_build2 (MULT_EXPR, itype, t,
+                          fold_convert (itype, fd->loops[i].step));
+         if (POINTER_TYPE_P (vtype))
+           t = fold_build_pointer_plus (fd->loops[i].n1, t);
+         else
+           t = fold_build2 (PLUS_EXPR, itype, fd->loops[i].n1, t);
+         t = force_gimple_operand_gsi (gsi, t,
+                                       DECL_P (fd->loops[i].v)
+                                       && TREE_ADDRESSABLE (fd->loops[i].v),
                                        NULL_TREE, false,
                                        GSI_CONTINUE_LINKING);
-         stmt = gimple_build_assign (fd->loops[i + 1].v, t);
-         gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+         stmt = gimple_build_assign (fd->loops[i].v, t);
+         gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
+       }
+      if (i != 0 && (i != fd->last_nonrect || fd->first_nonrect))
+       {
+         t = fold_build2 (TRUNC_DIV_EXPR, type, tem, counts[i]);
+         t = force_gimple_operand_gsi (gsi, t, false, NULL_TREE,
+                                       false, GSI_CONTINUE_LINKING);
+         stmt = gimple_build_assign (tem, t);
+         gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
+       }
+      if (i == fd->last_nonrect)
+       i = fd->first_nonrect;
+    }
+  if (fd->non_rect)
+    for (i = 0; i <= fd->last_nonrect; i++)
+      if (fd->loops[i].m2)
+       {
+         tree itype = TREE_TYPE (fd->loops[i].v);
+
+         tree t = fold_convert (itype, unshare_expr (fd->loops[i].m2));
+         t = fold_build2 (MULT_EXPR, itype,
+                          fd->loops[i - fd->loops[i].outer].v, t);
+         t = fold_build2 (PLUS_EXPR, itype, t,
+                          fold_convert (itype,
+                                        unshare_expr (fd->loops[i].n2)));
+         nonrect_bounds[i] = create_tmp_reg (itype, ".bound");
+         t = force_gimple_operand_gsi (gsi, t, false,
+                                       NULL_TREE, false,
+                                       GSI_CONTINUE_LINKING);
+         stmt = gimple_build_assign (nonrect_bounds[i], t);
+         gsi_insert_after (gsi, stmt, GSI_CONTINUE_LINKING);
+       }
+}
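+
+/* For reference, an illustrative sketch (not part of this change) of the
+   kind of non-rectangular collapsed loop the code above handles, where the
+   bounds of an inner loop depend on an outer iterator through the M1/M2
+   multipliers:
+
+     #pragma omp for collapse(2)
+     for (int i = 0; i < n; i++)
+       for (int j = i + 1; j < 2 * i + 16; j++)
+        body (i, j);
+
+   Here fd->loops[1].m1 == 1 (lower bound I + 1) and fd->loops[1].m2 == 2
+   (upper bound 2 * I + 16), so the number of iterations and the mapping
+   from a linear iteration number back to (I, J) have to be computed at
+   run time as done above.  */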
+
+/* Helper function for expand_omp_for_*.  Generate code like:
+    L10:
+       V3 += STEP3;
+       if (V3 cond3 N32) goto BODY_BB; else goto L11;
+    L11:
+       V3 = N31;
+       V2 += STEP2;
+       if (V2 cond2 N22) goto BODY_BB; else goto L12;
+    L12:
+       V2 = N21;
+       V1 += STEP1;
+       goto BODY_BB;
+   For non-rectangular loops, use temporaries stored in nonrect_bounds
+   for the upper bounds if an M?2 multiplier is present.  Given e.g.
+   for (V1 = N11; V1 cond1 N12; V1 += STEP1)
+   for (V2 = N21; V2 cond2 N22; V2 += STEP2)
+   for (V3 = N31; V3 cond3 N32; V3 += STEP3)
+   for (V4 = N41 + M41 * V2; V4 cond4 N42 + M42 * V2; V4 += STEP4)
+   do:
+    L10:
+       V4 += STEP4;
+       if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L11;
+    L11:
+       V4 = N41 + M41 * V2; // This can be left out if the loop
+                            // refers to the immediate parent loop
+       V3 += STEP3;
+       if (V3 cond3 N32) goto BODY_BB; else goto L12;
+    L12:
+       V3 = N31;
+       V2 += STEP2;
+       if (V2 cond2 N22) goto L120; else goto L13;
+    L120:
+       V4 = N41 + M41 * V2;
+       NONRECT_BOUND4 = N42 + M42 * V2;
+       if (V4 cond4 NONRECT_BOUND4) goto BODY_BB; else goto L12;
+    L13:
+       V2 = N21;
+       V1 += STEP1;
+       goto L120;  */
+
+static basic_block
+extract_omp_for_update_vars (struct omp_for_data *fd, tree *nonrect_bounds,
+                            basic_block cont_bb, basic_block body_bb)
+{
+  basic_block last_bb, bb, collapse_bb = NULL;
+  int i;
+  gimple_stmt_iterator gsi;
+  edge e;
+  tree t;
+  gimple *stmt;
+
+  last_bb = cont_bb;
+  for (i = fd->collapse - 1; i >= 0; i--)
+    {
+      tree vtype = TREE_TYPE (fd->loops[i].v);
+
+      bb = create_empty_bb (last_bb);
+      add_bb_to_loop (bb, last_bb->loop_father);
+      gsi = gsi_start_bb (bb);
+
+      if (i < fd->collapse - 1)
+       {
+         e = make_edge (last_bb, bb, EDGE_FALSE_VALUE);
+         e->probability
+           = profile_probability::guessed_always ().apply_scale (1, 8);
+
+         struct omp_for_data_loop *l = &fd->loops[i + 1];
+         if (l->m1 == NULL_TREE || l->outer != 1)
+           {
+             t = l->n1;
+             if (l->m1)
+               {
+                 tree t2
+                   = fold_build2 (MULT_EXPR, TREE_TYPE (t),
+                                  fd->loops[i + 1 - l->outer].v, l->m1);
+                 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t2, t);
+               }
+             t = force_gimple_operand_gsi (&gsi, t,
+                                           DECL_P (l->v)
+                                           && TREE_ADDRESSABLE (l->v),
+                                           NULL_TREE, false,
+                                           GSI_CONTINUE_LINKING);
+             stmt = gimple_build_assign (l->v, t);
+             gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+           }
        }
       else
        collapse_bb = bb;
@@ -2055,9 +3094,83 @@ extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
       stmt = gimple_build_assign (fd->loops[i].v, t);
       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
 
+      if (fd->loops[i].non_rect_referenced)
+       {
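+         /* This builds the L120 block from the comment above: for each
+            loop J whose bounds depend on loop I, recompute J's iterator
+            from its lower bound, refresh NONRECT_BOUNDS[J] if an M2
+            multiplier is present, and only branch to the body if the
+            recomputed range is non-empty.  */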
+         basic_block update_bb = NULL, prev_bb = NULL;
+         for (int j = i + 1; j <= fd->last_nonrect; j++)
+           if (j - fd->loops[j].outer == i)
+             {
+               tree n1, n2;
+               struct omp_for_data_loop *l = &fd->loops[j];
+               basic_block this_bb = create_empty_bb (last_bb);
+               add_bb_to_loop (this_bb, last_bb->loop_father);
+               gimple_stmt_iterator gsi2 = gsi_start_bb (this_bb);
+               if (prev_bb)
+                 {
+                   e = make_edge (prev_bb, this_bb, EDGE_TRUE_VALUE);
+                   e->probability
+                     = profile_probability::guessed_always ().apply_scale (7,
+                                                                           8);
+                   set_immediate_dominator (CDI_DOMINATORS, this_bb, prev_bb);
+                 }
+               if (l->m1)
+                 {
+                   t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m1), l->m1,
+                                    fd->loops[i].v);
+                   t = fold_build2 (PLUS_EXPR, TREE_TYPE (l->v), t, l->n1);
+                   n1 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+                                                  false,
+                                                  GSI_CONTINUE_LINKING);
+                   stmt = gimple_build_assign (l->v, n1);
+                   gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
+                   n1 = l->v;
+                 }
+               else
+                 n1 = force_gimple_operand_gsi (&gsi2, l->n1, true,
+                                                NULL_TREE, false,
+                                                GSI_CONTINUE_LINKING);
+               if (l->m2)
+                 {
+                   t = fold_build2 (MULT_EXPR, TREE_TYPE (l->m2), l->m2,
+                                    fd->loops[i].v);
+                   t = fold_build2 (PLUS_EXPR, TREE_TYPE (nonrect_bounds[j]),
+                                    t, unshare_expr (l->n2));
+                   n2 = force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
+                                                  false,
+                                                  GSI_CONTINUE_LINKING);
+                   stmt = gimple_build_assign (nonrect_bounds[j], n2);
+                   gsi_insert_after (&gsi2, stmt, GSI_CONTINUE_LINKING);
+                   n2 = nonrect_bounds[j];
+                 }
+               else
+                 n2 = force_gimple_operand_gsi (&gsi2, unshare_expr (l->n2),
+                                                true, NULL_TREE, false,
+                                                GSI_CONTINUE_LINKING);
+               gcond *cond_stmt
+                 = gimple_build_cond (l->cond_code, n1, n2,
+                                      NULL_TREE, NULL_TREE);
+               gsi_insert_after (&gsi2, cond_stmt, GSI_CONTINUE_LINKING);
+               if (update_bb == NULL)
+                 update_bb = this_bb;
+               e = make_edge (this_bb, bb, EDGE_FALSE_VALUE);
+               e->probability
+                 = profile_probability::guessed_always ().apply_scale (1, 8);
+               if (prev_bb == NULL)
+                 set_immediate_dominator (CDI_DOMINATORS, this_bb, bb);
+               prev_bb = this_bb;
+             }
+         e = make_edge (prev_bb, body_bb, EDGE_TRUE_VALUE);
+         e->probability
+           = profile_probability::guessed_always ().apply_scale (7, 8);
+         body_bb = update_bb;
+       }
+
       if (i > 0)
        {
-         t = fd->loops[i].n2;
+         if (fd->loops[i].m2)
+           t = nonrect_bounds[i];
+         else
+           t = unshare_expr (fd->loops[i].n2);
          t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                        false, GSI_CONTINUE_LINKING);
          tree v = fd->loops[i].v;
@@ -2067,11 +3180,17 @@ extract_omp_for_update_vars (struct omp_for_data *fd, basic_block cont_bb,
          t = fold_build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
          stmt = gimple_build_cond_empty (t);
          gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
+         if (walk_tree (gimple_cond_lhs_ptr (as_a <gcond *> (stmt)),
+                        expand_omp_regimplify_p, NULL, NULL)
+             || walk_tree (gimple_cond_rhs_ptr (as_a <gcond *> (stmt)),
+                           expand_omp_regimplify_p, NULL, NULL))
+           gimple_regimplify_operands (stmt, &gsi);
          e = make_edge (bb, body_bb, EDGE_TRUE_VALUE);
          e->probability = profile_probability::guessed_always ().apply_scale (7, 8);
        }
       else
        make_edge (bb, body_bb, EDGE_FALLTHRU);
+      set_immediate_dominator (CDI_DOMINATORS, bb, last_bb);
       last_bb = bb;
     }
 
@@ -2133,8 +3252,8 @@ expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
              forward = tree_int_cst_sgn (step) != -1;
            }
          if (forward ^ OMP_CLAUSE_DEPEND_SINK_NEGATIVE (deps))
-           warning_at (loc, 0, "%<depend(sink)%> clause waiting for "
-                               "lexically later iteration");
+           warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
+                               "waiting for lexically later iteration");
          break;
        }
       deps = TREE_CHAIN (deps);
@@ -2270,8 +3389,9 @@ expand_omp_ordered_sink (gimple_stmt_iterator *gsi, struct omp_for_data *fd,
                               build_int_cst (itype, 0));
          if (integer_zerop (t) && !warned_step)
            {
-             warning_at (loc, 0, "%<depend(sink)%> refers to iteration never "
-                                 "in the iteration space");
+             warning_at (loc, 0, "%<depend%> clause with %<sink%> modifier "
+                                 "refers to iteration never in the iteration "
+                                 "space");
              warned_step = true;
            }
          cond = fold_build2_loc (loc, BIT_AND_EXPR, boolean_type_node,
@@ -2489,7 +3609,7 @@ expand_omp_for_ordered_loops (struct omp_for_data *fd, tree *counts,
 
       if (e2)
        {
-         struct loop *loop = alloc_loop ();
+         class loop *loop = alloc_loop ();
          loop->header = new_header;
          loop->latch = e2->src;
          add_loop (loop, body_bb->loop_father);
@@ -2664,16 +3784,26 @@ expand_omp_for_generic (struct omp_region *region,
 
   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
   if (fd->ordered
-      && omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
+      && omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
                          OMP_CLAUSE_LASTPRIVATE))
     ordered_lastprivate = false;
   tree reductions = NULL_TREE;
-  tree mem = NULL_TREE;
+  tree mem = NULL_TREE, cond_var = NULL_TREE, condtemp = NULL_TREE;
+  tree memv = NULL_TREE;
+  if (fd->lastprivate_conditional)
+    {
+      tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
+                               OMP_CLAUSE__CONDTEMP_);
+      if (fd->have_pointer_condtemp)
+       condtemp = OMP_CLAUSE_DECL (c);
+      c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
+      cond_var = OMP_CLAUSE_DECL (c);
+    }
   if (sched_arg)
     {
       if (fd->have_reductemp)
        {
-         tree c = omp_find_clause (gimple_omp_for_clauses (gsi_stmt (gsi)),
+         tree c = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
                                    OMP_CLAUSE__REDUCTEMP_);
          reductions = OMP_CLAUSE_DECL (c);
          gcc_assert (TREE_CODE (reductions) == SSA_NAME);
@@ -2688,8 +3818,20 @@ expand_omp_for_generic (struct omp_region *region,
        }
       else
        reductions = null_pointer_node;
-      /* For now.  */
-      mem = null_pointer_node;
+      if (fd->have_pointer_condtemp)
+       {
+         tree type = TREE_TYPE (condtemp);
+         memv = create_tmp_var (type);
+         TREE_ADDRESSABLE (memv) = 1;
+         unsigned HOST_WIDE_INT sz
+           = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
+         sz *= fd->lastprivate_conditional;
+         expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
+                                  false);
+         mem = build_fold_addr_expr (memv);
+       }
+      else
+       mem = null_pointer_node;
     }
   if (fd->collapse > 1 || fd->ordered)
     {
@@ -2709,7 +3851,7 @@ expand_omp_for_generic (struct omp_region *region,
          for (i = first_zero_iter1;
               i < (fd->ordered ? fd->ordered : fd->collapse); i++)
            if (SSA_VAR_P (counts[i]))
-             TREE_NO_WARNING (counts[i]) = 1;
+             suppress_warning (counts[i], OPT_Wuninitialized);
          gsi_prev (&gsi);
          e = split_block (entry_bb, gsi_stmt (gsi));
          entry_bb = e->dest;
@@ -2726,7 +3868,7 @@ expand_omp_for_generic (struct omp_region *region,
             be executed in that case, so just avoid uninit warnings.  */
          for (i = first_zero_iter2; i < fd->ordered; i++)
            if (SSA_VAR_P (counts[i]))
-             TREE_NO_WARNING (counts[i]) = 1;
+             suppress_warning (counts[i], OPT_Wuninitialized);
          if (zero_iter1_bb)
            make_edge (zero_iter2_bb, entry_bb, EDGE_FALLTHRU);
          else
@@ -2939,11 +4081,12 @@ expand_omp_for_generic (struct omp_region *region,
                                true, GSI_SAME_STMT);
   if (arr && !TREE_STATIC (arr))
     {
-      tree clobber = build_constructor (TREE_TYPE (arr), NULL);
-      TREE_THIS_VOLATILE (clobber) = 1;
+      tree clobber = build_clobber (TREE_TYPE (arr));
       gsi_insert_before (&gsi, gimple_build_assign (arr, clobber),
                         GSI_SAME_STMT);
     }
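+  /* MEMV was initialized above to the number of bytes needed; passing a
+     non-NULL MEM asks the runtime to replace that value with a pointer
+     to per-team memory of that size, which is copied into CONDTEMP
+     just below.  */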
+  if (fd->have_pointer_condtemp)
+    expand_omp_build_assign (&gsi, condtemp, memv, false);
   if (fd->have_reductemp)
     {
       gimple *g = gsi_stmt (gsi);
@@ -3014,6 +4157,35 @@ expand_omp_for_generic (struct omp_region *region,
                                NULL_TREE, false, GSI_CONTINUE_LINKING);
   assign_stmt = gimple_build_assign (startvar, t);
   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
+  if (cond_var)
+    {
+      tree itype = TREE_TYPE (cond_var);
+      /* For the lastprivate(conditional:) itervar we need an iteration
+        counter that starts at a non-zero unsigned value and increases.
+        Prefer as few IVs as possible, so if we can use startvar
+        itself, use that, or startvar + constant (those would be
+        incremented with step), and as a last resort use istart0 + 1,
+        incremented by 1 each iteration.  */
+      if ((fd->ordered && fd->collapse == 1)
+         || bias
+         || POINTER_TYPE_P (type)
+         || TREE_CODE (fd->loop.n1) != INTEGER_CST
+         || fd->loop.cond_code != LT_EXPR)
+       t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, istart0),
+                        build_int_cst (itype, 1));
+      else if (tree_int_cst_sgn (fd->loop.n1) == 1)
+       t = fold_convert (itype, t);
+      else
+       {
+         tree c = fold_convert (itype, fd->loop.n1);
+         c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
+         t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
+       }
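+      /* Illustration: for a source loop for (V = -5; V < N; V++) this
+        computes C = 1 - (-5) = 6, so COND_VAR starts at startvar + 6,
+        i.e. 1 on the first iteration, and increases from there.  */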
+      t = force_gimple_operand_gsi (&gsi, t, false,
+                                   NULL_TREE, false, GSI_CONTINUE_LINKING);
+      assign_stmt = gimple_build_assign (cond_var, t);
+      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
+    }
 
   t = iend0;
   if (fd->ordered && fd->collapse == 1)
@@ -3060,9 +4232,8 @@ expand_omp_for_generic (struct omp_region *region,
          && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
        {
          tree d = OMP_CLAUSE_DECL (c);
-         bool is_ref = omp_is_reference (d);
          tree t = d, a, dest;
-         if (is_ref)
+         if (omp_privatize_by_reference (t))
            t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
          tree type = TREE_TYPE (t);
          if (POINTER_TYPE_P (type))
@@ -3096,11 +4267,10 @@ expand_omp_for_generic (struct omp_region *region,
                           : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
          t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                        false, GSI_CONTINUE_LINKING);
-         assign_stmt = gimple_build_assign (dest, t);
-         gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
+         expand_omp_build_assign (&gsi, dest, t, true);
        }
   if (fd->collapse > 1)
-    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
+    expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
 
   if (fd->ordered)
     {
@@ -3150,13 +4320,18 @@ expand_omp_for_generic (struct omp_region *region,
          gsi = gsi_last_bb (l0_bb);
          expand_omp_build_assign (&gsi, counts[fd->collapse - 1],
                                   istart0, true);
-         gsi = gsi_last_bb (cont_bb);
-         t = fold_build2 (PLUS_EXPR, fd->iter_type, counts[fd->collapse - 1],
-                          build_int_cst (fd->iter_type, 1));
-         expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
-         tree aref = build4 (ARRAY_REF, fd->iter_type, counts[fd->ordered],
-                             size_zero_node, NULL_TREE, NULL_TREE);
-         expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
+         if (cont_bb)
+           {
+             gsi = gsi_last_bb (cont_bb);
+             t = fold_build2 (PLUS_EXPR, fd->iter_type,
+                              counts[fd->collapse - 1],
+                              build_int_cst (fd->iter_type, 1));
+             expand_omp_build_assign (&gsi, counts[fd->collapse - 1], t);
+             tree aref = build4 (ARRAY_REF, fd->iter_type,
+                                 counts[fd->ordered], size_zero_node,
+                                 NULL_TREE, NULL_TREE);
+             expand_omp_build_assign (&gsi, aref, counts[fd->collapse - 1]);
+           }
          t = counts[fd->collapse - 1];
        }
       else if (fd->collapse > 1)
@@ -3185,6 +4360,25 @@ expand_omp_for_generic (struct omp_region *region,
       vmain = gimple_omp_continue_control_use (cont_stmt);
       vback = gimple_omp_continue_control_def (cont_stmt);
 
+      if (cond_var)
+       {
+         tree itype = TREE_TYPE (cond_var);
+         tree t2;
+         if ((fd->ordered && fd->collapse == 1)
+              || bias
+              || POINTER_TYPE_P (type)
+              || TREE_CODE (fd->loop.n1) != INTEGER_CST
+              || fd->loop.cond_code != LT_EXPR)
+           t2 = build_int_cst (itype, 1);
+         else
+           t2 = fold_convert (itype, fd->loop.step);
+         t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
+         t2 = force_gimple_operand_gsi (&gsi, t2, false,
+                                        NULL_TREE, true, GSI_SAME_STMT);
+         assign_stmt = gimple_build_assign (cond_var, t2);
+         gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
+       }
+
       if (!gimple_omp_for_combined_p (fd->for_stmt))
        {
          if (POINTER_TYPE_P (type))
@@ -3200,20 +4394,21 @@ expand_omp_for_generic (struct omp_region *region,
 
          if (fd->ordered && counts[fd->collapse - 1] == NULL_TREE)
            {
+             tree tem;
              if (fd->collapse > 1)
-               t = fd->loop.v;
+               tem = fd->loop.v;
              else
                {
-                 t = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
-                                  fd->loops[0].v, fd->loops[0].n1);
-                 t = fold_convert (fd->iter_type, t);
+                 tem = fold_build2 (MINUS_EXPR, TREE_TYPE (fd->loops[0].v),
+                                    fd->loops[0].v, fd->loops[0].n1);
+                 tem = fold_convert (fd->iter_type, tem);
                }
              tree aref = build4 (ARRAY_REF, fd->iter_type,
                                  counts[fd->ordered], size_zero_node,
                                  NULL_TREE, NULL_TREE);
-             t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
-                                           true, GSI_SAME_STMT);
-             expand_omp_build_assign (&gsi, aref, t);
+             tem = force_gimple_operand_gsi (&gsi, tem, true, NULL_TREE,
+                                             true, GSI_SAME_STMT);
+             expand_omp_build_assign (&gsi, aref, tem);
            }
 
          t = build2 (fd->loop.cond_code, boolean_type_node,
@@ -3227,7 +4422,7 @@ expand_omp_for_generic (struct omp_region *region,
       gsi_remove (&gsi, true);
 
       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
-       collapse_bb = extract_omp_for_update_vars (fd, cont_bb, l1_bb);
+       collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, l1_bb);
 
       /* Emit code to get the next parallel iteration in L2_BB.  */
       gsi = gsi_start_bb (l2_bb);
@@ -3256,8 +4451,7 @@ expand_omp_for_generic (struct omp_region *region,
   if (fd->ordered)
     {
       tree arr = counts[fd->ordered];
-      tree clobber = build_constructor (TREE_TYPE (arr), NULL);
-      TREE_THIS_VOLATILE (clobber) = 1;
+      tree clobber = build_clobber (TREE_TYPE (arr));
       gsi_insert_after (&gsi, gimple_build_assign (arr, clobber),
                        GSI_SAME_STMT);
     }
@@ -3377,14 +4571,14 @@ expand_omp_for_generic (struct omp_region *region,
       /* We enter expand_omp_for_generic with a loop.  This original loop may
         have its own loop struct, or it may be part of an outer loop struct
         (which may be the fake loop).  */
-      struct loop *outer_loop = entry_bb->loop_father;
+      class loop *outer_loop = entry_bb->loop_father;
       bool orig_loop_has_loop_struct = l1_bb->loop_father != outer_loop;
 
       add_bb_to_loop (l2_bb, outer_loop);
 
       /* We've added a new loop around the original loop.  Allocate the
         corresponding loop struct.  */
-      struct loop *new_loop = alloc_loop ();
+      class loop *new_loop = alloc_loop ();
       new_loop->header = l0_bb;
       new_loop->latch = l2_bb;
       add_loop (new_loop, outer_loop);
@@ -3394,7 +4588,7 @@ expand_omp_for_generic (struct omp_region *region,
       if (!orig_loop_has_loop_struct
          && !gimple_omp_for_combined_p (fd->for_stmt))
        {
-         struct loop *orig_loop = alloc_loop ();
+         class loop *orig_loop = alloc_loop ();
          orig_loop->header = l1_bb;
          /* The loop may have multiple latches.  */
          add_loop (orig_loop, new_loop);
@@ -3402,6 +4596,127 @@ expand_omp_for_generic (struct omp_region *region,
     }
 }
 
+/* Helper function for expand_omp_for_static_nochunk.  If PTR is NULL,
+   compute the needed allocation size: for !ALLOC the size of the team
+   allocations, for ALLOC the size of the thread allocations.  SZ is the
+   initial size needed for other purposes, ALLOC_ALIGN the guaranteed
+   alignment of the allocation in bytes and CNT the number of elements of
+   each array; for !ALLOC this is omp_get_num_threads (), for ALLOC the
+   number of iterations handled by the current thread.  If PTR is non-NULL,
+   it is the start of the allocation and this routine assigns to
+   OMP_CLAUSE_DECL (c) of the corresponding _scantemp_ clauses pointers to
+   the individual arrays.  */
+
+static tree
+expand_omp_scantemp_alloc (tree clauses, tree ptr, unsigned HOST_WIDE_INT sz,
+                          unsigned HOST_WIDE_INT alloc_align, tree cnt,
+                          gimple_stmt_iterator *gsi, bool alloc)
+{
+  tree eltsz = NULL_TREE;
+  unsigned HOST_WIDE_INT preval = 0;
+  if (ptr && sz)
+    ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
+                      ptr, size_int (sz));
+  for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+    if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
+       && !OMP_CLAUSE__SCANTEMP__CONTROL (c)
+       && (!OMP_CLAUSE__SCANTEMP__ALLOC (c)) != alloc)
+      {
+       tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
+       unsigned HOST_WIDE_INT al = TYPE_ALIGN_UNIT (pointee_type);
+       if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
+         {
+           unsigned HOST_WIDE_INT szl
+             = tree_to_uhwi (TYPE_SIZE_UNIT (pointee_type));
+           szl = least_bit_hwi (szl);
+           if (szl)
+             al = MIN (al, szl);
+         }
+       if (ptr == NULL_TREE)
+         {
+           if (eltsz == NULL_TREE)
+             eltsz = TYPE_SIZE_UNIT (pointee_type);
+           else
+             eltsz = size_binop (PLUS_EXPR, eltsz,
+                                 TYPE_SIZE_UNIT (pointee_type));
+         }
+       if (preval == 0 && al <= alloc_align)
+         {
+           unsigned HOST_WIDE_INT diff = ROUND_UP (sz, al) - sz;
+           sz += diff;
+           if (diff && ptr)
+             ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr),
+                                ptr, size_int (diff));
+         }
+       else if (al > preval)
+         {
+           if (ptr)
+             {
+               ptr = fold_convert (pointer_sized_int_node, ptr);
+               ptr = fold_build2 (PLUS_EXPR, pointer_sized_int_node, ptr,
+                                  build_int_cst (pointer_sized_int_node,
+                                                 al - 1));
+               ptr = fold_build2 (BIT_AND_EXPR, pointer_sized_int_node, ptr,
+                                  build_int_cst (pointer_sized_int_node,
+                                                 -(HOST_WIDE_INT) al));
+               ptr = fold_convert (ptr_type_node, ptr);
+             }
+           else
+             sz += al - 1;
+         }
+       if (tree_fits_uhwi_p (TYPE_SIZE_UNIT (pointee_type)))
+         preval = al;
+       else
+         preval = 1;
+       if (ptr)
+         {
+           expand_omp_build_assign (gsi, OMP_CLAUSE_DECL (c), ptr, false);
+           ptr = OMP_CLAUSE_DECL (c);
+           ptr = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (ptr), ptr,
+                              size_binop (MULT_EXPR, cnt,
+                                          TYPE_SIZE_UNIT (pointee_type)));
+         }
+      }
+
+  if (ptr == NULL_TREE)
+    {
+      eltsz = size_binop (MULT_EXPR, eltsz, cnt);
+      if (sz)
+       eltsz = size_binop (PLUS_EXPR, eltsz, size_int (sz));
+      return eltsz;
+    }
+  else
+    return ptr;
+}
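+
+/* Illustrative note: the pointer realignment above is the usual
+   round-up-and-mask idiom; assuming AL is a power of two it is
+   equivalent to:
+
+     uintptr_t p = (uintptr_t) ptr;
+     p = (p + al - 1) & -(uintptr_t) al;
+
+   built here with PLUS_EXPR and BIT_AND_EXPR on pointer_sized_int_node.  */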
+
+/* Return the last _looptemp_ clause if one has been created for
+   lastprivate on distribute parallel for{, simd} or taskloop.
+   FD is the loop data and INNERC should be the second _looptemp_
+   clause (the one holding the end of the range).
+   This is followed by collapse - 1 _looptemp_ clauses for the
+   counts[1] and up, and for triangular loops followed by 4
+   further _looptemp_ clauses (one for counts[0], one first_inner_iterations,
+   one factor and one adjn1).  After this there is optionally one
+   _looptemp_ clause that this function returns.  */
+
+static tree
+find_lastprivate_looptemp (struct omp_for_data *fd, tree innerc)
+{
+  gcc_assert (innerc);
+  int count = fd->collapse - 1;
+  if (fd->non_rect
+      && fd->last_nonrect == fd->first_nonrect + 1
+      && !TYPE_UNSIGNED (TREE_TYPE (fd->loops[fd->last_nonrect].v)))
+    count += 4;
+  for (int i = 0; i < count; i++)
+    {
+      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
+                               OMP_CLAUSE__LOOPTEMP_);
+      gcc_assert (innerc);
+    }
+  return omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
+                         OMP_CLAUSE__LOOPTEMP_);
+}
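+
+/* Illustration derived from the comment above: with collapse == 3 and a
+   triangular loop nest the _looptemp_ chain is
+     [0] start, [1] end (INNERC), [2] counts[1], [3] counts[2],
+     [4] counts[0], [5] first_inner_iterations, [6] factor, [7] adjn1,
+   and slot [8], if present, is the clause this function returns.  */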
+
 /* A subroutine of expand_omp_for.  Generate code for a parallel
    loop with static schedule and no specified chunk size.  Given
    parameters:
@@ -3444,17 +4759,19 @@ expand_omp_for_static_nochunk (struct omp_region *region,
                               struct omp_for_data *fd,
                               gimple *inner_stmt)
 {
-  tree n, q, s0, e0, e, t, tt, nthreads, threadid;
+  tree n, q, s0, e0, e, t, tt, nthreads = NULL_TREE, threadid;
   tree type, itype, vmain, vback;
   basic_block entry_bb, second_bb, third_bb, exit_bb, seq_start_bb;
   basic_block body_bb, cont_bb, collapse_bb = NULL;
-  basic_block fin_bb;
-  gimple_stmt_iterator gsi;
+  basic_block fin_bb, fourth_bb = NULL, fifth_bb = NULL, sixth_bb = NULL;
+  basic_block exit1_bb = NULL, exit2_bb = NULL, exit3_bb = NULL;
+  gimple_stmt_iterator gsi, gsip;
   edge ep;
   bool broken_loop = region->cont == NULL;
   tree *counts = NULL;
   tree n1, n2, step;
   tree reductions = NULL_TREE;
+  tree cond_var = NULL_TREE, condtemp = NULL_TREE;
 
   itype = type = TREE_TYPE (fd->loop.v);
   if (POINTER_TYPE_P (type))
@@ -3479,6 +4796,8 @@ expand_omp_for_static_nochunk (struct omp_region *region,
   /* Iteration space partitioning goes in ENTRY_BB.  */
   gsi = gsi_last_nondebug_bb (entry_bb);
   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
+  gsip = gsi;
+  gsi_prev (&gsip);
 
   if (fd->collapse > 1)
     {
@@ -3508,7 +4827,7 @@ expand_omp_for_static_nochunk (struct omp_region *region,
       n2 = force_gimple_operand_gsi (&gsi, n2, true, NULL_TREE,
                                     true, GSI_SAME_STMT);
       gcond *cond_stmt = gimple_build_cond (fd->loop.cond_code, n1, n2,
-                                                NULL_TREE, NULL_TREE);
+                                           NULL_TREE, NULL_TREE);
       gsi_insert_before (&gsi, cond_stmt, GSI_SAME_STMT);
       if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
                     expand_omp_regimplify_p, NULL, NULL)
@@ -3538,28 +4857,105 @@ expand_omp_for_static_nochunk (struct omp_region *region,
       gsi = gsi_last_bb (entry_bb);
     }
 
-  if (fd->have_reductemp)
+  if (fd->lastprivate_conditional)
+    {
+      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
+      tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
+      if (fd->have_pointer_condtemp)
+       condtemp = OMP_CLAUSE_DECL (c);
+      c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
+      cond_var = OMP_CLAUSE_DECL (c);
+    }
+  if (fd->have_reductemp
+      /* For scan, we don't want to reinitialize condtemp before the
+        second loop.  */
+      || (fd->have_pointer_condtemp && !fd->have_scantemp)
+      || fd->have_nonctrl_scantemp)
     {
       tree t1 = build_int_cst (long_integer_type_node, 0);
       tree t2 = build_int_cst (long_integer_type_node, 1);
       tree t3 = build_int_cstu (long_integer_type_node,
                                (HOST_WIDE_INT_1U << 31) + 1);
       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
-      clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
-      reductions = OMP_CLAUSE_DECL (clauses);
-      gcc_assert (TREE_CODE (reductions) == SSA_NAME);
-      gimple *g = SSA_NAME_DEF_STMT (reductions);
-      reductions = gimple_assign_rhs1 (g);
-      OMP_CLAUSE_DECL (clauses) = reductions;
-      gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
+      gimple_stmt_iterator gsi2 = gsi_none ();
+      gimple *g = NULL;
+      tree mem = null_pointer_node, memv = NULL_TREE;
+      unsigned HOST_WIDE_INT condtemp_sz = 0;
+      unsigned HOST_WIDE_INT alloc_align = 0;
+      if (fd->have_reductemp)
+       {
+         gcc_assert (!fd->have_nonctrl_scantemp);
+         tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
+         reductions = OMP_CLAUSE_DECL (c);
+         gcc_assert (TREE_CODE (reductions) == SSA_NAME);
+         g = SSA_NAME_DEF_STMT (reductions);
+         reductions = gimple_assign_rhs1 (g);
+         OMP_CLAUSE_DECL (c) = reductions;
+         gsi2 = gsi_for_stmt (g);
+       }
+      else
+       {
+         if (gsi_end_p (gsip))
+           gsi2 = gsi_after_labels (region->entry);
+         else
+           gsi2 = gsip;
+         reductions = null_pointer_node;
+       }
+      if (fd->have_pointer_condtemp || fd->have_nonctrl_scantemp)
+       {
+         tree type;
+         if (fd->have_pointer_condtemp)
+           type = TREE_TYPE (condtemp);
+         else
+           type = ptr_type_node;
+         memv = create_tmp_var (type);
+         TREE_ADDRESSABLE (memv) = 1;
+         unsigned HOST_WIDE_INT sz = 0;
+         tree size = NULL_TREE;
+         if (fd->have_pointer_condtemp)
+           {
+             sz = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
+             sz *= fd->lastprivate_conditional;
+             condtemp_sz = sz;
+           }
+         if (fd->have_nonctrl_scantemp)
+           {
+             nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
+             gimple *g = gimple_build_call (nthreads, 0);
+             nthreads = create_tmp_var (integer_type_node);
+             gimple_call_set_lhs (g, nthreads);
+             gsi_insert_before (&gsi2, g, GSI_SAME_STMT);
+             nthreads = fold_convert (sizetype, nthreads);
+             alloc_align = TYPE_ALIGN_UNIT (long_long_integer_type_node);
+             size = expand_omp_scantemp_alloc (clauses, NULL_TREE, sz,
+                                               alloc_align, nthreads, NULL,
+                                               false);
+             size = fold_convert (type, size);
+           }
+         else
+           size = build_int_cst (type, sz);
+         expand_omp_build_assign (&gsi2, memv, size, false);
+         mem = build_fold_addr_expr (memv);
+       }
       tree t
        = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
                           9, t1, t2, t2, t3, t1, null_pointer_node,
-                          null_pointer_node, reductions, null_pointer_node);
+                          null_pointer_node, reductions, mem);
       force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
                                true, GSI_SAME_STMT);
-      gsi_remove (&gsi2, true);
-      release_ssa_name (gimple_assign_lhs (g));
+      if (fd->have_pointer_condtemp)
+       expand_omp_build_assign (&gsi2, condtemp, memv, false);
+      if (fd->have_nonctrl_scantemp)
+       {
+         tree ptr = fd->have_pointer_condtemp ? condtemp : memv;
+         expand_omp_scantemp_alloc (clauses, ptr, condtemp_sz,
+                                    alloc_align, nthreads, &gsi2, false);
+       }
+      if (fd->have_reductemp)
+       {
+         gsi_remove (&gsi2, true);
+         release_ssa_name (gimple_assign_lhs (g));
+       }
     }
   switch (gimple_omp_for_kind (fd->for_stmt))
     {
@@ -3645,6 +5041,72 @@ expand_omp_for_static_nochunk (struct omp_region *region,
   gsi = gsi_last_nondebug_bb (third_bb);
   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
 
+  if (fd->have_nonctrl_scantemp)
+    {
+      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
+      tree controlp = NULL_TREE, controlb = NULL_TREE;
+      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
+           && OMP_CLAUSE__SCANTEMP__CONTROL (c))
+         {
+           if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
+             controlb = OMP_CLAUSE_DECL (c);
+           else
+             controlp = OMP_CLAUSE_DECL (c);
+           if (controlb && controlp)
+             break;
+         }
+      gcc_assert (controlp && controlb);
+      tree cnt = create_tmp_var (sizetype);
+      gimple *g = gimple_build_assign (cnt, NOP_EXPR, q);
+      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+      unsigned HOST_WIDE_INT alloc_align = TYPE_ALIGN_UNIT (ptr_type_node);
+      tree sz = expand_omp_scantemp_alloc (clauses, NULL_TREE, 0,
+                                          alloc_align, cnt, NULL, true);
+      tree size = create_tmp_var (sizetype);
+      expand_omp_build_assign (&gsi, size, sz, false);
+      tree cmp = fold_build2 (GT_EXPR, boolean_type_node,
+                             size, size_int (16384));
+      expand_omp_build_assign (&gsi, controlb, cmp);
+      g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
+                            NULL_TREE, NULL_TREE);
+      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+      fourth_bb = split_block (third_bb, g)->dest;
+      gsi = gsi_last_nondebug_bb (fourth_bb);
+      /* FIXME: Once we have allocators, this should use the allocator.  */
+      g = gimple_build_call (builtin_decl_explicit (BUILT_IN_MALLOC), 1, size);
+      gimple_call_set_lhs (g, controlp);
+      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+      expand_omp_scantemp_alloc (clauses, controlp, 0, alloc_align, cnt,
+                                &gsi, true);
+      gsi_prev (&gsi);
+      g = gsi_stmt (gsi);
+      fifth_bb = split_block (fourth_bb, g)->dest;
+      gsi = gsi_last_nondebug_bb (fifth_bb);
+
+      g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_SAVE), 0);
+      gimple_call_set_lhs (g, controlp);
+      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+      tree alloca_decl = builtin_decl_explicit (BUILT_IN_ALLOCA_WITH_ALIGN);
+      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
+           && OMP_CLAUSE__SCANTEMP__ALLOC (c))
+         {
+           tree tmp = create_tmp_var (sizetype);
+           tree pointee_type = TREE_TYPE (TREE_TYPE (OMP_CLAUSE_DECL (c)));
+           g = gimple_build_assign (tmp, MULT_EXPR, cnt,
+                                    TYPE_SIZE_UNIT (pointee_type));
+           gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+           g = gimple_build_call (alloca_decl, 2, tmp,
+                                  size_int (TYPE_ALIGN (pointee_type)));
+           gimple_call_set_lhs (g, OMP_CLAUSE_DECL (c));
+           gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+         }
+
+      sixth_bb = split_block (fifth_bb, g)->dest;
+      gsi = gsi_last_nondebug_bb (sixth_bb);
+    }
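+  /* Roughly, the blocks just built correspond to this sketch
+     (illustrative only; CONTROLB/CONTROLP are the _scantemp_ control
+     decls):
+
+       size_t cnt = q;     // iterations handled by this thread
+       size_t size = per_iteration_bytes * cnt + alignment_padding;
+       controlb = size > 16384;
+       if (controlb)
+        controlp = malloc (size);   // large buffers go on the heap
+       else
+        {
+          controlp = __builtin_stack_save ();
+          // each _scantemp_ array then gets its own
+          // __builtin_alloca_with_align block
+        }
+  */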
+
   t = build2 (MULT_EXPR, itype, q, threadid);
   t = build2 (PLUS_EXPR, itype, t, tt);
   s0 = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true, GSI_SAME_STMT);
@@ -3679,15 +5141,7 @@ expand_omp_for_static_nochunk (struct omp_region *region,
       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
          && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
        {
-         int i;
-         for (i = 1; i < fd->collapse; i++)
-           {
-             innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
-                                       OMP_CLAUSE__LOOPTEMP_);
-             gcc_assert (innerc);
-           }
-         innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
-                                   OMP_CLAUSE__LOOPTEMP_);
+         innerc = find_lastprivate_looptemp (fd, innerc);
          if (innerc)
            {
              /* If needed (distribute parallel for with lastprivate),
@@ -3719,6 +5173,33 @@ expand_omp_for_static_nochunk (struct omp_region *region,
                                NULL_TREE, false, GSI_CONTINUE_LINKING);
   assign_stmt = gimple_build_assign (startvar, t);
   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
+  if (cond_var)
+    {
+      tree itype = TREE_TYPE (cond_var);
+      /* For the lastprivate(conditional:) itervar we need an iteration
+        counter that starts at a non-zero unsigned value and increases.
+        Prefer as few IVs as possible, so if we can use startvar
+        itself, use that, or startvar + constant (those would be
+        incremented with step), and as a last resort use s0 + 1,
+        incremented by 1 each iteration.  */
+      if (POINTER_TYPE_P (type)
+         || TREE_CODE (n1) != INTEGER_CST
+         || fd->loop.cond_code != LT_EXPR)
+       t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
+                        build_int_cst (itype, 1));
+      else if (tree_int_cst_sgn (n1) == 1)
+       t = fold_convert (itype, t);
+      else
+       {
+         tree c = fold_convert (itype, n1);
+         c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
+         t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
+       }
+      t = force_gimple_operand_gsi (&gsi, t, false,
+                                   NULL_TREE, false, GSI_CONTINUE_LINKING);
+      assign_stmt = gimple_build_assign (cond_var, t);
+      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
+    }
 
   t = fold_convert (itype, e0);
   t = fold_build2 (MULT_EXPR, itype, t, step);
@@ -3746,6 +5227,7 @@ expand_omp_for_static_nochunk (struct omp_region *region,
     }
   /* Handle linear clause adjustments.  */
   tree itercnt = NULL_TREE;
+  tree *nonrect_bounds = NULL;
   if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_FOR)
     for (tree c = gimple_omp_for_clauses (fd->for_stmt);
         c; c = OMP_CLAUSE_CHAIN (c))
@@ -3753,9 +5235,8 @@ expand_omp_for_static_nochunk (struct omp_region *region,
          && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
        {
          tree d = OMP_CLAUSE_DECL (c);
-         bool is_ref = omp_is_reference (d);
          tree t = d, a, dest;
-         if (is_ref)
+         if (omp_privatize_by_reference (t))
            t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
          if (itercnt == NULL_TREE)
            {
@@ -3784,15 +5265,22 @@ expand_omp_for_static_nochunk (struct omp_region *region,
                           : POINTER_PLUS_EXPR, TREE_TYPE (t), t, a);
          t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                        false, GSI_CONTINUE_LINKING);
-         assign_stmt = gimple_build_assign (dest, t);
-         gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
+         expand_omp_build_assign (&gsi, dest, t, true);
        }
   if (fd->collapse > 1)
-    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
-
-  if (!broken_loop)
     {
-      /* The code controlling the sequential loop replaces the
+      if (fd->non_rect)
+       {
+         nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
+         memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
+       }
+      expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
+                               startvar);
+    }
+
+  if (!broken_loop)
+    {
+      /* The code controlling the sequential loop replaces the
         GIMPLE_OMP_CONTINUE.  */
       gsi = gsi_last_nondebug_bb (cont_bb);
       gomp_continue *cont_stmt = as_a <gomp_continue *> (gsi_stmt (gsi));
@@ -3800,6 +5288,23 @@ expand_omp_for_static_nochunk (struct omp_region *region,
       vmain = gimple_omp_continue_control_use (cont_stmt);
       vback = gimple_omp_continue_control_def (cont_stmt);
 
+      if (cond_var)
+       {
+         tree itype = TREE_TYPE (cond_var);
+         tree t2;
+         if (POINTER_TYPE_P (type)
+             || TREE_CODE (n1) != INTEGER_CST
+             || fd->loop.cond_code != LT_EXPR)
+           t2 = build_int_cst (itype, 1);
+         else
+           t2 = fold_convert (itype, step);
+         t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
+         t2 = force_gimple_operand_gsi (&gsi, t2, false,
+                                        NULL_TREE, true, GSI_SAME_STMT);
+         assign_stmt = gimple_build_assign (cond_var, t2);
+         gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
+       }
+
       if (!gimple_omp_for_combined_p (fd->for_stmt))
        {
          if (POINTER_TYPE_P (type))
@@ -3823,7 +5328,8 @@ expand_omp_for_static_nochunk (struct omp_region *region,
       gsi_remove (&gsi, true);
 
       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
-       collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
+       collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
+                                                  cont_bb, body_bb);
     }
 
   /* Replace the GIMPLE_OMP_RETURN with a barrier, or nothing.  */
@@ -3831,7 +5337,9 @@ expand_omp_for_static_nochunk (struct omp_region *region,
   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
     {
       t = gimple_omp_return_lhs (gsi_stmt (gsi));
-      if (fd->have_reductemp)
+      if (fd->have_reductemp
+         || ((fd->have_pointer_condtemp || fd->have_scantemp)
+             && !fd->have_nonctrl_scantemp))
        {
          tree fn;
          if (t)
@@ -3842,15 +5350,55 @@ expand_omp_for_static_nochunk (struct omp_region *region,
          if (t)
            {
              gimple_call_set_lhs (g, t);
-             gsi_insert_after (&gsi, gimple_build_assign (reductions,
-                                                          NOP_EXPR, t),
-                               GSI_SAME_STMT);
+             if (fd->have_reductemp)
+               gsi_insert_after (&gsi, gimple_build_assign (reductions,
+                                                            NOP_EXPR, t),
+                                 GSI_SAME_STMT);
            }
          gsi_insert_after (&gsi, g, GSI_SAME_STMT);
        }
       else
        gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
     }
+  else if ((fd->have_pointer_condtemp || fd->have_scantemp)
+          && !fd->have_nonctrl_scantemp)
+    {
+      tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
+      gcall *g = gimple_build_call (fn, 0);
+      gsi_insert_after (&gsi, g, GSI_SAME_STMT);
+    }
+  if (fd->have_scantemp && !fd->have_nonctrl_scantemp)
+    {
+      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
+      tree controlp = NULL_TREE, controlb = NULL_TREE;
+      for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+       if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE__SCANTEMP_
+           && OMP_CLAUSE__SCANTEMP__CONTROL (c))
+         {
+           if (TREE_TYPE (OMP_CLAUSE_DECL (c)) == boolean_type_node)
+             controlb = OMP_CLAUSE_DECL (c);
+           else
+             controlp = OMP_CLAUSE_DECL (c);
+           if (controlb && controlp)
+             break;
+         }
+      gcc_assert (controlp && controlb);
+      gimple *g = gimple_build_cond (NE_EXPR, controlb, boolean_false_node,
+                                    NULL_TREE, NULL_TREE);
+      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+      exit1_bb = split_block (exit_bb, g)->dest;
+      gsi = gsi_after_labels (exit1_bb);
+      g = gimple_build_call (builtin_decl_explicit (BUILT_IN_FREE), 1,
+                            controlp);
+      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+      exit2_bb = split_block (exit1_bb, g)->dest;
+      gsi = gsi_after_labels (exit2_bb);
+      g = gimple_build_call (builtin_decl_implicit (BUILT_IN_STACK_RESTORE), 1,
+                            controlp);
+      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+      exit3_bb = split_block (exit2_bb, g)->dest;
+      gsi = gsi_after_labels (exit3_bb);
+    }
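The epilogue built above tears down the scan-temporary buffer on whichever path allocated it.  A minimal pseudo-C sketch of the net control flow, assuming controlb records whether the buffer was heap-allocated and controlp holds either the heap pointer or the saved stack mark (illustrative only, not the generated GIMPLE):

    if (controlb)        /* exit1_bb: heap path; its fallthrough edge is   */
      free (controlp);   /* redirected to exit3_bb, skipping the restore.  */
    else                 /* exit2_bb: the buffer came from an alloca,      */
      __builtin_stack_restore (controlp);   /* so undo it instead.         */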
   gsi_remove (&gsi, true);
 
   /* Connect all the blocks.  */
@@ -3859,8 +5407,34 @@ expand_omp_for_static_nochunk (struct omp_region *region,
   ep = find_edge (entry_bb, second_bb);
   ep->flags = EDGE_TRUE_VALUE;
   ep->probability = profile_probability::guessed_always ().apply_scale (1, 4);
-  find_edge (third_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
-  find_edge (third_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
+  if (fourth_bb)
+    {
+      ep = make_edge (third_bb, fifth_bb, EDGE_FALSE_VALUE);
+      ep->probability
+       = profile_probability::guessed_always ().apply_scale (1, 2);
+      ep = find_edge (third_bb, fourth_bb);
+      ep->flags = EDGE_TRUE_VALUE;
+      ep->probability
+       = profile_probability::guessed_always ().apply_scale (1, 2);
+      ep = find_edge (fourth_bb, fifth_bb);
+      redirect_edge_and_branch (ep, sixth_bb);
+    }
+  else
+    sixth_bb = third_bb;
+  find_edge (sixth_bb, seq_start_bb)->flags = EDGE_FALSE_VALUE;
+  find_edge (sixth_bb, fin_bb)->flags = EDGE_TRUE_VALUE;
+  if (exit1_bb)
+    {
+      ep = make_edge (exit_bb, exit2_bb, EDGE_FALSE_VALUE);
+      ep->probability
+       = profile_probability::guessed_always ().apply_scale (1, 2);
+      ep = find_edge (exit_bb, exit1_bb);
+      ep->flags = EDGE_TRUE_VALUE;
+      ep->probability
+       = profile_probability::guessed_always ().apply_scale (1, 2);
+      ep = find_edge (exit1_bb, exit2_bb);
+      redirect_edge_and_branch (ep, exit3_bb);
+    }
 
   if (!broken_loop)
     {
@@ -3888,14 +5462,24 @@ expand_omp_for_static_nochunk (struct omp_region *region,
 
   set_immediate_dominator (CDI_DOMINATORS, second_bb, entry_bb);
   set_immediate_dominator (CDI_DOMINATORS, third_bb, entry_bb);
-  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, third_bb);
+  if (fourth_bb)
+    {
+      set_immediate_dominator (CDI_DOMINATORS, fifth_bb, third_bb);
+      set_immediate_dominator (CDI_DOMINATORS, sixth_bb, third_bb);
+    }
+  set_immediate_dominator (CDI_DOMINATORS, seq_start_bb, sixth_bb);
 
   set_immediate_dominator (CDI_DOMINATORS, body_bb,
                           recompute_dominator (CDI_DOMINATORS, body_bb));
   set_immediate_dominator (CDI_DOMINATORS, fin_bb,
                           recompute_dominator (CDI_DOMINATORS, fin_bb));
+  if (exit1_bb)
+    {
+      set_immediate_dominator (CDI_DOMINATORS, exit2_bb, exit_bb);
+      set_immediate_dominator (CDI_DOMINATORS, exit3_bb, exit_bb);
+    }
 
-  struct loop *loop = body_bb->loop_father;
+  class loop *loop = body_bb->loop_father;
   if (loop != entry_bb->loop_father)
     {
       gcc_assert (broken_loop || loop->header == body_bb);
@@ -3981,12 +5565,13 @@ expand_omp_for_static_chunk (struct omp_region *region,
   tree type, itype, vmain, vback, vextra;
   basic_block entry_bb, exit_bb, body_bb, seq_start_bb, iter_part_bb;
   basic_block trip_update_bb = NULL, cont_bb, collapse_bb = NULL, fin_bb;
-  gimple_stmt_iterator gsi;
+  gimple_stmt_iterator gsi, gsip;
   edge se;
   bool broken_loop = region->cont == NULL;
   tree *counts = NULL;
   tree n1, n2, step;
   tree reductions = NULL_TREE;
+  tree cond_var = NULL_TREE, condtemp = NULL_TREE;
 
   itype = type = TREE_TYPE (fd->loop.v);
   if (POINTER_TYPE_P (type))
@@ -4015,6 +5600,8 @@ expand_omp_for_static_chunk (struct omp_region *region,
   /* Trip and adjustment setup goes in ENTRY_BB.  */
   gsi = gsi_last_nondebug_bb (entry_bb);
   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
+  gsip = gsi;
+  gsi_prev (&gsip);
 
   if (fd->collapse > 1)
     {
@@ -4074,28 +5661,68 @@ expand_omp_for_static_chunk (struct omp_region *region,
       gsi = gsi_last_bb (entry_bb);
     }
 
-  if (fd->have_reductemp)
+  if (fd->lastprivate_conditional)
+    {
+      tree clauses = gimple_omp_for_clauses (fd->for_stmt);
+      tree c = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
+      if (fd->have_pointer_condtemp)
+       condtemp = OMP_CLAUSE_DECL (c);
+      c = omp_find_clause (OMP_CLAUSE_CHAIN (c), OMP_CLAUSE__CONDTEMP_);
+      cond_var = OMP_CLAUSE_DECL (c);
+    }
+  if (fd->have_reductemp || fd->have_pointer_condtemp)
     {
       tree t1 = build_int_cst (long_integer_type_node, 0);
       tree t2 = build_int_cst (long_integer_type_node, 1);
       tree t3 = build_int_cstu (long_integer_type_node,
                                (HOST_WIDE_INT_1U << 31) + 1);
       tree clauses = gimple_omp_for_clauses (fd->for_stmt);
-      clauses = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
-      reductions = OMP_CLAUSE_DECL (clauses);
-      gcc_assert (TREE_CODE (reductions) == SSA_NAME);
-      gimple *g = SSA_NAME_DEF_STMT (reductions);
-      reductions = gimple_assign_rhs1 (g);
-      OMP_CLAUSE_DECL (clauses) = reductions;
-      gimple_stmt_iterator gsi2 = gsi_for_stmt (g);
+      gimple_stmt_iterator gsi2 = gsi_none ();
+      gimple *g = NULL;
+      tree mem = null_pointer_node, memv = NULL_TREE;
+      if (fd->have_reductemp)
+       {
+         tree c = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
+         reductions = OMP_CLAUSE_DECL (c);
+         gcc_assert (TREE_CODE (reductions) == SSA_NAME);
+         g = SSA_NAME_DEF_STMT (reductions);
+         reductions = gimple_assign_rhs1 (g);
+         OMP_CLAUSE_DECL (c) = reductions;
+         gsi2 = gsi_for_stmt (g);
+       }
+      else
+       {
+         if (gsi_end_p (gsip))
+           gsi2 = gsi_after_labels (region->entry);
+         else
+           gsi2 = gsip;
+         reductions = null_pointer_node;
+       }
+      if (fd->have_pointer_condtemp)
+       {
+         tree type = TREE_TYPE (condtemp);
+         memv = create_tmp_var (type);
+         TREE_ADDRESSABLE (memv) = 1;
+         unsigned HOST_WIDE_INT sz
+           = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
+         sz *= fd->lastprivate_conditional;
+         expand_omp_build_assign (&gsi2, memv, build_int_cst (type, sz),
+                                  false);
+         mem = build_fold_addr_expr (memv);
+       }
       tree t
        = build_call_expr (builtin_decl_explicit (BUILT_IN_GOMP_LOOP_START),
                           9, t1, t2, t2, t3, t1, null_pointer_node,
-                          null_pointer_node, reductions, null_pointer_node);
+                          null_pointer_node, reductions, mem);
       force_gimple_operand_gsi (&gsi2, t, true, NULL_TREE,
                                true, GSI_SAME_STMT);
-      gsi_remove (&gsi2, true);
-      release_ssa_name (gimple_assign_lhs (g));
+      if (fd->have_pointer_condtemp)
+       expand_omp_build_assign (&gsi2, condtemp, memv, false);
+      if (fd->have_reductemp)
+       {
+         gsi_remove (&gsi2, true);
+         release_ssa_name (gimple_assign_lhs (g));
+       }
     }
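For orientation, the nine arguments built here line up with libgomp's GOMP_loop_start entry point; the prototype below is quoted from memory of the GCC 9 era libgomp and should be treated as an assumption rather than part of this patch.  When fd->have_pointer_condtemp, *mem carries the required buffer size on input and receives the allocated pointer on return, which is why condtemp is copied out of memv right after the call:

    bool GOMP_loop_start (long start, long end, long incr, long sched,
                          long chunk_size, long *istart, long *iend,
                          uintptr_t *reductions, void **mem);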
   switch (gimple_omp_for_kind (fd->for_stmt))
     {
@@ -4229,15 +5856,7 @@ expand_omp_for_static_chunk (struct omp_region *region,
       if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST
          && gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_DISTRIBUTE)
        {
-         int i;
-         for (i = 1; i < fd->collapse; i++)
-           {
-             innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
-                                       OMP_CLAUSE__LOOPTEMP_);
-             gcc_assert (innerc);
-           }
-         innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
-                                   OMP_CLAUSE__LOOPTEMP_);
+         innerc = find_lastprivate_looptemp (fd, innerc);
          if (innerc)
            {
              /* If needed (distribute parallel for with lastprivate),
@@ -4270,6 +5889,33 @@ expand_omp_for_static_chunk (struct omp_region *region,
                                NULL_TREE, false, GSI_CONTINUE_LINKING);
   assign_stmt = gimple_build_assign (startvar, t);
   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
+  if (cond_var)
+    {
+      tree itype = TREE_TYPE (cond_var);
+      /* For lastprivate(conditional:) itervar, we need some iteration
+        counter that starts at unsigned non-zero and increases.
+        Prefer as few IVs as possible, so if we can use startvar
+        itself, use that, or startvar + constant (those would be
+        incremented with step), and as a last resort use s0 + 1
+        incremented by 1.  */
+      if (POINTER_TYPE_P (type)
+         || TREE_CODE (n1) != INTEGER_CST
+         || fd->loop.cond_code != LT_EXPR)
+       t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, s0),
+                        build_int_cst (itype, 1));
+      else if (tree_int_cst_sgn (n1) == 1)
+       t = fold_convert (itype, t);
+      else
+       {
+         tree c = fold_convert (itype, n1);
+         c = fold_build2 (MINUS_EXPR, itype, build_int_cst (itype, 1), c);
+         t = fold_build2 (PLUS_EXPR, itype, fold_convert (itype, t), c);
+       }
+      t = force_gimple_operand_gsi (&gsi, t, false,
+                                   NULL_TREE, false, GSI_CONTINUE_LINKING);
+      assign_stmt = gimple_build_assign (cond_var, t);
+      gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
+    }
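A minimal sketch of the three initializations chosen above, written as a hypothetical helper over plain long IVs (not GCC source); the only requirement is a counter that is nonzero from the first iteration and grows monotonically:

    static long
    initial_cond_var (long s0, long startvar, long n1,
                      bool n1_is_cst, bool cond_is_lt, bool iv_is_pointer)
    {
      if (iv_is_pointer || !n1_is_cst || !cond_is_lt)
        return s0 + 1;             /* Last resort: s0 + 1, stepped by 1.  */
      if (n1 > 0)
        return startvar;           /* startvar itself is already >= 1.  */
      return startvar + (1 - n1);  /* Shift so the counter starts at 1.  */
    }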
 
   t = fold_convert (itype, e0);
   t = fold_build2 (MULT_EXPR, itype, t, step);
@@ -4304,9 +5950,8 @@ expand_omp_for_static_chunk (struct omp_region *region,
          && !OMP_CLAUSE_LINEAR_NO_COPYIN (c))
        {
          tree d = OMP_CLAUSE_DECL (c);
-         bool is_ref = omp_is_reference (d);
          tree t = d, a, dest;
-         if (is_ref)
+         if (omp_privatize_by_reference (t))
            t = build_simple_mem_ref_loc (OMP_CLAUSE_LOCATION (c), t);
          tree type = TREE_TYPE (t);
          if (POINTER_TYPE_P (type))
@@ -4342,11 +5987,10 @@ expand_omp_for_static_chunk (struct omp_region *region,
                           : POINTER_PLUS_EXPR, TREE_TYPE (t), v, a);
          t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
                                        false, GSI_CONTINUE_LINKING);
-         assign_stmt = gimple_build_assign (dest, t);
-         gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
+         expand_omp_build_assign (&gsi, dest, t, true);
        }
   if (fd->collapse > 1)
-    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
+    expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
 
   if (!broken_loop)
     {
@@ -4357,6 +6001,23 @@ expand_omp_for_static_chunk (struct omp_region *region,
       vmain = gimple_omp_continue_control_use (cont_stmt);
       vback = gimple_omp_continue_control_def (cont_stmt);
 
+      if (cond_var)
+       {
+         tree itype = TREE_TYPE (cond_var);
+         tree t2;
+         if (POINTER_TYPE_P (type)
+             || TREE_CODE (n1) != INTEGER_CST
+             || fd->loop.cond_code != LT_EXPR)
+           t2 = build_int_cst (itype, 1);
+         else
+           t2 = fold_convert (itype, step);
+         t2 = fold_build2 (PLUS_EXPR, itype, cond_var, t2);
+         t2 = force_gimple_operand_gsi (&gsi, t2, false,
+                                        NULL_TREE, true, GSI_SAME_STMT);
+         assign_stmt = gimple_build_assign (cond_var, t2);
+         gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
+       }
+
       if (!gimple_omp_for_combined_p (fd->for_stmt))
        {
          if (POINTER_TYPE_P (type))
@@ -4384,7 +6045,7 @@ expand_omp_for_static_chunk (struct omp_region *region,
       gsi_remove (&gsi, true);
 
       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
-       collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
+       collapse_bb = extract_omp_for_update_vars (fd, NULL, cont_bb, body_bb);
 
       /* Trip update code goes into TRIP_UPDATE_BB.  */
       gsi = gsi_start_bb (trip_update_bb);
@@ -4400,7 +6061,7 @@ expand_omp_for_static_chunk (struct omp_region *region,
   if (!gimple_omp_return_nowait_p (gsi_stmt (gsi)))
     {
       t = gimple_omp_return_lhs (gsi_stmt (gsi));
-      if (fd->have_reductemp)
+      if (fd->have_reductemp || fd->have_pointer_condtemp)
        {
          tree fn;
          if (t)
@@ -4411,15 +6072,22 @@ expand_omp_for_static_chunk (struct omp_region *region,
          if (t)
            {
              gimple_call_set_lhs (g, t);
-             gsi_insert_after (&gsi, gimple_build_assign (reductions,
-                                                          NOP_EXPR, t),
-                               GSI_SAME_STMT);
+             if (fd->have_reductemp)
+               gsi_insert_after (&gsi, gimple_build_assign (reductions,
+                                                            NOP_EXPR, t),
+                                 GSI_SAME_STMT);
            }
          gsi_insert_after (&gsi, g, GSI_SAME_STMT);
        }
       else
        gsi_insert_after (&gsi, omp_build_barrier (t), GSI_SAME_STMT);
     }
+  else if (fd->have_pointer_condtemp)
+    {
+      tree fn = builtin_decl_explicit (BUILT_IN_GOMP_LOOP_END_NOWAIT);
+      gcall *g = gimple_build_call (fn, 0);
+      gsi_insert_after (&gsi, g, GSI_SAME_STMT);
+    }
   gsi_remove (&gsi, true);
 
   /* Connect the new blocks.  */
@@ -4549,8 +6217,8 @@ expand_omp_for_static_chunk (struct omp_region *region,
 
   if (!broken_loop)
     {
-      struct loop *loop = body_bb->loop_father;
-      struct loop *trip_loop = alloc_loop ();
+      class loop *loop = body_bb->loop_father;
+      class loop *trip_loop = alloc_loop ();
       trip_loop->header = iter_part_bb;
       trip_loop->latch = trip_update_bb;
       add_loop (trip_loop, iter_part_bb->loop_father);
@@ -4591,49 +6259,8 @@ expand_omp_for_static_chunk (struct omp_region *region,
        if (V cond N2) goto L0; else goto L2;
     L2:
 
-    For collapsed loops, given parameters:
-      collapse(3)
-      for (V1 = N11; V1 cond1 N12; V1 += STEP1)
-       for (V2 = N21; V2 cond2 N22; V2 += STEP2)
-         for (V3 = N31; V3 cond3 N32; V3 += STEP3)
-           BODY;
-
-    we generate pseudocode
-
-       if (cond3 is <)
-         adj = STEP3 - 1;
-       else
-         adj = STEP3 + 1;
-       count3 = (adj + N32 - N31) / STEP3;
-       if (cond2 is <)
-         adj = STEP2 - 1;
-       else
-         adj = STEP2 + 1;
-       count2 = (adj + N22 - N21) / STEP2;
-       if (cond1 is <)
-         adj = STEP1 - 1;
-       else
-         adj = STEP1 + 1;
-       count1 = (adj + N12 - N11) / STEP1;
-       count = count1 * count2 * count3;
-       V = 0;
-       V1 = N11;
-       V2 = N21;
-       V3 = N31;
-       goto L1;
-    L0:
-       BODY;
-       V += 1;
-       V3 += STEP3;
-       V2 += (V3 cond3 N32) ? 0 : STEP2;
-       V3 = (V3 cond3 N32) ? V3 : N31;
-       V1 += (V2 cond2 N22) ? 0 : STEP1;
-       V2 = (V2 cond2 N22) ? V2 : N21;
-    L1:
-       if (V < count) goto L0; else goto L2;
-    L2:
-
-      */
+    For collapsed loops, emit the outer loops as scalar
+    and only try to vectorize the innermost loop.  */
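As a hypothetical illustration of the new strategy (the loop below is invented, not taken from the patch), only the innermost k loop is handed to the vectorizer; the i and j loops are emitted as ordinary scalar loops around it:

    #pragma omp simd collapse(3)
    for (int i = 0; i < n1; i++)
      for (int j = 0; j < n2; j++)
        for (int k = 0; k < n3; k++)   /* only this loop is vectorized */
          a[i][j][k] += b[i][j][k];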
 
 static void
 expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
@@ -4648,11 +6275,19 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
   tree *counts = NULL;
   int i;
   int safelen_int = INT_MAX;
+  bool dont_vectorize = false;
   tree safelen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
                                  OMP_CLAUSE_SAFELEN);
   tree simduid = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
                                  OMP_CLAUSE__SIMDUID_);
+  tree ifc = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
+                             OMP_CLAUSE_IF);
+  tree simdlen = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
+                                 OMP_CLAUSE_SIMDLEN);
+  tree condtemp = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
+                                  OMP_CLAUSE__CONDTEMP_);
   tree n1, n2;
+  tree cond_var = condtemp ? OMP_CLAUSE_DECL (condtemp) : NULL_TREE;
 
   if (safelen)
     {
@@ -4665,6 +6300,12 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
       if (safelen_int == 1)
        safelen_int = 0;
     }
+  if ((ifc && integer_zerop (OMP_CLAUSE_IF_EXPR (ifc)))
+      || (simdlen && integer_onep (OMP_CLAUSE_SIMDLEN_EXPR (simdlen))))
+    {
+      safelen_int = 0;
+      dont_vectorize = true;
+    }
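Both clause forms below (hypothetical user code) take this path: safelen_int is cleared and the loop is flagged dont_vectorize, so it is later emitted as a plain scalar loop:

    #pragma omp simd if(0)         /* if clause folds to false */
    for (int i = 0; i < n; i++)
      a[i] += b[i];

    #pragma omp simd simdlen(1)    /* one lane requested */
    for (int i = 0; i < n; i++)
      c[i] *= 2;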
   type = TREE_TYPE (fd->loop.v);
   entry_bb = region->entry;
   cont_bb = region->cont;
@@ -4693,7 +6334,9 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_FOR);
   /* Not needed in SSA form right now.  */
   gcc_assert (!gimple_in_ssa_p (cfun));
-  if (fd->collapse > 1)
+  if (fd->collapse > 1
+      && (gimple_omp_for_combined_into_p (fd->for_stmt)
+         || broken_loop))
     {
       int first_zero_iter = -1, dummy = -1;
       basic_block zero_iter_bb = l2_bb, dummy_bb = NULL;
@@ -4720,6 +6363,7 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
       n2 = OMP_CLAUSE_DECL (innerc);
     }
   tree step = fd->loop.step;
+  tree orig_step = step; /* Remember STEP; it is scaled below when is_simt.  */
 
   bool is_simt = omp_find_clause (gimple_omp_for_clauses (fd->for_stmt),
                                  OMP_CLAUSE__SIMT_);
@@ -4757,24 +6401,177 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
       step = fold_build2 (MULT_EXPR, TREE_TYPE (step), step, vf);
     }
 
-  expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
+  tree n2var = NULL_TREE;
+  tree n2v = NULL_TREE;
+  tree *nonrect_bounds = NULL;
+  tree min_arg1 = NULL_TREE, min_arg2 = NULL_TREE;
   if (fd->collapse > 1)
     {
-      if (gimple_omp_for_combined_into_p (fd->for_stmt))
+      if (broken_loop || gimple_omp_for_combined_into_p (fd->for_stmt))
        {
+         if (fd->non_rect)
+           {
+             nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
+             memset (nonrect_bounds, 0,
+                     sizeof (tree) * (fd->last_nonrect + 1));
+           }
+         expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
+         gcc_assert (entry_bb == gsi_bb (gsi));
+         gcc_assert (fd->for_stmt == gsi_stmt (gsi));
          gsi_prev (&gsi);
-         expand_omp_for_init_vars (fd, &gsi, counts, NULL, n1);
-         gsi_next (&gsi);
+         entry_bb = split_block (entry_bb, gsi_stmt (gsi))->dest;
+         expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds,
+                                   NULL, n1);
+         gsi = gsi_for_stmt (fd->for_stmt);
+       }
+      if (broken_loop)
+       ;
+      else if (gimple_omp_for_combined_into_p (fd->for_stmt))
+       {
+         /* Compute in n2var the limit for the first innermost loop,
+            i.e. fd->loop.v + MIN (n2 - fd->loop.v, cnt),
+            where cnt is how many iterations the loop would have if
+            all further iterations were assigned to the current task.  */
+         n2var = create_tmp_var (type);
+         i = fd->collapse - 1;
+         tree itype = TREE_TYPE (fd->loops[i].v);
+         if (POINTER_TYPE_P (itype))
+           itype = signed_type_for (itype);
+         t = build_int_cst (itype, (fd->loops[i].cond_code == LT_EXPR
+                                    ? -1 : 1));
+         t = fold_build2 (PLUS_EXPR, itype,
+                          fold_convert (itype, fd->loops[i].step), t);
+         t = fold_build2 (PLUS_EXPR, itype, t,
+                          fold_convert (itype, fd->loops[i].n2));
+         if (fd->loops[i].m2)
+           {
+             tree t2 = fold_convert (itype,
+                                     fd->loops[i - fd->loops[i].outer].v);
+             tree t3 = fold_convert (itype, fd->loops[i].m2);
+             t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
+             t = fold_build2 (PLUS_EXPR, itype, t, t2);
+           }
+         t = fold_build2 (MINUS_EXPR, itype, t,
+                          fold_convert (itype, fd->loops[i].v));
+         if (TYPE_UNSIGNED (itype) && fd->loops[i].cond_code == GT_EXPR)
+           t = fold_build2 (TRUNC_DIV_EXPR, itype,
+                            fold_build1 (NEGATE_EXPR, itype, t),
+                            fold_build1 (NEGATE_EXPR, itype,
+                                         fold_convert (itype,
+                                                       fd->loops[i].step)));
+         else
+           t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
+                            fold_convert (itype, fd->loops[i].step));
+         t = fold_convert (type, t);
+         tree t2 = fold_build2 (MINUS_EXPR, type, n2, n1);
+         min_arg1 = create_tmp_var (type);
+         expand_omp_build_assign (&gsi, min_arg1, t2);
+         min_arg2 = create_tmp_var (type);
+         expand_omp_build_assign (&gsi, min_arg2, t);
        }
       else
-       for (i = 0; i < fd->collapse; i++)
-         {
-           tree itype = TREE_TYPE (fd->loops[i].v);
-           if (POINTER_TYPE_P (itype))
-             itype = signed_type_for (itype);
-           t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
-           expand_omp_build_assign (&gsi, fd->loops[i].v, t);
-         }
+       {
+         if (TREE_CODE (n2) == INTEGER_CST)
+           {
+             /* Indicate for lastprivate handling that at least one iteration
+                has been performed, without wasting runtime.  */
+             if (integer_nonzerop (n2))
+               expand_omp_build_assign (&gsi, fd->loop.v,
+                                        fold_convert (type, n2));
+             else
+               /* Indicate that no iteration has been performed.  */
+               expand_omp_build_assign (&gsi, fd->loop.v,
+                                        build_one_cst (type));
+           }
+         else
+           {
+             expand_omp_build_assign (&gsi, fd->loop.v,
+                                      build_zero_cst (type));
+             expand_omp_build_assign (&gsi, n2, build_one_cst (type));
+           }
+         for (i = 0; i < fd->collapse; i++)
+           {
+             t = fold_convert (TREE_TYPE (fd->loops[i].v), fd->loops[i].n1);
+             if (fd->loops[i].m1)
+               {
+                 tree t2
+                   = fold_convert (TREE_TYPE (t),
+                                   fd->loops[i - fd->loops[i].outer].v);
+                 tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i].m1);
+                 t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
+                 t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
+               }
+             expand_omp_build_assign (&gsi, fd->loops[i].v, t);
+             /* For normal non-combined collapsed loops just initialize
+                the outermost iterator in the entry_bb.  */
+             if (!broken_loop)
+               break;
+           }
+       }
+    }
+  else
+    expand_omp_build_assign (&gsi, fd->loop.v, fold_convert (type, n1));
+  tree altv = NULL_TREE, altn2 = NULL_TREE;
+  if (fd->collapse == 1
+      && !broken_loop
+      && TREE_CODE (orig_step) != INTEGER_CST)
+    {
+      /* The vectorizer currently punts on loops with a non-constant step
+        for the main IV (it can't compute the number of iterations and
+        gives up because of that).  Since for OpenMP loops it is always
+        possible to compute the number of iterations upfront, use an
+        alternate IV as the loop iterator:
+        altn2 = n1 < n2 ? (n2 - n1 + step - 1) / step : 0;
+        for (i = n1, altv = 0; altv < altn2; altv++, i += step)  */
+      altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
+      expand_omp_build_assign (&gsi, altv, build_zero_cst (TREE_TYPE (altv)));
+      tree itype = TREE_TYPE (fd->loop.v);
+      if (POINTER_TYPE_P (itype))
+       itype = signed_type_for (itype);
+      t = build_int_cst (itype, (fd->loop.cond_code == LT_EXPR ? -1 : 1));
+      t = fold_build2 (PLUS_EXPR, itype,
+                      fold_convert (itype, step), t);
+      t = fold_build2 (PLUS_EXPR, itype, t, fold_convert (itype, n2));
+      t = fold_build2 (MINUS_EXPR, itype, t,
+                      fold_convert (itype, fd->loop.v));
+      if (TYPE_UNSIGNED (itype) && fd->loop.cond_code == GT_EXPR)
+       t = fold_build2 (TRUNC_DIV_EXPR, itype,
+                        fold_build1 (NEGATE_EXPR, itype, t),
+                        fold_build1 (NEGATE_EXPR, itype,
+                                     fold_convert (itype, step)));
+      else
+       t = fold_build2 (TRUNC_DIV_EXPR, itype, t,
+                        fold_convert (itype, step));
+      t = fold_convert (TREE_TYPE (altv), t);
+      altn2 = create_tmp_var (TREE_TYPE (altv));
+      expand_omp_build_assign (&gsi, altn2, t);
+      tree t2 = fold_convert (TREE_TYPE (fd->loop.v), n2);
+      t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
+                                    true, GSI_SAME_STMT);
+      t2 = fold_build2 (fd->loop.cond_code, boolean_type_node, fd->loop.v, t2);
+      gassign *g = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
+                                       build_zero_cst (TREE_TYPE (altv)));
+      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+    }
+  else if (fd->collapse > 1
+          && !broken_loop
+          && !gimple_omp_for_combined_into_p (fd->for_stmt)
+          && TREE_CODE (fd->loops[fd->collapse - 1].step) != INTEGER_CST)
+    {
+      altv = create_tmp_var (unsigned_type_for (TREE_TYPE (fd->loops[0].v)));
+      altn2 = create_tmp_var (TREE_TYPE (altv));
+    }
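A worked instance of the alternate-IV rewrite for the collapse == 1 case above, with invented numbers: n1 = 3, n2 = 17, step = 5 gives altn2 = (17 - 3 + 5 - 1) / 5 = 3, so altv counts 0, 1, 2 while the original IV visits 3, 8, 13: the same iterations, but driven by a unit-step counter the vectorizer can analyze.  As a sketch over long IVs:

    unsigned long altn2
      = n1 < n2 ? (unsigned long) (n2 - n1 + step - 1) / step : 0;
    long i = n1;
    for (unsigned long altv = 0; altv < altn2; altv++, i += step)
      body (i);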
+  if (cond_var)
+    {
+      if (POINTER_TYPE_P (type)
+         || TREE_CODE (n1) != INTEGER_CST
+         || fd->loop.cond_code != LT_EXPR
+         || tree_int_cst_sgn (n1) != 1)
+       expand_omp_build_assign (&gsi, cond_var,
+                                build_one_cst (TREE_TYPE (cond_var)));
+      else
+       expand_omp_build_assign (&gsi, cond_var,
+                                fold_convert (TREE_TYPE (cond_var), n1));
     }
 
   /* Remove the GIMPLE_OMP_FOR statement.  */
@@ -4787,11 +6584,23 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
       stmt = gsi_stmt (gsi);
       gcc_assert (gimple_code (stmt) == GIMPLE_OMP_CONTINUE);
 
-      if (POINTER_TYPE_P (type))
-       t = fold_build_pointer_plus (fd->loop.v, step);
-      else
-       t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
-      expand_omp_build_assign (&gsi, fd->loop.v, t);
+      if (fd->collapse == 1
+         || gimple_omp_for_combined_into_p (fd->for_stmt))
+       {
+         if (POINTER_TYPE_P (type))
+           t = fold_build_pointer_plus (fd->loop.v, step);
+         else
+           t = fold_build2 (PLUS_EXPR, type, fd->loop.v, step);
+         expand_omp_build_assign (&gsi, fd->loop.v, t);
+       }
+      else if (TREE_CODE (n2) != INTEGER_CST)
+       expand_omp_build_assign (&gsi, fd->loop.v, build_one_cst (type));
+      if (altv)
+       {
+         t = fold_build2 (PLUS_EXPR, TREE_TYPE (altv), altv,
+                          build_one_cst (TREE_TYPE (altv)));
+         expand_omp_build_assign (&gsi, altv, t);
+       }
 
       if (fd->collapse > 1)
        {
@@ -4809,37 +6618,19 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
                               fd->loops[i].v, t);
            }
          expand_omp_build_assign (&gsi, fd->loops[i].v, t);
-
-         for (i = fd->collapse - 1; i > 0; i--)
-           {
-             tree itype = TREE_TYPE (fd->loops[i].v);
-             tree itype2 = TREE_TYPE (fd->loops[i - 1].v);
-             if (POINTER_TYPE_P (itype2))
-               itype2 = signed_type_for (itype2);
-             t = fold_convert (itype2, fd->loops[i - 1].step);
-             t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
-                                           GSI_SAME_STMT);
-             t = build3 (COND_EXPR, itype2,
-                         build2 (fd->loops[i].cond_code, boolean_type_node,
-                                 fd->loops[i].v,
-                                 fold_convert (itype, fd->loops[i].n2)),
-                         build_int_cst (itype2, 0), t);
-             if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i - 1].v)))
-               t = fold_build_pointer_plus (fd->loops[i - 1].v, t);
-             else
-               t = fold_build2 (PLUS_EXPR, itype2, fd->loops[i - 1].v, t);
-             expand_omp_build_assign (&gsi, fd->loops[i - 1].v, t);
-
-             t = fold_convert (itype, fd->loops[i].n1);
-             t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE, true,
-                                           GSI_SAME_STMT);
-             t = build3 (COND_EXPR, itype,
-                         build2 (fd->loops[i].cond_code, boolean_type_node,
-                                 fd->loops[i].v,
-                                 fold_convert (itype, fd->loops[i].n2)),
-                         fd->loops[i].v, t);
-             expand_omp_build_assign (&gsi, fd->loops[i].v, t);
-           }
+       }
+      if (cond_var)
+       {
+         if (POINTER_TYPE_P (type)
+             || TREE_CODE (n1) != INTEGER_CST
+             || fd->loop.cond_code != LT_EXPR
+             || tree_int_cst_sgn (n1) != 1)
+           t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
+                            build_one_cst (TREE_TYPE (cond_var)));
+         else
+           t = fold_build2 (PLUS_EXPR, TREE_TYPE (cond_var), cond_var,
+                            fold_convert (TREE_TYPE (cond_var), step));
+         expand_omp_build_assign (&gsi, cond_var, t);
        }
 
       /* Remove GIMPLE_OMP_CONTINUE.  */
@@ -4849,14 +6640,40 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
   /* Emit the condition in L1_BB.  */
   gsi = gsi_start_bb (l1_bb);
 
-  t = fold_convert (type, n2);
-  t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
-                               false, GSI_CONTINUE_LINKING);
-  tree v = fd->loop.v;
-  if (DECL_P (v) && TREE_ADDRESSABLE (v))
-    v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
-                                 false, GSI_CONTINUE_LINKING);
-  t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
+  if (altv)
+    t = build2 (LT_EXPR, boolean_type_node, altv, altn2);
+  else if (fd->collapse > 1
+          && !gimple_omp_for_combined_into_p (fd->for_stmt)
+          && !broken_loop)
+    {
+      i = fd->collapse - 1;
+      tree itype = TREE_TYPE (fd->loops[i].v);
+      if (fd->loops[i].m2)
+       t = n2v = create_tmp_var (itype);
+      else
+       t = fold_convert (itype, fd->loops[i].n2);
+      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
+                                   false, GSI_CONTINUE_LINKING);
+      tree v = fd->loops[i].v;
+      if (DECL_P (v) && TREE_ADDRESSABLE (v))
+       v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
+                                     false, GSI_CONTINUE_LINKING);
+      t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
+    }
+  else
+    {
+      if (fd->collapse > 1 && !broken_loop)
+       t = n2var;
+      else
+       t = fold_convert (type, n2);
+      t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
+                                   false, GSI_CONTINUE_LINKING);
+      tree v = fd->loop.v;
+      if (DECL_P (v) && TREE_ADDRESSABLE (v))
+       v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
+                                     false, GSI_CONTINUE_LINKING);
+      t = build2 (fd->loop.cond_code, boolean_type_node, v, t);
+    }
   cond_stmt = gimple_build_cond_empty (t);
   gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
   if (walk_tree (gimple_cond_lhs_ptr (cond_stmt), expand_omp_regimplify_p,
@@ -4872,7 +6689,7 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
   if (is_simt)
     {
       gsi = gsi_start_bb (l2_bb);
-      step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), fd->loop.step, step);
+      step = fold_build2 (MINUS_EXPR, TREE_TYPE (step), orig_step, step);
       if (POINTER_TYPE_P (type))
        t = fold_build_pointer_plus (fd->loop.v, step);
       else
@@ -4921,15 +6738,216 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
       FALLTHRU_EDGE (entry_bb)->flags = EDGE_TRUE_VALUE;
       FALLTHRU_EDGE (entry_bb)->probability
         = profile_probability::guessed_always ().apply_scale (7, 8);
-      BRANCH_EDGE (entry_bb)->probability 
+      BRANCH_EDGE (entry_bb)->probability
         = FALLTHRU_EDGE (entry_bb)->probability.invert ();
       l2_dom_bb = entry_bb;
     }
   set_immediate_dominator (CDI_DOMINATORS, l2_bb, l2_dom_bb);
 
+  if (!broken_loop && fd->collapse > 1)
+    {
+      basic_block last_bb = l1_bb;
+      basic_block init_bb = NULL;
+      for (i = fd->collapse - 2; i >= 0; i--)
+       {
+         tree nextn2v = NULL_TREE;
+         if (EDGE_SUCC (last_bb, 0)->flags & EDGE_FALSE_VALUE)
+           e = EDGE_SUCC (last_bb, 0);
+         else
+           e = EDGE_SUCC (last_bb, 1);
+         basic_block bb = split_edge (e);
+         if (POINTER_TYPE_P (TREE_TYPE (fd->loops[i].v)))
+           {
+             t = fold_convert (sizetype, fd->loops[i].step);
+             t = fold_build_pointer_plus (fd->loops[i].v, t);
+           }
+         else
+           {
+             t = fold_convert (TREE_TYPE (fd->loops[i].v),
+                               fd->loops[i].step);
+             t = fold_build2 (PLUS_EXPR, TREE_TYPE (fd->loops[i].v),
+                              fd->loops[i].v, t);
+           }
+         gsi = gsi_after_labels (bb);
+         expand_omp_build_assign (&gsi, fd->loops[i].v, t);
+
+         bb = split_block (bb, last_stmt (bb))->dest;
+         gsi = gsi_start_bb (bb);
+         tree itype = TREE_TYPE (fd->loops[i].v);
+         if (fd->loops[i].m2)
+           t = nextn2v = create_tmp_var (itype);
+         else
+           t = fold_convert (itype, fd->loops[i].n2);
+         t = force_gimple_operand_gsi (&gsi, t, true, NULL_TREE,
+                                       false, GSI_CONTINUE_LINKING);
+         tree v = fd->loops[i].v;
+         if (DECL_P (v) && TREE_ADDRESSABLE (v))
+           v = force_gimple_operand_gsi (&gsi, v, true, NULL_TREE,
+                                         false, GSI_CONTINUE_LINKING);
+         t = build2 (fd->loops[i].cond_code, boolean_type_node, v, t);
+         cond_stmt = gimple_build_cond_empty (t);
+         gsi_insert_after (&gsi, cond_stmt, GSI_CONTINUE_LINKING);
+         if (walk_tree (gimple_cond_lhs_ptr (cond_stmt),
+                        expand_omp_regimplify_p, NULL, NULL)
+             || walk_tree (gimple_cond_rhs_ptr (cond_stmt),
+                           expand_omp_regimplify_p, NULL, NULL))
+           {
+             gsi = gsi_for_stmt (cond_stmt);
+             gimple_regimplify_operands (cond_stmt, &gsi);
+           }
+         ne = single_succ_edge (bb);
+         ne->flags = EDGE_FALSE_VALUE;
+
+         init_bb = create_empty_bb (bb);
+         set_immediate_dominator (CDI_DOMINATORS, init_bb, bb);
+         add_bb_to_loop (init_bb, bb->loop_father);
+         e = make_edge (bb, init_bb, EDGE_TRUE_VALUE);
+         e->probability
+           = profile_probability::guessed_always ().apply_scale (7, 8);
+         ne->probability = e->probability.invert ();
+
+         gsi = gsi_after_labels (init_bb);
+         t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
+                           fd->loops[i + 1].n1);
+         if (fd->loops[i + 1].m1)
+           {
+             tree t2 = fold_convert (TREE_TYPE (t),
+                                     fd->loops[i + 1
+                                               - fd->loops[i + 1].outer].v);
+             tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m1);
+             t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
+             t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
+           }
+         expand_omp_build_assign (&gsi, fd->loops[i + 1].v, t);
+         if (fd->loops[i + 1].m2)
+           {
+             if (i + 2 == fd->collapse && (n2var || altv))
+               {
+                 gcc_assert (n2v == NULL_TREE);
+                 n2v = create_tmp_var (TREE_TYPE (fd->loops[i + 1].v));
+               }
+             t = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
+                               fd->loops[i + 1].n2);
+             tree t2 = fold_convert (TREE_TYPE (t),
+                                     fd->loops[i + 1
+                                               - fd->loops[i + 1].outer].v);
+             tree t3 = fold_convert (TREE_TYPE (t), fd->loops[i + 1].m2);
+             t2 = fold_build2 (MULT_EXPR, TREE_TYPE (t), t2, t3);
+             t = fold_build2 (PLUS_EXPR, TREE_TYPE (t), t, t2);
+             expand_omp_build_assign (&gsi, n2v, t);
+           }
+         if (i + 2 == fd->collapse && n2var)
+           {
+             /* For composite simd, n2 is the first iteration the current
+                task shouldn't already handle, so we effectively want to use
+                for (V3 = N31; V < N2 && V3 < N32; V++, V3 += STEP3)
+                as the vectorized loop.  The vectorizer, however, will not
+                vectorize that form, so instead compute N2VAR as
+                N2VAR = V + MIN (N2 - V, COUNTS3) and use
+                for (V3 = N31; V < N2VAR; V++, V3 += STEP3)
+                as the loop to vectorize.  */
+             tree t2 = fold_build2 (MINUS_EXPR, type, n2, fd->loop.v);
+             if (fd->loops[i + 1].m1 || fd->loops[i + 1].m2)
+               {
+                 t = build_int_cst (itype, (fd->loops[i + 1].cond_code
+                                            == LT_EXPR ? -1 : 1));
+                 t = fold_build2 (PLUS_EXPR, itype,
+                                  fold_convert (itype,
+                                                fd->loops[i + 1].step), t);
+                 if (fd->loops[i + 1].m2)
+                   t = fold_build2 (PLUS_EXPR, itype, t, n2v);
+                 else
+                   t = fold_build2 (PLUS_EXPR, itype, t,
+                                    fold_convert (itype,
+                                                  fd->loops[i + 1].n2));
+                 t = fold_build2 (MINUS_EXPR, itype, t,
+                                  fold_convert (itype, fd->loops[i + 1].v));
+                 tree step = fold_convert (itype, fd->loops[i + 1].step);
+                 if (TYPE_UNSIGNED (itype)
+                     && fd->loops[i + 1].cond_code == GT_EXPR)
+                   t = fold_build2 (TRUNC_DIV_EXPR, itype,
+                                    fold_build1 (NEGATE_EXPR, itype, t),
+                                    fold_build1 (NEGATE_EXPR, itype, step));
+                 else
+                   t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
+                 t = fold_convert (type, t);
+               }
+             else
+               t = counts[i + 1];
+             expand_omp_build_assign (&gsi, min_arg1, t2);
+             expand_omp_build_assign (&gsi, min_arg2, t);
+             e = split_block (init_bb, last_stmt (init_bb));
+             gsi = gsi_after_labels (e->dest);
+             init_bb = e->dest;
+             remove_edge (FALLTHRU_EDGE (entry_bb));
+             make_edge (entry_bb, init_bb, EDGE_FALLTHRU);
+             set_immediate_dominator (CDI_DOMINATORS, init_bb, entry_bb);
+             set_immediate_dominator (CDI_DOMINATORS, l1_bb, init_bb);
+             t = fold_build2 (MIN_EXPR, type, min_arg1, min_arg2);
+             t = fold_build2 (PLUS_EXPR, type, fd->loop.v, t);
+             expand_omp_build_assign (&gsi, n2var, t);
+           }
+         if (i + 2 == fd->collapse && altv)
+           {
+             /* The vectorizer currently punts on loops with a non-constant
+                step for the main IV (it can't compute the number of
+                iterations and gives up because of that).  Since for OpenMP
+                loops it is always possible to compute the number of
+                iterations upfront, use an alternate IV as the loop
+                iterator.  */
+             expand_omp_build_assign (&gsi, altv,
+                                      build_zero_cst (TREE_TYPE (altv)));
+             tree itype = TREE_TYPE (fd->loops[i + 1].v);
+             if (POINTER_TYPE_P (itype))
+               itype = signed_type_for (itype);
+             t = build_int_cst (itype, (fd->loops[i + 1].cond_code == LT_EXPR
+                                        ? -1 : 1));
+             t = fold_build2 (PLUS_EXPR, itype,
+                              fold_convert (itype, fd->loops[i + 1].step), t);
+             t = fold_build2 (PLUS_EXPR, itype, t,
+                              fold_convert (itype,
+                                            fd->loops[i + 1].m2
+                                            ? n2v : fd->loops[i + 1].n2));
+             t = fold_build2 (MINUS_EXPR, itype, t,
+                              fold_convert (itype, fd->loops[i + 1].v));
+             tree step = fold_convert (itype, fd->loops[i + 1].step);
+             if (TYPE_UNSIGNED (itype)
+                 && fd->loops[i + 1].cond_code == GT_EXPR)
+               t = fold_build2 (TRUNC_DIV_EXPR, itype,
+                                fold_build1 (NEGATE_EXPR, itype, t),
+                                fold_build1 (NEGATE_EXPR, itype, step));
+             else
+               t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
+             t = fold_convert (TREE_TYPE (altv), t);
+             expand_omp_build_assign (&gsi, altn2, t);
+             tree t2 = fold_convert (TREE_TYPE (fd->loops[i + 1].v),
+                                     fd->loops[i + 1].m2
+                                     ? n2v : fd->loops[i + 1].n2);
+             t2 = force_gimple_operand_gsi (&gsi, t2, true, NULL_TREE,
+                                            true, GSI_SAME_STMT);
+             t2 = fold_build2 (fd->loops[i + 1].cond_code, boolean_type_node,
+                               fd->loops[i + 1].v, t2);
+             gassign *g
+               = gimple_build_assign (altn2, COND_EXPR, t2, altn2,
+                                      build_zero_cst (TREE_TYPE (altv)));
+             gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+           }
+         n2v = nextn2v;
+
+         make_edge (init_bb, last_bb, EDGE_FALLTHRU);
+         if (!gimple_omp_for_combined_into_p (fd->for_stmt))
+           {
+             e = find_edge (entry_bb, last_bb);
+             redirect_edge_succ (e, bb);
+             set_immediate_dominator (CDI_DOMINATORS, bb, entry_bb);
+             set_immediate_dominator (CDI_DOMINATORS, last_bb, init_bb);
+           }
+
+         last_bb = bb;
+       }
+    }
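To make the N2VAR = V + MIN (N2 - V, COUNTS3) computation above concrete, a worked example with invented numbers:

    /* Say the innermost loop runs COUNTS3 = 64 iterations per outer
       iteration and this task owns combined iterations V in [0, 100).
       Entering the innermost loop at V = 64:
         N2VAR = 64 + MIN (100 - 64, 64) = 100
       so the vectorized run covers only the 36 iterations the task
       still owns instead of the full 64.  */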
   if (!broken_loop)
     {
-      struct loop *loop = alloc_loop ();
+      class loop *loop = alloc_loop ();
       loop->header = l1_bb;
       loop->latch = cont_bb;
       add_loop (loop, l1_bb->loop_father);
@@ -4947,8 +6965,17 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
          && loop->safelen > 1)
        {
          loop->force_vectorize = true;
+         if (simdlen && tree_fits_uhwi_p (OMP_CLAUSE_SIMDLEN_EXPR (simdlen)))
+           {
+             unsigned HOST_WIDE_INT v
+               = tree_to_uhwi (OMP_CLAUSE_SIMDLEN_EXPR (simdlen));
+             if (v < INT_MAX && v <= (unsigned HOST_WIDE_INT) loop->safelen)
+               loop->simdlen = v;
+           }
          cfun->has_force_vectorize_loops = true;
        }
+      else if (dont_vectorize)
+       loop->dont_vectorize = true;
     }
   else if (simduid)
     cfun->has_simduid_loops = true;
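A hypothetical clause combination exercising the new simdlen recording: safelen bounds the transform, and a smaller constant simdlen is stored as the preferred width (values above safelen or INT_MAX are ignored):

    #pragma omp simd simdlen(8) safelen(16)
    for (int i = 0; i < n; i++)       /* loop->safelen = 16,  */
      a[i] = a[i + 16] + 1.0f;        /* loop->simdlen = 8    */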
@@ -5027,7 +7054,7 @@ expand_omp_taskloop_for_outer (struct omp_region *region,
             be executed in that case, so just avoid uninit warnings.  */
          for (i = first_zero_iter; i < fd->collapse; i++)
            if (SSA_VAR_P (counts[i]))
-             TREE_NO_WARNING (counts[i]) = 1;
+             suppress_warning (counts[i], OPT_Wuninitialized);
          gsi_prev (&gsi);
          edge e = split_block (entry_bb, gsi_stmt (gsi));
          entry_bb = e->dest;
@@ -5071,15 +7098,7 @@ expand_omp_taskloop_for_outer (struct omp_region *region,
   tree endvar = OMP_CLAUSE_DECL (innerc);
   if (fd->collapse > 1 && TREE_CODE (fd->loop.n2) != INTEGER_CST)
     {
-      gcc_assert (innerc);
-      for (i = 1; i < fd->collapse; i++)
-       {
-         innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
-                                   OMP_CLAUSE__LOOPTEMP_);
-         gcc_assert (innerc);
-       }
-      innerc = omp_find_clause (OMP_CLAUSE_CHAIN (innerc),
-                               OMP_CLAUSE__LOOPTEMP_);
+      innerc = find_lastprivate_looptemp (fd, innerc);
       if (innerc)
        {
          /* If needed (inner taskloop has lastprivate clause), propagate
@@ -5102,7 +7121,7 @@ expand_omp_taskloop_for_outer (struct omp_region *region,
   assign_stmt = gimple_build_assign (endvar, t1);
   gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
   if (fd->collapse > 1)
-    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
+    expand_omp_for_init_vars (fd, &gsi, counts, NULL, inner_stmt, startvar);
 
   /* Remove the GIMPLE_OMP_FOR statement.  */
   gsi = gsi_for_stmt (for_stmt);
@@ -5257,8 +7276,20 @@ expand_omp_taskloop_for_inner (struct omp_region *region,
        assign_stmt = gimple_build_assign (fd->loop.v, NOP_EXPR, e);
       gsi_insert_after (&gsi, assign_stmt, GSI_CONTINUE_LINKING);
     }
+
+  tree *nonrect_bounds = NULL;
   if (fd->collapse > 1)
-    expand_omp_for_init_vars (fd, &gsi, counts, inner_stmt, startvar);
+    {
+      if (fd->non_rect)
+       {
+         nonrect_bounds = XALLOCAVEC (tree, fd->last_nonrect + 1);
+         memset (nonrect_bounds, 0, sizeof (tree) * (fd->last_nonrect + 1));
+       }
+      gcc_assert (gsi_bb (gsi) == entry_bb);
+      expand_omp_for_init_vars (fd, &gsi, counts, nonrect_bounds, inner_stmt,
+                               startvar);
+      entry_bb = gsi_bb (gsi);
+    }
 
   if (!broken_loop)
     {
@@ -5293,7 +7324,8 @@ expand_omp_taskloop_for_inner (struct omp_region *region,
       gsi_remove (&gsi, true);
 
       if (fd->collapse > 1 && !gimple_omp_for_combined_p (fd->for_stmt))
-       collapse_bb = extract_omp_for_update_vars (fd, cont_bb, body_bb);
+       collapse_bb = extract_omp_for_update_vars (fd, nonrect_bounds,
+                                                  cont_bb, body_bb);
     }
 
   /* Remove the GIMPLE_OMP_FOR statement.  */
@@ -5341,7 +7373,7 @@ expand_omp_taskloop_for_inner (struct omp_region *region,
 
   if (!broken_loop && !gimple_omp_for_combined_p (fd->for_stmt))
     {
-      struct loop *loop = alloc_loop ();
+      class loop *loop = alloc_loop ();
       loop->header = body_bb;
       if (collapse_bb == NULL)
        loop->latch = cont_bb;
@@ -5396,6 +7428,21 @@ expand_omp_taskloop_for_inner (struct omp_region *region,
 static void
 expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
 {
+  bool is_oacc_kernels_parallelized
+    = (lookup_attribute ("oacc kernels parallelized",
+                        DECL_ATTRIBUTES (current_function_decl)) != NULL);
+  {
+    bool is_oacc_kernels
+      = (lookup_attribute ("oacc kernels",
+                          DECL_ATTRIBUTES (current_function_decl)) != NULL);
+    if (is_oacc_kernels_parallelized)
+      gcc_checking_assert (is_oacc_kernels);
+  }
+  gcc_assert (gimple_in_ssa_p (cfun) == is_oacc_kernels_parallelized);
+  /* In the following, some of the 'gimple_in_ssa_p (cfun)' conditionals are
+     for SSA specifics, and some are for 'parloops' OpenACC
+     'kernels'-parallelized specifics.  */
+
   tree v = fd->loop.v;
   enum tree_code cond_code = fd->loop.cond_code;
   enum tree_code plus_code = PLUS_EXPR;
@@ -5417,6 +7464,12 @@ expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
       plus_code = POINTER_PLUS_EXPR;
       plus_type = sizetype;
     }
+  for (int ix = fd->collapse; ix--;)
+    {
+      tree diff_type2 = TREE_TYPE (fd->loops[ix].step);
+      if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (diff_type2))
+       diff_type = diff_type2;
+    }
   if (POINTER_TYPE_P (diff_type) || TYPE_UNSIGNED (diff_type))
     diff_type = signed_type_for (diff_type);
   if (TYPE_PRECISION (diff_type) < TYPE_PRECISION (integer_type_node))
@@ -5427,8 +7480,8 @@ expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
   basic_block cont_bb = region->cont; /* BB ending in OMP_CONTINUE  */
   basic_block bottom_bb = NULL;
 
-  /* entry_bb has two sucessors; the branch edge is to the exit
-     block,  fallthrough edge to body.  */
+  /* entry_bb has two successors; the branch edge is to the exit
+     block, fallthrough edge to body.  */
   gcc_assert (EDGE_COUNT (entry_bb->succs) == 2
              && BRANCH_EDGE (entry_bb)->dest == exit_bb);
 
@@ -5500,7 +7553,7 @@ expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
     {
       gcc_assert (!gimple_in_ssa_p (cfun) && up);
       counts = XALLOCAVEC (struct oacc_collapse, fd->collapse);
-      tree total = expand_oacc_collapse_init (fd, &gsi, counts,
+      tree total = expand_oacc_collapse_init (fd, &gsi, counts, diff_type,
                                              TREE_TYPE (fd->loop.n2), loc);
 
       if (SSA_VAR_P (fd->loop.n2))
@@ -5662,7 +7715,7 @@ expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
       gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
 
       if (fd->collapse > 1 || fd->tiling)
-       expand_oacc_collapse_vars (fd, false, &gsi, counts, v);
+       expand_oacc_collapse_vars (fd, false, &gsi, counts, v, diff_type);
 
       if (fd->tiling)
        {
@@ -5732,7 +7785,8 @@ expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
 
          /* Initialize the user's loop vars.  */
          gsi = gsi_start_bb (elem_body_bb);
-         expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset);
+         expand_oacc_collapse_vars (fd, true, &gsi, counts, e_offset,
+                                    diff_type);
        }
     }
 
@@ -5858,12 +7912,12 @@ expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
     {
       /* We now have one, two or three nested loops.  Update the loop
         structures.  */
-      struct loop *parent = entry_bb->loop_father;
-      struct loop *body = body_bb->loop_father;
+      class loop *parent = entry_bb->loop_father;
+      class loop *body = body_bb->loop_father;
 
       if (chunking)
        {
-         struct loop *chunk_loop = alloc_loop ();
+         class loop *chunk_loop = alloc_loop ();
          chunk_loop->header = head_bb;
          chunk_loop->latch = bottom_bb;
          add_loop (chunk_loop, parent);
@@ -5879,7 +7933,7 @@ expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
 
       if (parent)
        {
-         struct loop *body_loop = alloc_loop ();
+         class loop *body_loop = alloc_loop ();
          body_loop->header = body_bb;
          body_loop->latch = cont_bb;
          add_loop (body_loop, parent);
@@ -5887,7 +7941,7 @@ expand_oacc_for (struct omp_region *region, struct omp_for_data *fd)
          if (fd->tiling)
            {
              /* Insert tiling's element loop.  */
-             struct loop *inner_loop = alloc_loop ();
+             class loop *inner_loop = alloc_loop ();
              inner_loop->header = elem_body_bb;
              inner_loop->latch = elem_cont_bb;
              add_loop (inner_loop, body_loop);
@@ -5904,14 +7958,55 @@ expand_omp_for (struct omp_region *region, gimple *inner_stmt)
   struct omp_for_data fd;
   struct omp_for_data_loop *loops;
 
-  loops
-    = (struct omp_for_data_loop *)
-      alloca (gimple_omp_for_collapse (last_stmt (region->entry))
-             * sizeof (struct omp_for_data_loop));
+  loops = XALLOCAVEC (struct omp_for_data_loop,
+                     gimple_omp_for_collapse (last_stmt (region->entry)));
   omp_extract_for_data (as_a <gomp_for *> (last_stmt (region->entry)),
                        &fd, loops);
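The XALLOCAVEC conversion above is behavior-preserving; the macro (from libiberty.h, quoted from memory, so treat it as an assumption) is a type-safe wrapper over the open-coded alloca it replaces:

    #define XALLOCAVEC(T, N)  ((T *) alloca (sizeof (T) * (N)))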
   region->sched_kind = fd.sched_kind;
   region->sched_modifiers = fd.sched_modifiers;
+  region->has_lastprivate_conditional = fd.lastprivate_conditional != 0;
+  if (fd.non_rect && !gimple_omp_for_combined_into_p (fd.for_stmt))
+    {
+      for (int i = fd.first_nonrect; i <= fd.last_nonrect; i++)
+       if ((loops[i].m1 || loops[i].m2)
+           && (loops[i].m1 == NULL_TREE
+               || TREE_CODE (loops[i].m1) == INTEGER_CST)
+           && (loops[i].m2 == NULL_TREE
+               || TREE_CODE (loops[i].m2) == INTEGER_CST)
+           && TREE_CODE (loops[i].step) == INTEGER_CST
+           && TREE_CODE (loops[i - loops[i].outer].step) == INTEGER_CST)
+         {
+           tree t;
+           tree itype = TREE_TYPE (loops[i].v);
+           if (loops[i].m1 && loops[i].m2)
+             t = fold_build2 (MINUS_EXPR, itype, loops[i].m2, loops[i].m1);
+           else if (loops[i].m1)
+             t = fold_build1 (NEGATE_EXPR, itype, loops[i].m1);
+           else
+             t = loops[i].m2;
+           t = fold_build2 (MULT_EXPR, itype, t,
+                            fold_convert (itype,
+                                          loops[i - loops[i].outer].step));
+           if (TYPE_UNSIGNED (itype) && loops[i].cond_code == GT_EXPR)
+             t = fold_build2 (TRUNC_MOD_EXPR, itype,
+                              fold_build1 (NEGATE_EXPR, itype, t),
+                              fold_build1 (NEGATE_EXPR, itype,
+                                           fold_convert (itype,
+                                                         loops[i].step)));
+           else
+             t = fold_build2 (TRUNC_MOD_EXPR, itype, t,
+                              fold_convert (itype, loops[i].step));
+           if (integer_nonzerop (t))
+             error_at (gimple_location (fd.for_stmt),
+                       "invalid OpenMP non-rectangular loop step; "
+                       "%<(%E - %E) * %E%> is not a multiple of loop %d "
+                       "step %qE",
+                       loops[i].m2 ? loops[i].m2 : integer_zero_node,
+                       loops[i].m1 ? loops[i].m1 : integer_zero_node,
+                       loops[i - loops[i].outer].step, i + 1,
+                       loops[i].step);
+         }
+    }
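+  /* For instance (an illustrative example, not part of this change),
+     the check above rejects
+
+       #pragma omp for collapse(2)
+       for (int i = 0; i < n; i++)
+         for (int j = i; j < 2 * i + 4; j += 3)
+
+     because (2 - 1) * 1 is not a multiple of the inner step 3.  */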
 
   gcc_assert (EDGE_COUNT (region->entry->succs) == 2);
   BRANCH_EDGE (region->entry)->flags &= ~EDGE_ABNORMAL;
@@ -5928,11 +8023,11 @@ expand_omp_for (struct omp_region *region, gimple *inner_stmt)
        original loops from being detected.  Fix that up.  */
     loops_state_set (LOOPS_NEED_FIXUP);
 
-  if (gimple_omp_for_kind (fd.for_stmt) & GF_OMP_FOR_SIMD)
+  if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_SIMD)
     expand_omp_simd (region, &fd);
   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_OACC_LOOP)
     {
-      gcc_assert (!inner_stmt);
+      gcc_assert (!inner_stmt && !fd.non_rect);
       expand_oacc_for (region, &fd);
     }
   else if (gimple_omp_for_kind (fd.for_stmt) == GF_OMP_FOR_KIND_TASKLOOP)
@@ -5957,21 +8052,23 @@ expand_omp_for (struct omp_region *region, gimple *inner_stmt)
       tree sched_arg = NULL_TREE;
 
       gcc_assert (gimple_omp_for_kind (fd.for_stmt)
-                 == GF_OMP_FOR_KIND_FOR);
+                 == GF_OMP_FOR_KIND_FOR && !fd.non_rect);
       if (fd.chunk_size == NULL
          && fd.sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
        fd.chunk_size = integer_zero_node;
       switch (fd.sched_kind)
        {
        case OMP_CLAUSE_SCHEDULE_RUNTIME:
-         if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0)
+         if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_NONMONOTONIC) != 0
+             && fd.lastprivate_conditional == 0)
            {
              gcc_assert (!fd.have_ordered);
              fn_index = 6;
              sched = 4;
            }
          else if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
-                  && !fd.have_ordered)
+                  && !fd.have_ordered
+                  && fd.lastprivate_conditional == 0)
            fn_index = 7;
          else
            {
@@ -5982,7 +8079,8 @@ expand_omp_for (struct omp_region *region, gimple *inner_stmt)
        case OMP_CLAUSE_SCHEDULE_DYNAMIC:
        case OMP_CLAUSE_SCHEDULE_GUIDED:
          if ((fd.sched_modifiers & OMP_CLAUSE_SCHEDULE_MONOTONIC) == 0
-             && !fd.have_ordered)
+             && !fd.have_ordered
+             && fd.lastprivate_conditional == 0)
            {
              fn_index = 3 + fd.sched_kind;
              sched = (fd.sched_kind == OMP_CLAUSE_SCHEDULE_GUIDED) + 2;
@@ -6007,7 +8105,7 @@ expand_omp_for (struct omp_region *region, gimple *inner_stmt)
       else
        start_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_START) + fn_index;
       next_ix = ((int)BUILT_IN_GOMP_LOOP_STATIC_NEXT) + fn_index;
-      if (fd.have_reductemp)
+      if (fd.have_reductemp || fd.have_pointer_condtemp)
        {
          if (fd.ordered)
            start_ix = (int)BUILT_IN_GOMP_LOOP_DOACROSS_START;
@@ -6130,21 +8228,62 @@ expand_omp_sections (struct omp_region *region)
   vin = gimple_omp_sections_control (sections_stmt);
   tree clauses = gimple_omp_sections_clauses (sections_stmt);
   tree reductmp = omp_find_clause (clauses, OMP_CLAUSE__REDUCTEMP_);
-  if (reductmp)
-    {
-      tree reductions = OMP_CLAUSE_DECL (reductmp);
-      gcc_assert (TREE_CODE (reductions) == SSA_NAME);
-      gimple *g = SSA_NAME_DEF_STMT (reductions);
-      reductions = gimple_assign_rhs1 (g);
-      OMP_CLAUSE_DECL (reductmp) = reductions;
-      gimple_stmt_iterator gsi = gsi_for_stmt (g);
+  tree condtmp = omp_find_clause (clauses, OMP_CLAUSE__CONDTEMP_);
+  tree cond_var = NULL_TREE;
+  if (reductmp || condtmp)
+    {
+      tree reductions = null_pointer_node, mem = null_pointer_node;
+      tree memv = NULL_TREE, condtemp = NULL_TREE;
+      gimple_stmt_iterator gsi = gsi_none ();
+      gimple *g = NULL;
+      if (reductmp)
+       {
+         reductions = OMP_CLAUSE_DECL (reductmp);
+         gcc_assert (TREE_CODE (reductions) == SSA_NAME);
+         g = SSA_NAME_DEF_STMT (reductions);
+         reductions = gimple_assign_rhs1 (g);
+         OMP_CLAUSE_DECL (reductmp) = reductions;
+         gsi = gsi_for_stmt (g);
+       }
+      else
+       gsi = si;
+      if (condtmp)
+       {
+         condtemp = OMP_CLAUSE_DECL (condtmp);
+         tree c = omp_find_clause (OMP_CLAUSE_CHAIN (condtmp),
+                                   OMP_CLAUSE__CONDTEMP_);
+         cond_var = OMP_CLAUSE_DECL (c);
+         tree type = TREE_TYPE (condtemp);
+         memv = create_tmp_var (type);
+         TREE_ADDRESSABLE (memv) = 1;
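+         /* MEMV is seeded with the size in bytes needed for the
+            lastprivate(conditional:) tracking buffer, one slot per such
+            clause; the runtime is expected to store the address of the
+            actual buffer back through it, which is why CONDTEMP is
+            assigned from MEMV after the call below.  */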
+         unsigned cnt = 0;
+         for (c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
+           if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE
+               && OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (c))
+             ++cnt;
+         unsigned HOST_WIDE_INT sz
+           = tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))) * cnt;
+         expand_omp_build_assign (&gsi, memv, build_int_cst (type, sz),
+                                  false);
+         mem = build_fold_addr_expr (memv);
+       }
       t = build_int_cst (unsigned_type_node, len - 1);
       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS2_START);
-      stmt = gimple_build_call (u, 3, t, reductions, null_pointer_node);
+      stmt = gimple_build_call (u, 3, t, reductions, mem);
       gimple_call_set_lhs (stmt, vin);
       gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
-      gsi_remove (&gsi, true);
-      release_ssa_name (gimple_assign_lhs (g));
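+      /* For lastprivate(conditional:), set COND_VAR to the section number
+         returned by the runtime plus one; it is recomputed from VNEXT the
+         same way after each GOMP_sections_next call below.  */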
+      if (condtmp)
+       {
+         expand_omp_build_assign (&gsi, condtemp, memv, false);
+         tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
+                          vin, build_one_cst (TREE_TYPE (cond_var)));
+         expand_omp_build_assign (&gsi, cond_var, t, false);
+       }
+      if (reductmp)
+       {
+         gsi_remove (&gsi, true);
+         release_ssa_name (gimple_assign_lhs (g));
+       }
     }
   else if (!is_combined_parallel (region))
     {
@@ -6160,7 +8299,7 @@ expand_omp_sections (struct omp_region *region)
       u = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
       stmt = gimple_build_call (u, 0);
     }
-  if (!reductmp)
+  if (!reductmp && !condtmp)
     {
       gimple_call_set_lhs (stmt, vin);
       gsi_insert_after (&si, stmt, GSI_SAME_STMT);
@@ -6252,7 +8391,13 @@ expand_omp_sections (struct omp_region *region)
       bfn_decl = builtin_decl_explicit (BUILT_IN_GOMP_SECTIONS_NEXT);
       stmt = gimple_build_call (bfn_decl, 0);
       gimple_call_set_lhs (stmt, vnext);
-      gsi_insert_after (&si, stmt, GSI_SAME_STMT);
+      gsi_insert_before (&si, stmt, GSI_SAME_STMT);
+      if (cond_var)
+       {
+         tree t = build2 (PLUS_EXPR, TREE_TYPE (cond_var),
+                          vnext, build_one_cst (TREE_TYPE (cond_var)));
+         expand_omp_build_assign (&si, cond_var, t, false);
+       }
       gsi_remove (&si, true);
 
       single_succ_edge (l1_bb)->flags = EDGE_FALLTHRU;
@@ -6275,7 +8420,7 @@ expand_omp_sections (struct omp_region *region)
   set_immediate_dominator (CDI_DOMINATORS, default_bb, l0_bb);
 }
 
-/* Expand code for an OpenMP single directive.  We've already expanded
+/* Expand code for an OpenMP single or scope directive.  We've already expanded
    much of the code, here we simply place the GOMP_barrier call.  */
 
 static void
@@ -6288,7 +8433,8 @@ expand_omp_single (struct omp_region *region)
   exit_bb = region->exit;
 
   si = gsi_last_nondebug_bb (entry_bb);
-  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE);
+  gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
+             || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SCOPE);
   gsi_remove (&si, true);
   single_succ_edge (entry_bb)->flags = EDGE_FALLTHRU;
 
@@ -6318,6 +8464,7 @@ expand_omp_synch (struct omp_region *region)
   si = gsi_last_nondebug_bb (entry_bb);
   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_SINGLE
              || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASTER
+             || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_MASKED
              || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_TASKGROUP
              || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ORDERED
              || gimple_code (gsi_stmt (si)) == GIMPLE_OMP_CRITICAL
@@ -6340,22 +8487,58 @@ expand_omp_synch (struct omp_region *region)
     }
 }
 
+/* Translate the fail memory order embedded in an enum omp_memory_order
+   value to enum memmodel.  */
+
+static enum memmodel
+omp_memory_order_to_fail_memmodel (enum omp_memory_order mo)
+{
+  switch (mo & OMP_FAIL_MEMORY_ORDER_MASK)
+    {
+    case OMP_FAIL_MEMORY_ORDER_UNSPECIFIED:
+      switch (mo & OMP_MEMORY_ORDER_MASK)
+       {
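+       /* No explicit 'fail' clause: derive the failure order from the
+          success order, except that 'release' and 'acq_rel' degrade to
+          MEMMODEL_RELAXED and MEMMODEL_ACQUIRE respectively, as a failed
+          compare-and-swap performs no store.  */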
+       case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
+       case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
+       case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELAXED;
+       case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQUIRE;
+       case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
+       default: break;
+       }
+      gcc_unreachable ();
+    case OMP_FAIL_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
+    case OMP_FAIL_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
+    case OMP_FAIL_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
+    default: gcc_unreachable ();
+    }
+}
+
 /* Translate enum omp_memory_order to enum memmodel.  The two enums
    are using different numbers so that OMP_MEMORY_ORDER_UNSPECIFIED
-   is 0.  */
+   is 0 and omp_memory_order has the fail mode encoded in it too.  */
 
 static enum memmodel
 omp_memory_order_to_memmodel (enum omp_memory_order mo)
 {
-  switch (mo)
-    {
-    case OMP_MEMORY_ORDER_RELAXED: return MEMMODEL_RELAXED;
-    case OMP_MEMORY_ORDER_ACQUIRE: return MEMMODEL_ACQUIRE;
-    case OMP_MEMORY_ORDER_RELEASE: return MEMMODEL_RELEASE;
-    case OMP_MEMORY_ORDER_ACQ_REL: return MEMMODEL_ACQ_REL;
-    case OMP_MEMORY_ORDER_SEQ_CST: return MEMMODEL_SEQ_CST;
+  enum memmodel ret, fail_ret;
+  switch (mo & OMP_MEMORY_ORDER_MASK)
+    {
+    case OMP_MEMORY_ORDER_RELAXED: ret = MEMMODEL_RELAXED; break;
+    case OMP_MEMORY_ORDER_ACQUIRE: ret = MEMMODEL_ACQUIRE; break;
+    case OMP_MEMORY_ORDER_RELEASE: ret = MEMMODEL_RELEASE; break;
+    case OMP_MEMORY_ORDER_ACQ_REL: ret = MEMMODEL_ACQ_REL; break;
+    case OMP_MEMORY_ORDER_SEQ_CST: ret = MEMMODEL_SEQ_CST; break;
     default: gcc_unreachable ();
     }
+  /* If we drop the -Winvalid-memory-model warning for C++17 P0418R2,
+     we can just return ret here unconditionally.  Otherwise, work around
+     it here and make sure fail memmodel is not stronger.  */
+  if ((mo & OMP_FAIL_MEMORY_ORDER_MASK) == OMP_FAIL_MEMORY_ORDER_UNSPECIFIED)
+    return ret;
+  fail_ret = omp_memory_order_to_fail_memmodel (mo);
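+  /* E.g. (illustrative): a 'relaxed' success order combined with a
+     'fail(seq_cst)' clause yields fail_ret > ret here, so MEMMODEL_SEQ_CST
+     is returned instead of MEMMODEL_RELAXED.  */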
+  if (fail_ret > ret)
+    return fail_ret;
+  return ret;
 }
 
 /* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
@@ -6601,35 +8784,290 @@ expand_omp_atomic_fetch_op (basic_block load_bb,
   gsi = gsi_last_nondebug_bb (load_bb);
   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_LOAD);
 
-  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
-     It only requires that the operation happen atomically.  Thus we can
-     use the RELAXED memory model.  */
-  call = build_call_expr_loc (loc, decl, 3, addr,
-                             fold_convert_loc (loc, itype, rhs),
-                             build_int_cst (NULL, mo));
+  /* OpenMP does not imply any barrier-like semantics on its atomic ops.
+     It only requires that the operation happen atomically.  Thus we can
+     use the RELAXED memory model.  */
+  call = build_call_expr_loc (loc, decl, 3, addr,
+                             fold_convert_loc (loc, itype, rhs),
+                             build_int_cst (NULL, mo));
+
+  if (need_old || need_new)
+    {
+      lhs = need_old ? loaded_val : stored_val;
+      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
+      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
+    }
+  else
+    call = fold_convert_loc (loc, void_type_node, call);
+  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
+  gsi_remove (&gsi, true);
+
+  gsi = gsi_last_nondebug_bb (store_bb);
+  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
+  gsi_remove (&gsi, true);
+  gsi = gsi_last_nondebug_bb (store_bb);
+  stmt = gsi_stmt (gsi);
+  gsi_remove (&gsi, true);
+
+  if (gimple_in_ssa_p (cfun))
+    {
+      release_defs (stmt);
+      update_ssa (TODO_update_ssa_no_phi);
+    }
+
+  return true;
+}
+
+/* A subroutine of expand_omp_atomic.  Attempt to implement the atomic
+   compare and exchange as an ATOMIC_COMPARE_EXCHANGE internal function.
+   Returns false if the expression is not of the proper form.  */
+
+static bool
+expand_omp_atomic_cas (basic_block load_bb, tree addr,
+                      tree loaded_val, tree stored_val, int index)
+{
+  /* We expect to find the following sequences:
+
+   load_bb:
+       GIMPLE_OMP_ATOMIC_LOAD (tmp, mem)
+
+   store_bb:
+       val = tmp == e ? d : tmp;
+       GIMPLE_OMP_ATOMIC_STORE (val)
+
+     or in store_bb instead:
+       tmp2 = tmp == e;
+       val = tmp2 ? d : tmp;
+       GIMPLE_OMP_ATOMIC_STORE (val)
+
+     or:
+       tmp3 = VIEW_CONVERT_EXPR<integral_type>(tmp);
+       val = e == tmp3 ? d : tmp;
+       GIMPLE_OMP_ATOMIC_STORE (val)
+
+     etc.  */
+
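+  /* Such sequences result from OpenMP 5.1 'atomic compare' constructs,
+     e.g. (an illustrative source-level sketch, not part of this change):
+
+       #pragma omp atomic compare
+       x = x == e ? d : x;  */
+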
+  basic_block store_bb = single_succ (load_bb);
+  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (store_bb);
+  gimple *store_stmt = gsi_stmt (gsi);
+  if (!store_stmt || gimple_code (store_stmt) != GIMPLE_OMP_ATOMIC_STORE)
+    return false;
+  gsi_prev_nondebug (&gsi);
+  if (gsi_end_p (gsi))
+    return false;
+  gimple *condexpr_stmt = gsi_stmt (gsi);
+  if (!is_gimple_assign (condexpr_stmt)
+      || gimple_assign_rhs_code (condexpr_stmt) != COND_EXPR)
+    return false;
+  if (!operand_equal_p (gimple_assign_lhs (condexpr_stmt), stored_val, 0))
+    return false;
+  gimple *cond_stmt = NULL;
+  gimple *vce_stmt = NULL;
+  gsi_prev_nondebug (&gsi);
+  if (!gsi_end_p (gsi))
+    {
+      cond_stmt = gsi_stmt (gsi);
+      if (!is_gimple_assign (cond_stmt))
+       return false;
+      if (gimple_assign_rhs_code (cond_stmt) == EQ_EXPR)
+       {
+         gsi_prev_nondebug (&gsi);
+         if (!gsi_end_p (gsi))
+           {
+             vce_stmt = gsi_stmt (gsi);
+             if (!is_gimple_assign (vce_stmt)
+                 || gimple_assign_rhs_code (vce_stmt) != VIEW_CONVERT_EXPR)
+               return false;
+           }
+       }
+      else if (gimple_assign_rhs_code (cond_stmt) == VIEW_CONVERT_EXPR)
+       std::swap (vce_stmt, cond_stmt);
+      else
+       return false;
+      if (vce_stmt)
+       {
+         tree vce_rhs = gimple_assign_rhs1 (vce_stmt);
+         if (TREE_CODE (vce_rhs) != VIEW_CONVERT_EXPR
+             || !operand_equal_p (TREE_OPERAND (vce_rhs, 0), loaded_val))
+           return false;
+         if (!INTEGRAL_TYPE_P (TREE_TYPE (vce_rhs))
+             || !SCALAR_FLOAT_TYPE_P (TREE_TYPE (loaded_val))
+             || !tree_int_cst_equal (TYPE_SIZE (TREE_TYPE (vce_rhs)),
+                                     TYPE_SIZE (TREE_TYPE (loaded_val))))
+           return false;
+         gsi_prev_nondebug (&gsi);
+         if (!gsi_end_p (gsi))
+           return false;
+       }
+    }
+  tree cond = gimple_assign_rhs1 (condexpr_stmt);
+  tree cond_op1, cond_op2;
+  if (cond_stmt)
+    {
+      if (!operand_equal_p (cond, gimple_assign_lhs (cond_stmt)))
+       return false;
+      cond_op1 = gimple_assign_rhs1 (cond_stmt);
+      cond_op2 = gimple_assign_rhs2 (cond_stmt);
+    }
+  else if (TREE_CODE (cond) != EQ_EXPR && TREE_CODE (cond) != NE_EXPR)
+    return false;
+  else
+    {
+      cond_op1 = TREE_OPERAND (cond, 0);
+      cond_op2 = TREE_OPERAND (cond, 1);
+    }
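+  /* Determine D, the value stored on success, and E, the expected value:
+     with a NE_EXPR condition the COND_EXPR arms are swapped relative to
+     the EQ_EXPR form.  */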
+  tree d;
+  if (TREE_CODE (cond) == NE_EXPR)
+    {
+      if (!operand_equal_p (gimple_assign_rhs2 (condexpr_stmt), loaded_val))
+       return false;
+      d = gimple_assign_rhs3 (condexpr_stmt);
+    }
+  else if (!operand_equal_p (gimple_assign_rhs3 (condexpr_stmt), loaded_val))
+    return false;
+  else
+    d = gimple_assign_rhs2 (condexpr_stmt);
+  tree e = vce_stmt ? gimple_assign_lhs (vce_stmt) : loaded_val;
+  if (operand_equal_p (e, cond_op1))
+    e = cond_op2;
+  else if (operand_equal_p (e, cond_op2))
+    e = cond_op1;
+  else
+    return false;
+
+  location_t loc = gimple_location (store_stmt);
+  gimple *load_stmt = last_stmt (load_bb);
+  bool need_new = gimple_omp_atomic_need_value_p (store_stmt);
+  bool need_old = gimple_omp_atomic_need_value_p (load_stmt);
+  bool weak = gimple_omp_atomic_weak_p (load_stmt);
+  enum omp_memory_order omo = gimple_omp_atomic_memory_order (load_stmt);
+  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
+  tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
+  gcc_checking_assert (!need_old || !need_new);
+
+  enum built_in_function fncode
+    = (enum built_in_function) ((int) BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
+                               + index + 1);
+  tree cmpxchg = builtin_decl_explicit (fncode);
+  if (cmpxchg == NULL_TREE)
+    return false;
+  tree itype = TREE_TYPE (TREE_TYPE (cmpxchg));
+
+  if (!can_compare_and_swap_p (TYPE_MODE (itype), true)
+      || !can_atomic_load_p (TYPE_MODE (itype)))
+    return false;
+
+  tree type = TYPE_MAIN_VARIANT (TREE_TYPE (loaded_val));
+  if (SCALAR_FLOAT_TYPE_P (type) && !vce_stmt)
+    return false;
+
+  gsi = gsi_for_stmt (store_stmt);
+  if (!useless_type_conversion_p (itype, TREE_TYPE (e)))
+    {
+      tree ne = create_tmp_reg (itype);
+      gimple *g = gimple_build_assign (ne, NOP_EXPR, e);
+      gimple_set_location (g, loc);
+      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+      e = ne;
+    }
+  if (!useless_type_conversion_p (itype, TREE_TYPE (d)))
+    {
+      tree nd = create_tmp_reg (itype);
+      enum tree_code code;
+      if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (d)))
+       {
+         code = VIEW_CONVERT_EXPR;
+         d = build1 (VIEW_CONVERT_EXPR, itype, d);
+       }
+      else
+       code = NOP_EXPR;
+      gimple *g = gimple_build_assign (nd, code, d);
+      gimple_set_location (g, loc);
+      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+      d = nd;
+    }
+
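+  /* Emit the compare-and-exchange as an ATOMIC_COMPARE_EXCHANGE internal
+     function call.  Its complex return value carries the value read from
+     memory in the real part and the success flag in the imaginary part;
+     FLAG encodes the access size in its low byte and the weak hint in
+     bit 8.  */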
+  tree ctype = build_complex_type (itype);
+  int flag = int_size_in_bytes (itype) + (weak ? 256 : 0);
+  gimple *g
+    = gimple_build_call_internal (IFN_ATOMIC_COMPARE_EXCHANGE, 6, addr, e, d,
+                                 build_int_cst (integer_type_node, flag),
+                                 mo, fmo);
+  tree cres = create_tmp_reg (ctype);
+  gimple_call_set_lhs (g, cres);
+  gimple_set_location (g, loc);
+  gsi_insert_before (&gsi, g, GSI_SAME_STMT);
 
-  if (need_old || need_new)
+  if (cond_stmt || need_old || need_new)
     {
-      lhs = need_old ? loaded_val : stored_val;
-      call = fold_convert_loc (loc, TREE_TYPE (lhs), call);
-      call = build2_loc (loc, MODIFY_EXPR, void_type_node, lhs, call);
+      tree im = create_tmp_reg (itype);
+      g = gimple_build_assign (im, IMAGPART_EXPR,
+                              build1 (IMAGPART_EXPR, itype, cres));
+      gimple_set_location (g, loc);
+      gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+
+      tree re = NULL_TREE;
+      if (need_old || need_new)
+       {
+         re = create_tmp_reg (itype);
+         g = gimple_build_assign (re, REALPART_EXPR,
+                                  build1 (REALPART_EXPR, itype, cres));
+         gimple_set_location (g, loc);
+         gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+       }
+
+      if (cond_stmt)
+       {
+         g = gimple_build_assign (gimple_assign_lhs (cond_stmt),
+                                  NOP_EXPR, im);
+         gimple_set_location (g, loc);
+         gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+       }
+      else if (need_new)
+       {
+         g = gimple_build_assign (create_tmp_reg (itype), COND_EXPR,
+                                  build2 (NE_EXPR, boolean_type_node,
+                                          im, build_zero_cst (itype)),
+                                  d, re);
+         gimple_set_location (g, loc);
+         gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+         re = gimple_assign_lhs (g);
+       }
+
+      if (need_old || need_new)
+       {
+         tree v = need_old ? loaded_val : stored_val;
+         enum tree_code code;
+         if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (v)))
+           {
+             code = VIEW_CONVERT_EXPR;
+             re = build1 (VIEW_CONVERT_EXPR, TREE_TYPE (v), re);
+           }
+         else if (!useless_type_conversion_p (TREE_TYPE (v), itype))
+           code = NOP_EXPR;
+         else
+           code = TREE_CODE (re);
+         g = gimple_build_assign (v, code, re);
+         gimple_set_location (g, loc);
+         gsi_insert_before (&gsi, g, GSI_SAME_STMT);
+       }
     }
-  else
-    call = fold_convert_loc (loc, void_type_node, call);
-  force_gimple_operand_gsi (&gsi, call, true, NULL_TREE, true, GSI_SAME_STMT);
-  gsi_remove (&gsi, true);
 
-  gsi = gsi_last_nondebug_bb (store_bb);
-  gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
   gsi_remove (&gsi, true);
-  gsi = gsi_last_nondebug_bb (store_bb);
-  stmt = gsi_stmt (gsi);
+  gsi = gsi_for_stmt (load_stmt);
   gsi_remove (&gsi, true);
-
-  if (gimple_in_ssa_p (cfun))
+  gsi = gsi_for_stmt (condexpr_stmt);
+  gsi_remove (&gsi, true);
+  if (cond_stmt)
     {
-      release_defs (stmt);
-      update_ssa (TODO_update_ssa_no_phi);
+      gsi = gsi_for_stmt (cond_stmt);
+      gsi_remove (&gsi, true);
+    }
+  if (vce_stmt)
+    {
+      gsi = gsi_for_stmt (vce_stmt);
+      gsi_remove (&gsi, true);
     }
 
   return true;
@@ -6660,8 +9098,6 @@ expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
   edge e;
   enum built_in_function fncode;
 
-  /* ??? We need a non-pointer interface to __atomic_compare_exchange in
-     order to use the RELAXED memory model effectively.  */
   fncode = (enum built_in_function)((int)BUILT_IN_SYNC_VAL_COMPARE_AND_SWAP_N
                                    + index + 1);
   cmpxchg = builtin_decl_explicit (fncode);
@@ -6678,6 +9114,10 @@ expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
   /* Load the initial value, replacing the GIMPLE_OMP_ATOMIC_LOAD.  */
   si = gsi_last_nondebug_bb (load_bb);
   gcc_assert (gimple_code (gsi_stmt (si)) == GIMPLE_OMP_ATOMIC_LOAD);
+  location_t loc = gimple_location (gsi_stmt (si));
+  enum omp_memory_order omo = gimple_omp_atomic_memory_order (gsi_stmt (si));
+  tree mo = build_int_cst (NULL, omp_memory_order_to_memmodel (omo));
+  tree fmo = build_int_cst (NULL, omp_memory_order_to_fail_memmodel (omo));
 
   /* For floating-point values, we'll need to view-convert them to integers
      so that we can perform the atomic compare and swap.  Simplify the
@@ -6774,7 +9214,15 @@ expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
                                  GSI_SAME_STMT);
 
   /* Build the compare&swap statement.  */
-  new_storedi = build_call_expr (cmpxchg, 3, iaddr, loadedi, storedi);
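+  /* Use the ATOMIC_COMPARE_EXCHANGE internal function, which lets the
+     memory orders MO and FMO from the construct be passed through; the
+     REALPART_EXPR below extracts the value read from memory from its
+     complex return value.  */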
+  tree ctype = build_complex_type (itype);
+  int flag = int_size_in_bytes (itype);
+  new_storedi = build_call_expr_internal_loc (loc, IFN_ATOMIC_COMPARE_EXCHANGE,
+                                             ctype, 6, iaddr, loadedi,
+                                             storedi,
+                                             build_int_cst (integer_type_node,
+                                                            flag),
+                                             mo, fmo);
+  new_storedi = build1 (REALPART_EXPR, itype, new_storedi);
   new_storedi = force_gimple_operand_gsi (&si,
                                          fold_convert (TREE_TYPE (loadedi),
                                                        new_storedi),
@@ -6821,7 +9269,7 @@ expand_omp_atomic_pipeline (basic_block load_bb, basic_block store_bb,
   /* Remove GIMPLE_OMP_ATOMIC_STORE.  */
   gsi_remove (&si, true);
 
-  struct loop *loop = alloc_loop ();
+  class loop *loop = alloc_loop ();
   loop->header = loop_header;
   loop->latch = store_bb;
   add_loop (loop, loop_header->loop_father);
@@ -6952,6 +9400,13 @@ expand_omp_atomic (struct omp_region *region)
                                             loaded_val, stored_val, index))
            return;
 
+         /* When possible, use ATOMIC_COMPARE_EXCHANGE ifn without a loop.  */
+         if (store_bb == single_succ (load_bb)
+             && !gimple_in_ssa_p (cfun)
+             && expand_omp_atomic_cas (load_bb, addr, loaded_val, stored_val,
+                                       index))
+           return;
+
          /* If we don't have specialized __sync builtins, try and implement
             as a compare and swap loop.  */
          if (expand_omp_atomic_pipeline (load_bb, store_bb, addr,
@@ -6971,14 +9426,14 @@ static void
 mark_loops_in_oacc_kernels_region (basic_block region_entry,
                                   basic_block region_exit)
 {
-  struct loop *outer = region_entry->loop_father;
+  class loop *outer = region_entry->loop_father;
   gcc_assert (region_exit == NULL || outer == region_exit->loop_father);
 
   /* Don't parallelize the kernels region if it contains more than one outer
      loop.  */
   unsigned int nr_outer_loops = 0;
-  struct loop *single_outer = NULL;
-  for (struct loop *loop = outer->inner; loop != NULL; loop = loop->next)
+  class loop *single_outer = NULL;
+  for (class loop *loop = outer->inner; loop != NULL; loop = loop->next)
     {
       gcc_assert (loop_outer (loop) == outer);
 
@@ -6995,124 +9450,17 @@ mark_loops_in_oacc_kernels_region (basic_block region_entry,
   if (nr_outer_loops != 1)
     return;
 
-  for (struct loop *loop = single_outer->inner;
+  for (class loop *loop = single_outer->inner;
        loop != NULL;
        loop = loop->inner)
     if (loop->next)
       return;
 
   /* Mark the loops in the region.  */
-  for (struct loop *loop = single_outer; loop != NULL; loop = loop->inner)
+  for (class loop *loop = single_outer; loop != NULL; loop = loop->inner)
     loop->in_oacc_kernels_region = true;
 }
 
-/* Types used to pass grid and workgroup sizes to kernel invocation.  */
-
-struct GTY(()) grid_launch_attributes_trees
-{
-  tree kernel_dim_array_type;
-  tree kernel_lattrs_dimnum_decl;
-  tree kernel_lattrs_grid_decl;
-  tree kernel_lattrs_group_decl;
-  tree kernel_launch_attributes_type;
-};
-
-static GTY(()) struct grid_launch_attributes_trees *grid_attr_trees;
-
-/* Create types used to pass kernel launch attributes to target.  */
-
-static void
-grid_create_kernel_launch_attr_types (void)
-{
-  if (grid_attr_trees)
-    return;
-  grid_attr_trees = ggc_alloc <grid_launch_attributes_trees> ();
-
-  tree dim_arr_index_type
-    = build_index_type (build_int_cst (integer_type_node, 2));
-  grid_attr_trees->kernel_dim_array_type
-    = build_array_type (uint32_type_node, dim_arr_index_type);
-
-  grid_attr_trees->kernel_launch_attributes_type = make_node (RECORD_TYPE);
-  grid_attr_trees->kernel_lattrs_dimnum_decl
-    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("ndim"),
-                 uint32_type_node);
-  DECL_CHAIN (grid_attr_trees->kernel_lattrs_dimnum_decl) = NULL_TREE;
-
-  grid_attr_trees->kernel_lattrs_grid_decl
-    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("grid_size"),
-                 grid_attr_trees->kernel_dim_array_type);
-  DECL_CHAIN (grid_attr_trees->kernel_lattrs_grid_decl)
-    = grid_attr_trees->kernel_lattrs_dimnum_decl;
-  grid_attr_trees->kernel_lattrs_group_decl
-    = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("group_size"),
-                 grid_attr_trees->kernel_dim_array_type);
-  DECL_CHAIN (grid_attr_trees->kernel_lattrs_group_decl)
-    = grid_attr_trees->kernel_lattrs_grid_decl;
-  finish_builtin_struct (grid_attr_trees->kernel_launch_attributes_type,
-                        "__gomp_kernel_launch_attributes",
-                        grid_attr_trees->kernel_lattrs_group_decl, NULL_TREE);
-}
-
-/* Insert before the current statement in GSI a store of VALUE to INDEX of
-   array (of type kernel_dim_array_type) FLD_DECL of RANGE_VAR.  VALUE must be
-   of type uint32_type_node.  */
-
-static void
-grid_insert_store_range_dim (gimple_stmt_iterator *gsi, tree range_var,
-                            tree fld_decl, int index, tree value)
-{
-  tree ref = build4 (ARRAY_REF, uint32_type_node,
-                    build3 (COMPONENT_REF,
-                            grid_attr_trees->kernel_dim_array_type,
-                            range_var, fld_decl, NULL_TREE),
-                    build_int_cst (integer_type_node, index),
-                    NULL_TREE, NULL_TREE);
-  gsi_insert_before (gsi, gimple_build_assign (ref, value), GSI_SAME_STMT);
-}
-
-/* Return a tree representation of a pointer to a structure with grid and
-   work-group size information.  Statements filling that information will be
-   inserted before GSI, TGT_STMT is the target statement which has the
-   necessary information in it.  */
-
-static tree
-grid_get_kernel_launch_attributes (gimple_stmt_iterator *gsi,
-                                      gomp_target *tgt_stmt)
-{
-  grid_create_kernel_launch_attr_types ();
-  tree lattrs = create_tmp_var (grid_attr_trees->kernel_launch_attributes_type,
-                               "__kernel_launch_attrs");
-
-  unsigned max_dim = 0;
-  for (tree clause = gimple_omp_target_clauses (tgt_stmt);
-       clause;
-       clause = OMP_CLAUSE_CHAIN (clause))
-    {
-      if (OMP_CLAUSE_CODE (clause) != OMP_CLAUSE__GRIDDIM_)
-       continue;
-
-      unsigned dim = OMP_CLAUSE__GRIDDIM__DIMENSION (clause);
-      max_dim = MAX (dim, max_dim);
-
-      grid_insert_store_range_dim (gsi, lattrs,
-                                  grid_attr_trees->kernel_lattrs_grid_decl,
-                                  dim, OMP_CLAUSE__GRIDDIM__SIZE (clause));
-      grid_insert_store_range_dim (gsi, lattrs,
-                                  grid_attr_trees->kernel_lattrs_group_decl,
-                                  dim, OMP_CLAUSE__GRIDDIM__GROUP (clause));
-    }
-
-  tree dimref = build3 (COMPONENT_REF, uint32_type_node, lattrs,
-                       grid_attr_trees->kernel_lattrs_dimnum_decl, NULL_TREE);
-  gcc_checking_assert (max_dim <= 2);
-  tree dimensions = build_int_cstu (uint32_type_node, max_dim + 1);
-  gsi_insert_before (gsi, gimple_build_assign (dimref, dimensions),
-                    GSI_SAME_STMT);
-  TREE_ADDRESSABLE (lattrs) = 1;
-  return build_fold_addr_expr (lattrs);
-}
-
 /* Build target argument identifier from the DEVICE identifier, value
    identifier ID and whether the element also has a SUBSEQUENT_PARAM.  */
 
@@ -7203,16 +9551,6 @@ get_target_arguments (gimple_stmt_iterator *gsi, gomp_target *tgt_stmt)
                                           GOMP_TARGET_ARG_THREAD_LIMIT, t,
                                           &args);
 
-  /* Add HSA-specific grid sizes, if available.  */
-  if (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
-                      OMP_CLAUSE__GRIDDIM_))
-    {
-      int id = GOMP_TARGET_ARG_HSA_KERNEL_ATTRIBUTES;
-      t = get_target_argument_identifier (GOMP_DEVICE_HSA, true, id);
-      args.quick_push (t);
-      args.quick_push (grid_get_kernel_launch_attributes (gsi, tgt_stmt));
-    }
-
   /* Produce more, perhaps device specific, arguments here.  */
 
   tree argarray = create_tmp_var (build_array_type_nelts (ptr_type_node,
@@ -7247,13 +9585,15 @@ expand_omp_target (struct omp_region *region)
   gomp_target *entry_stmt;
   gimple *stmt;
   edge e;
-  bool offloaded, data_region;
+  bool offloaded;
+  int target_kind;
 
   entry_stmt = as_a <gomp_target *> (last_stmt (region->entry));
+  target_kind = gimple_omp_target_kind (entry_stmt);
   new_bb = region->entry;
 
   offloaded = is_gimple_omp_offloaded (entry_stmt);
-  switch (gimple_omp_target_kind (entry_stmt))
+  switch (target_kind)
     {
     case GF_OMP_TARGET_KIND_REGION:
     case GF_OMP_TARGET_KIND_UPDATE:
@@ -7261,15 +9601,17 @@ expand_omp_target (struct omp_region *region)
     case GF_OMP_TARGET_KIND_EXIT_DATA:
     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
     case GF_OMP_TARGET_KIND_OACC_KERNELS:
+    case GF_OMP_TARGET_KIND_OACC_SERIAL:
     case GF_OMP_TARGET_KIND_OACC_UPDATE:
-    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
+    case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
+    case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
     case GF_OMP_TARGET_KIND_OACC_DECLARE:
-      data_region = false;
-      break;
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
     case GF_OMP_TARGET_KIND_DATA:
     case GF_OMP_TARGET_KIND_OACC_DATA:
     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
-      data_region = true;
+    case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
       break;
     default:
       gcc_unreachable ();
@@ -7291,16 +9633,44 @@ expand_omp_target (struct omp_region *region)
   entry_bb = region->entry;
   exit_bb = region->exit;
 
-  if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
-    {
-      mark_loops_in_oacc_kernels_region (region->entry, region->exit);
+  if (target_kind == GF_OMP_TARGET_KIND_OACC_KERNELS)
+    mark_loops_in_oacc_kernels_region (region->entry, region->exit);
 
-      /* Further down, both OpenACC kernels and OpenACC parallel constructs
-        will be mapped to BUILT_IN_GOACC_PARALLEL, and to distinguish the
-        two, there is an "oacc kernels" attribute set for OpenACC kernels.  */
+  /* From here on, all OpenACC compute constructs are mapped to
+     'BUILT_IN_GOACC_PARALLEL', and get their compute regions outlined.
+     To distinguish between them, we attach attributes.  */
+  switch (target_kind)
+    {
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL:
+      DECL_ATTRIBUTES (child_fn)
+       = tree_cons (get_identifier ("oacc parallel"),
+                    NULL_TREE, DECL_ATTRIBUTES (child_fn));
+      break;
+    case GF_OMP_TARGET_KIND_OACC_KERNELS:
       DECL_ATTRIBUTES (child_fn)
        = tree_cons (get_identifier ("oacc kernels"),
                     NULL_TREE, DECL_ATTRIBUTES (child_fn));
+      break;
+    case GF_OMP_TARGET_KIND_OACC_SERIAL:
+      DECL_ATTRIBUTES (child_fn)
+       = tree_cons (get_identifier ("oacc serial"),
+                    NULL_TREE, DECL_ATTRIBUTES (child_fn));
+      break;
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+      DECL_ATTRIBUTES (child_fn)
+       = tree_cons (get_identifier ("oacc parallel_kernels_parallelized"),
+                    NULL_TREE, DECL_ATTRIBUTES (child_fn));
+      break;
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
+      DECL_ATTRIBUTES (child_fn)
+       = tree_cons (get_identifier ("oacc parallel_kernels_gang_single"),
+                    NULL_TREE, DECL_ATTRIBUTES (child_fn));
+      break;
+    default:
+      /* Make sure we don't miss any.  */
+      gcc_checking_assert (!(is_gimple_omp_oacc (entry_stmt)
+                            && is_gimple_omp_offloaded (entry_stmt)));
+      break;
     }
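+  /* The attributes attached above are matched again when the launch
+     arguments are built below; e.g. "oacc serial" forces 1x1x1 launch
+     dimensions.  */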
 
   if (offloaded)
@@ -7399,11 +9769,6 @@ expand_omp_target (struct omp_region *region)
          gsi_remove (&gsi, true);
        }
 
-      /* Make sure to generate early debug for the function before
-         outlining anything.  */
-      if (! gimple_in_ssa_p (cfun))
-       (*debug_hooks->early_global_decl) (cfun->decl);
-
       /* Move the offloading region into CHILD_CFUN.  */
 
       block = gimple_block (entry_stmt);
@@ -7480,13 +9845,14 @@ expand_omp_target (struct omp_region *region)
          dump_function_header (dump_file, child_fn, dump_flags);
          dump_function_to_file (child_fn, dump_file, dump_flags);
        }
+
+      adjust_context_and_scope (region, gimple_block (entry_stmt), child_fn);
     }
 
   /* Emit a library call to launch the offloading region, or do data
      transfers.  */
-  tree t1, t2, t3, t4, device, cond, depend, c, clauses;
+  tree t1, t2, t3, t4, depend, c, clauses;
   enum built_in_function start_ix;
-  location_t clause_loc;
   unsigned int flags_i = 0;
 
   switch (gimple_omp_target_kind (entry_stmt))
@@ -7507,19 +9873,26 @@ expand_omp_target (struct omp_region *region)
       start_ix = BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA;
       flags_i |= GOMP_TARGET_FLAG_EXIT_DATA;
       break;
-    case GF_OMP_TARGET_KIND_OACC_KERNELS:
     case GF_OMP_TARGET_KIND_OACC_PARALLEL:
+    case GF_OMP_TARGET_KIND_OACC_KERNELS:
+    case GF_OMP_TARGET_KIND_OACC_SERIAL:
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+    case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
       start_ix = BUILT_IN_GOACC_PARALLEL;
       break;
     case GF_OMP_TARGET_KIND_OACC_DATA:
     case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+    case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
       start_ix = BUILT_IN_GOACC_DATA_START;
       break;
     case GF_OMP_TARGET_KIND_OACC_UPDATE:
       start_ix = BUILT_IN_GOACC_UPDATE;
       break;
-    case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
-      start_ix = BUILT_IN_GOACC_ENTER_EXIT_DATA;
+    case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
+      start_ix = BUILT_IN_GOACC_ENTER_DATA;
+      break;
+    case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
+      start_ix = BUILT_IN_GOACC_EXIT_DATA;
       break;
     case GF_OMP_TARGET_KIND_OACC_DECLARE:
       start_ix = BUILT_IN_GOACC_DECLARE;
@@ -7530,49 +9903,69 @@ expand_omp_target (struct omp_region *region)
 
   clauses = gimple_omp_target_clauses (entry_stmt);
 
-  /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
-     library choose) and there is no conditional.  */
-  cond = NULL_TREE;
-  device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
-
-  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
-  if (c)
-    cond = OMP_CLAUSE_IF_EXPR (c);
-
-  c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
-  if (c)
+  tree device = NULL_TREE;
+  location_t device_loc = UNKNOWN_LOCATION;
+  tree goacc_flags = NULL_TREE;
+  if (is_gimple_omp_oacc (entry_stmt))
     {
-      /* Even if we pass it to all library function calls, it is currently only
-        defined/used for the OpenMP target ones.  */
-      gcc_checking_assert (start_ix == BUILT_IN_GOMP_TARGET
-                          || start_ix == BUILT_IN_GOMP_TARGET_DATA
-                          || start_ix == BUILT_IN_GOMP_TARGET_UPDATE
-                          || start_ix == BUILT_IN_GOMP_TARGET_ENTER_EXIT_DATA);
-
-      device = OMP_CLAUSE_DEVICE_ID (c);
-      clause_loc = OMP_CLAUSE_LOCATION (c);
+      /* By default, no GOACC_FLAGs are set.  */
+      goacc_flags = integer_zero_node;
     }
   else
-    clause_loc = gimple_location (entry_stmt);
-
-  c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
-  if (c)
-    flags_i |= GOMP_TARGET_FLAG_NOWAIT;
+    {
+      c = omp_find_clause (clauses, OMP_CLAUSE_DEVICE);
+      if (c)
+       {
+         device = OMP_CLAUSE_DEVICE_ID (c);
+         device_loc = OMP_CLAUSE_LOCATION (c);
+         if (OMP_CLAUSE_DEVICE_ANCESTOR (c))
+           sorry_at (device_loc, "%<ancestor%> not yet supported");
+       }
+      else
+       {
+         /* By default, the value of DEVICE is GOMP_DEVICE_ICV (let runtime
+            library choose).  */
+         device = build_int_cst (integer_type_node, GOMP_DEVICE_ICV);
+         device_loc = gimple_location (entry_stmt);
+       }
 
-  /* Ensure 'device' is of the correct type.  */
-  device = fold_convert_loc (clause_loc, integer_type_node, device);
+      c = omp_find_clause (clauses, OMP_CLAUSE_NOWAIT);
+      /* FIXME: in_reduction(...) nowait is not yet implemented; pretend
+        nowait doesn't appear.  */
+      if (c && omp_find_clause (clauses, OMP_CLAUSE_IN_REDUCTION))
+       c = NULL;
+      if (c)
+       flags_i |= GOMP_TARGET_FLAG_NOWAIT;
+    }
 
-  /* If we found the clause 'if (cond)', build
-     (cond ? device : GOMP_DEVICE_HOST_FALLBACK).  */
+  /* By default, there is no conditional.  */
+  tree cond = NULL_TREE;
+  c = omp_find_clause (clauses, OMP_CLAUSE_IF);
+  if (c)
+    cond = OMP_CLAUSE_IF_EXPR (c);
+  /* If we found the clause 'if (cond)', build:
+     OpenACC: goacc_flags = (cond ? goacc_flags : goacc_flags | GOACC_FLAG_HOST_FALLBACK)
+     OpenMP: device = (cond ? device : GOMP_DEVICE_HOST_FALLBACK) */
   if (cond)
     {
+      tree *tp;
+      if (is_gimple_omp_oacc (entry_stmt))
+       tp = &goacc_flags;
+      else
+       {
+         /* Ensure 'device' is of the correct type.  */
+         device = fold_convert_loc (device_loc, integer_type_node, device);
+
+         tp = &device;
+       }
+
       cond = gimple_boolify (cond);
 
       basic_block cond_bb, then_bb, else_bb;
       edge e;
       tree tmp_var;
 
-      tmp_var = create_tmp_var (TREE_TYPE (device));
+      tmp_var = create_tmp_var (TREE_TYPE (*tp));
       if (offloaded)
        e = split_block_after_labels (new_bb);
       else
@@ -7595,13 +9988,20 @@ expand_omp_target (struct omp_region *region)
       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
 
       gsi = gsi_start_bb (then_bb);
-      stmt = gimple_build_assign (tmp_var, device);
+      stmt = gimple_build_assign (tmp_var, *tp);
       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
 
       gsi = gsi_start_bb (else_bb);
-      stmt = gimple_build_assign (tmp_var,
-                                 build_int_cst (integer_type_node,
-                                                GOMP_DEVICE_HOST_FALLBACK));
+      if (is_gimple_omp_oacc (entry_stmt))
+       stmt = gimple_build_assign (tmp_var,
+                                   BIT_IOR_EXPR,
+                                   *tp,
+                                   build_int_cst (integer_type_node,
+                                                  GOACC_FLAG_HOST_FALLBACK));
+      else
+       stmt = gimple_build_assign (tmp_var,
+                                   build_int_cst (integer_type_node,
+                                                  GOMP_DEVICE_HOST_FALLBACK));
       gsi_insert_after (&gsi, stmt, GSI_CONTINUE_LINKING);
 
       make_edge (cond_bb, then_bb, EDGE_TRUE_VALUE);
@@ -7611,14 +10011,17 @@ expand_omp_target (struct omp_region *region)
       make_edge (then_bb, new_bb, EDGE_FALLTHRU);
       make_edge (else_bb, new_bb, EDGE_FALLTHRU);
 
-      device = tmp_var;
+      *tp = tmp_var;
+
       gsi = gsi_last_nondebug_bb (new_bb);
     }
   else
     {
       gsi = gsi_last_nondebug_bb (new_bb);
-      device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
-                                        true, GSI_SAME_STMT);
+
+      if (device != NULL_TREE)
+       device = force_gimple_operand_gsi (&gsi, device, true, NULL_TREE,
+                                          true, GSI_SAME_STMT);
     }
 
   t = gimple_omp_target_data_arg (entry_stmt);
@@ -7642,7 +10045,17 @@ expand_omp_target (struct omp_region *region)
   bool tagging = false;
   /* The maximum number used by any start_ix, without varargs.  */
   auto_vec<tree, 11> args;
-  args.quick_push (device);
+  if (is_gimple_omp_oacc (entry_stmt))
+    {
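+      /* For OpenACC, the GOACC_FLAG_* word is marshalled (via
+        GOACC_FLAGS_MARSHAL_OP) into the argument slot that otherwise
+        carries the OpenMP device id.  */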
+      tree goacc_flags_m = fold_build1 (GOACC_FLAGS_MARSHAL_OP,
+                                       TREE_TYPE (goacc_flags), goacc_flags);
+      goacc_flags_m = force_gimple_operand_gsi (&gsi, goacc_flags_m, true,
+                                               NULL_TREE, true,
+                                               GSI_SAME_STMT);
+      args.quick_push (goacc_flags_m);
+    }
+  else
+    args.quick_push (device);
   if (offloaded)
     args.quick_push (build_fold_addr_expr (child_fn));
   args.quick_push (t1);
@@ -7669,10 +10082,22 @@ expand_omp_target (struct omp_region *region)
        args.quick_push (get_target_arguments (&gsi, entry_stmt));
       break;
     case BUILT_IN_GOACC_PARALLEL:
-      oacc_set_fn_attrib (child_fn, clauses, &args);
+      if (lookup_attribute ("oacc serial", DECL_ATTRIBUTES (child_fn)) != NULL)
+       {
+         tree dims = NULL_TREE;
+         unsigned int ix;
+
+         /* For serial constructs we set all dimensions to 1.  */
+         for (ix = GOMP_DIM_MAX; ix--;)
+           dims = tree_cons (NULL_TREE, integer_one_node, dims);
+         oacc_replace_fn_attrib (child_fn, dims);
+       }
+      else
+       oacc_set_fn_attrib (child_fn, clauses, &args);
       tagging = true;
       /* FALLTHRU */
-    case BUILT_IN_GOACC_ENTER_EXIT_DATA:
+    case BUILT_IN_GOACC_ENTER_DATA:
+    case BUILT_IN_GOACC_EXIT_DATA:
     case BUILT_IN_GOACC_UPDATE:
       {
        tree t_async = NULL_TREE;
@@ -7708,7 +10133,9 @@ expand_omp_target (struct omp_region *region)
                                              i_async));
          }
        if (t_async)
-         args.safe_push (t_async);
+         args.safe_push (force_gimple_operand_gsi (&gsi, t_async, true,
+                                                   NULL_TREE, true,
+                                                   GSI_SAME_STMT));
 
        /* Save the argument index, and ... */
        unsigned t_wait_idx = args.length ();
@@ -7721,9 +10148,12 @@ expand_omp_target (struct omp_region *region)
        for (; c; c = OMP_CLAUSE_CHAIN (c))
          if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_WAIT)
            {
-             args.safe_push (fold_convert_loc (OMP_CLAUSE_LOCATION (c),
-                                               integer_type_node,
-                                               OMP_CLAUSE_WAIT_EXPR (c)));
+             tree arg = fold_convert_loc (OMP_CLAUSE_LOCATION (c),
+                                          integer_type_node,
+                                          OMP_CLAUSE_WAIT_EXPR (c));
+             arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE, true,
+                                             GSI_SAME_STMT);
+             args.safe_push (arg);
              num_waits++;
            }
 
@@ -7758,314 +10188,6 @@ expand_omp_target (struct omp_region *region)
       gcc_assert (g && gimple_code (g) == GIMPLE_OMP_TARGET);
       gsi_remove (&gsi, true);
     }
-  if (data_region && region->exit)
-    {
-      gsi = gsi_last_nondebug_bb (region->exit);
-      g = gsi_stmt (gsi);
-      gcc_assert (g && gimple_code (g) == GIMPLE_OMP_RETURN);
-      gsi_remove (&gsi, true);
-    }
-}
-
-/* Expand KFOR loop as an HSA gridified kernel, i.e. as a body only with
-   iteration variable derived from the thread number.  INTRA_GROUP means this
-   is an expansion of a loop iterating over work-items within a separate
-   iteration over groups.  */
-
-static void
-grid_expand_omp_for_loop (struct omp_region *kfor, bool intra_group)
-{
-  gimple_stmt_iterator gsi;
-  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
-  gcc_checking_assert (gimple_omp_for_kind (for_stmt)
-                      == GF_OMP_FOR_KIND_GRID_LOOP);
-  size_t collapse = gimple_omp_for_collapse (for_stmt);
-  struct omp_for_data_loop *loops
-    = XALLOCAVEC (struct omp_for_data_loop,
-                 gimple_omp_for_collapse (for_stmt));
-  struct omp_for_data fd;
-
-  remove_edge (BRANCH_EDGE (kfor->entry));
-  basic_block body_bb = FALLTHRU_EDGE (kfor->entry)->dest;
-
-  gcc_assert (kfor->cont);
-  omp_extract_for_data (for_stmt, &fd, loops);
-
-  gsi = gsi_start_bb (body_bb);
-
-  for (size_t dim = 0; dim < collapse; dim++)
-    {
-      tree type, itype;
-      itype = type = TREE_TYPE (fd.loops[dim].v);
-      if (POINTER_TYPE_P (type))
-       itype = signed_type_for (type);
-
-      tree n1 = fd.loops[dim].n1;
-      tree step = fd.loops[dim].step;
-      n1 = force_gimple_operand_gsi (&gsi, fold_convert (type, n1),
-                                    true, NULL_TREE, true, GSI_SAME_STMT);
-      step = force_gimple_operand_gsi (&gsi, fold_convert (itype, step),
-                                      true, NULL_TREE, true, GSI_SAME_STMT);
-      tree threadid;
-      if (gimple_omp_for_grid_group_iter (for_stmt))
-       {
-         gcc_checking_assert (!intra_group);
-         threadid = build_call_expr (builtin_decl_explicit
-                                     (BUILT_IN_HSA_WORKGROUPID), 1,
-                                     build_int_cstu (unsigned_type_node, dim));
-       }
-      else if (intra_group)
-       threadid = build_call_expr (builtin_decl_explicit
-                                   (BUILT_IN_HSA_WORKITEMID), 1,
-                                   build_int_cstu (unsigned_type_node, dim));
-      else
-       threadid = build_call_expr (builtin_decl_explicit
-                                   (BUILT_IN_HSA_WORKITEMABSID), 1,
-                                   build_int_cstu (unsigned_type_node, dim));
-      threadid = fold_convert (itype, threadid);
-      threadid = force_gimple_operand_gsi (&gsi, threadid, true, NULL_TREE,
-                                          true, GSI_SAME_STMT);
-
-      tree startvar = fd.loops[dim].v;
-      tree t = fold_build2 (MULT_EXPR, itype, threadid, step);
-      if (POINTER_TYPE_P (type))
-       t = fold_build_pointer_plus (n1, t);
-      else
-       t = fold_build2 (PLUS_EXPR, type, t, n1);
-      t = fold_convert (type, t);
-      t = force_gimple_operand_gsi (&gsi, t,
-                                   DECL_P (startvar)
-                                   && TREE_ADDRESSABLE (startvar),
-                                   NULL_TREE, true, GSI_SAME_STMT);
-      gassign *assign_stmt = gimple_build_assign (startvar, t);
-      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
-    }
-  /* Remove the omp for statement.  */
-  gsi = gsi_last_nondebug_bb (kfor->entry);
-  gsi_remove (&gsi, true);
-
-  /* Remove the GIMPLE_OMP_CONTINUE statement.  */
-  gsi = gsi_last_nondebug_bb (kfor->cont);
-  gcc_assert (!gsi_end_p (gsi)
-             && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_CONTINUE);
-  gsi_remove (&gsi, true);
-
-  /* Replace the GIMPLE_OMP_RETURN with a barrier, if necessary.  */
-  gsi = gsi_last_nondebug_bb (kfor->exit);
-  gcc_assert (!gsi_end_p (gsi)
-             && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
-  if (intra_group)
-    gsi_insert_before (&gsi, omp_build_barrier (NULL_TREE), GSI_SAME_STMT);
-  gsi_remove (&gsi, true);
-
-  /* Fixup the much simpler CFG.  */
-  remove_edge (find_edge (kfor->cont, body_bb));
-
-  if (kfor->cont != body_bb)
-    set_immediate_dominator (CDI_DOMINATORS, kfor->cont, body_bb);
-  set_immediate_dominator (CDI_DOMINATORS, kfor->exit, kfor->cont);
-}
-
-/* Structure passed to grid_remap_kernel_arg_accesses so that it can remap
-   argument_decls.  */
-
-struct grid_arg_decl_map
-{
-  tree old_arg;
-  tree new_arg;
-};
-
-/* Invoked through walk_gimple_op, will remap all PARM_DECLs to the ones
-   pertaining to kernel function.  */
-
-static tree
-grid_remap_kernel_arg_accesses (tree *tp, int *walk_subtrees, void *data)
-{
-  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
-  struct grid_arg_decl_map *adm = (struct grid_arg_decl_map *) wi->info;
-  tree t = *tp;
-
-  if (t == adm->old_arg)
-    *tp = adm->new_arg;
-  *walk_subtrees = !TYPE_P (t) && !DECL_P (t);
-  return NULL_TREE;
-}
-
-/* If TARGET region contains a kernel body for loop, remove its region from the
-   TARGET and expand it in HSA gridified kernel fashion.  */
-
-static void
-grid_expand_target_grid_body (struct omp_region *target)
-{
-  if (!hsa_gen_requested_p ())
-    return;
-
-  gomp_target *tgt_stmt = as_a <gomp_target *> (last_stmt (target->entry));
-  struct omp_region **pp;
-
-  for (pp = &target->inner; *pp; pp = &(*pp)->next)
-    if ((*pp)->type == GIMPLE_OMP_GRID_BODY)
-      break;
-
-  struct omp_region *gpukernel = *pp;
-
-  tree orig_child_fndecl = gimple_omp_target_child_fn (tgt_stmt);
-  if (!gpukernel)
-    {
-      /* HSA cannot handle OACC stuff.  */
-      if (gimple_omp_target_kind (tgt_stmt) != GF_OMP_TARGET_KIND_REGION)
-       return;
-      gcc_checking_assert (orig_child_fndecl);
-      gcc_assert (!omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
-                                   OMP_CLAUSE__GRIDDIM_));
-      cgraph_node *n = cgraph_node::get (orig_child_fndecl);
-
-      hsa_register_kernel (n);
-      return;
-    }
-
-  gcc_assert (omp_find_clause (gimple_omp_target_clauses (tgt_stmt),
-                              OMP_CLAUSE__GRIDDIM_));
-  tree inside_block
-    = gimple_block (first_stmt (single_succ (gpukernel->entry)));
-  *pp = gpukernel->next;
-  for (pp = &gpukernel->inner; *pp; pp = &(*pp)->next)
-    if ((*pp)->type == GIMPLE_OMP_FOR)
-      break;
-
-  struct omp_region *kfor = *pp;
-  gcc_assert (kfor);
-  gomp_for *for_stmt = as_a <gomp_for *> (last_stmt (kfor->entry));
-  gcc_assert (gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP);
-  *pp = kfor->next;
-  if (kfor->inner)
-    {
-      if (gimple_omp_for_grid_group_iter (for_stmt))
-       {
-         struct omp_region **next_pp;
-         for (pp = &kfor->inner; *pp; pp = next_pp)
-           {
-             next_pp = &(*pp)->next;
-             if ((*pp)->type != GIMPLE_OMP_FOR)
-               continue;
-             gomp_for *inner = as_a <gomp_for *> (last_stmt ((*pp)->entry));
-             gcc_assert (gimple_omp_for_kind (inner)
-                         == GF_OMP_FOR_KIND_GRID_LOOP);
-             grid_expand_omp_for_loop (*pp, true);
-             *pp = (*pp)->next;
-             next_pp = pp;
-           }
-       }
-      expand_omp (kfor->inner);
-    }
-  if (gpukernel->inner)
-    expand_omp (gpukernel->inner);
-
-  tree kern_fndecl = copy_node (orig_child_fndecl);
-  DECL_NAME (kern_fndecl) = clone_function_name_numbered (kern_fndecl,
-                                                         "kernel");
-  SET_DECL_ASSEMBLER_NAME (kern_fndecl, DECL_NAME (kern_fndecl));
-  tree tgtblock = gimple_block (tgt_stmt);
-  tree fniniblock = make_node (BLOCK);
-  BLOCK_ABSTRACT_ORIGIN (fniniblock) = BLOCK_ORIGIN (tgtblock);
-  BLOCK_SOURCE_LOCATION (fniniblock) = BLOCK_SOURCE_LOCATION (tgtblock);
-  BLOCK_SOURCE_END_LOCATION (fniniblock) = BLOCK_SOURCE_END_LOCATION (tgtblock);
-  BLOCK_SUPERCONTEXT (fniniblock) = kern_fndecl;
-  DECL_INITIAL (kern_fndecl) = fniniblock;
-  push_struct_function (kern_fndecl);
-  cfun->function_end_locus = gimple_location (tgt_stmt);
-  init_tree_ssa (cfun);
-  pop_cfun ();
-
-  /* Make sure to generate early debug for the function before
-     outlining anything.  */
-  if (! gimple_in_ssa_p (cfun))
-    (*debug_hooks->early_global_decl) (cfun->decl);
-
-  tree old_parm_decl = DECL_ARGUMENTS (kern_fndecl);
-  gcc_assert (!DECL_CHAIN (old_parm_decl));
-  tree new_parm_decl = copy_node (DECL_ARGUMENTS (kern_fndecl));
-  DECL_CONTEXT (new_parm_decl) = kern_fndecl;
-  DECL_ARGUMENTS (kern_fndecl) = new_parm_decl;
-  gcc_assert (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (kern_fndecl))));
-  DECL_RESULT (kern_fndecl) = copy_node (DECL_RESULT (kern_fndecl));
-  DECL_CONTEXT (DECL_RESULT (kern_fndecl)) = kern_fndecl;
-  struct function *kern_cfun = DECL_STRUCT_FUNCTION (kern_fndecl);
-  kern_cfun->curr_properties = cfun->curr_properties;
-
-  grid_expand_omp_for_loop (kfor, false);
-
-  /* Remove the omp for statement.  */
-  gimple_stmt_iterator gsi = gsi_last_nondebug_bb (gpukernel->entry);
-  gsi_remove (&gsi, true);
-  /* Replace the GIMPLE_OMP_RETURN at the end of the kernel region with a real
-     return.  */
-  gsi = gsi_last_nondebug_bb (gpukernel->exit);
-  gcc_assert (!gsi_end_p (gsi)
-             && gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_RETURN);
-  gimple *ret_stmt = gimple_build_return (NULL);
-  gsi_insert_after (&gsi, ret_stmt, GSI_SAME_STMT);
-  gsi_remove (&gsi, true);
-
-  /* Statements in the first BB in the target construct have been produced by
-     target lowering and must be copied inside the GPUKERNEL, with the two
-     exceptions of the first OMP statement and the OMP_DATA assignment
-     statement.  */
-  gsi = gsi_start_bb (single_succ (gpukernel->entry));
-  tree data_arg = gimple_omp_target_data_arg (tgt_stmt);
-  tree sender = data_arg ? TREE_VEC_ELT (data_arg, 0) : NULL;
-  for (gimple_stmt_iterator tsi = gsi_start_bb (single_succ (target->entry));
-       !gsi_end_p (tsi); gsi_next (&tsi))
-    {
-      gimple *stmt = gsi_stmt (tsi);
-      if (is_gimple_omp (stmt))
-       break;
-      if (sender
-         && is_gimple_assign (stmt)
-         && TREE_CODE (gimple_assign_rhs1 (stmt)) == ADDR_EXPR
-         && TREE_OPERAND (gimple_assign_rhs1 (stmt), 0) == sender)
-       continue;
-      gimple *copy = gimple_copy (stmt);
-      gsi_insert_before (&gsi, copy, GSI_SAME_STMT);
-      gimple_set_block (copy, fniniblock);
-    }
-
-  move_sese_region_to_fn (kern_cfun, single_succ (gpukernel->entry),
-                         gpukernel->exit, inside_block);
-
-  cgraph_node *kcn = cgraph_node::get_create (kern_fndecl);
-  kcn->mark_force_output ();
-  cgraph_node *orig_child = cgraph_node::get (orig_child_fndecl);
-
-  hsa_register_kernel (kcn, orig_child);
-
-  cgraph_node::add_new_function (kern_fndecl, true);
-  push_cfun (kern_cfun);
-  cgraph_edge::rebuild_edges ();
-
-  /* Re-map any mention of the PARM_DECL of the original function to the
-     PARM_DECL of the new one.
-
-     TODO: It would be great if lowering produced references into the GPU
-     kernel decl straight away and we did not have to do this.  */
-  struct grid_arg_decl_map adm;
-  adm.old_arg = old_parm_decl;
-  adm.new_arg = new_parm_decl;
-  basic_block bb;
-  FOR_EACH_BB_FN (bb, kern_cfun)
-    {
-      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
-       {
-         gimple *stmt = gsi_stmt (gsi);
-         struct walk_stmt_info wi;
-         memset (&wi, 0, sizeof (wi));
-         wi.info = &adm;
-         walk_gimple_op (stmt, grid_remap_kernel_arg_accesses, &wi);
-       }
-    }
-  pop_cfun ();
-
-  return;
 }
 
 /* Expand the parallel region tree rooted at REGION.  Expansion
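
The block deleted above is the tail of grid_expand_target_grid_body: it
cloned the outlined child function into a numbered "*.kernel" clone,
moved the single-entry/single-exit kernel region into it, registered the
clone via hsa_register_kernel, and re-mapped every use of the original
PARM_DECL to the clone's.  It disappears together with the rest of the
HSA/gridification support.  As a sketch only, here is hypothetical user
code (not from this patch) of roughly the shape of combined construct
that path used to flatten into a grid kernel:

    /* Hypothetical example; the deleted gridification path targeted
       combined constructs of roughly this shape.  */
    void
    vec_add (int n, int *a, int *b, int *c)
    {
    #pragma omp target teams distribute parallel for \
        map(to: b[0:n], c[0:n]) map(from: a[0:n])
      for (int i = 0; i < n; i++)
        a[i] = b[i] + c[i];
    }
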
@@ -8087,8 +10209,6 @@ expand_omp (struct omp_region *region)
         region.  */
       if (region->type == GIMPLE_OMP_PARALLEL)
        determine_parallel_type (region);
-      else if (region->type == GIMPLE_OMP_TARGET)
-       grid_expand_target_grid_body (region);
 
       if (region->type == GIMPLE_OMP_FOR
          && gimple_omp_for_combined_p (last_stmt (region->entry)))
@@ -8122,6 +10242,7 @@ expand_omp (struct omp_region *region)
          break;
 
        case GIMPLE_OMP_SINGLE:
+       case GIMPLE_OMP_SCOPE:
          expand_omp_single (region);
          break;
 
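
The new GIMPLE_OMP_SCOPE case (the OpenMP 5.1 "scope" construct) reuses
the existing "single" expansion path: lowering has already rewritten
both constructs into the corresponding runtime calls, so by expansion
time they need the same region handling.  A minimal sketch of
hypothetical user code (not from this patch):

    /* Hypothetical example: unlike 'single', a 'scope' block is run by
       every thread in the team and may carry a reduction clause.  */
    int
    count_threads (void)
    {
      int r = 0;
    #pragma omp parallel shared (r)
    #pragma omp scope reduction (+: r)
      r += 1;
      return r;
    }
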
@@ -8142,6 +10263,7 @@ expand_omp (struct omp_region *region)
          }
          /* FALLTHRU */
        case GIMPLE_OMP_MASTER:
+       case GIMPLE_OMP_MASKED:
        case GIMPLE_OMP_TASKGROUP:
        case GIMPLE_OMP_CRITICAL:
        case GIMPLE_OMP_TEAMS:
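
GIMPLE_OMP_MASKED joins the group expanded like GIMPLE_OMP_MASTER,
which fits: OpenMP 5.1's "masked" construct generalizes "master" with a
filter clause selecting the executing thread.  A minimal sketch of
hypothetical user code (not from this patch):

    #include <stdio.h>

    /* Hypothetical example: 'master' is the special case
       'masked filter (0)'.  */
    void
    report (void)
    {
    #pragma omp parallel
    #pragma omp masked filter (2)
      printf ("reached by the thread with id 2\n");
    }
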
@@ -8232,17 +10354,22 @@ build_omp_regions_1 (basic_block bb, struct omp_region *parent,
              switch (gimple_omp_target_kind (stmt))
                {
                case GF_OMP_TARGET_KIND_REGION:
-               case GF_OMP_TARGET_KIND_DATA:
                case GF_OMP_TARGET_KIND_OACC_PARALLEL:
                case GF_OMP_TARGET_KIND_OACC_KERNELS:
-               case GF_OMP_TARGET_KIND_OACC_DATA:
-               case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+               case GF_OMP_TARGET_KIND_OACC_SERIAL:
+               case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+               case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
                  break;
                case GF_OMP_TARGET_KIND_UPDATE:
                case GF_OMP_TARGET_KIND_ENTER_DATA:
                case GF_OMP_TARGET_KIND_EXIT_DATA:
+               case GF_OMP_TARGET_KIND_DATA:
+               case GF_OMP_TARGET_KIND_OACC_DATA:
+               case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+               case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
                case GF_OMP_TARGET_KIND_OACC_UPDATE:
-               case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
+               case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
+               case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
                case GF_OMP_TARGET_KIND_OACC_DECLARE:
                  /* ..., other than for those stand-alone directives...  */
                  region = NULL;
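
Two regroupings happen in this switch (and again in the matching switch
in omp_make_gimple_edges below).  The structured data-mapping kinds
(DATA, OACC_DATA, OACC_HOST_DATA and the new OACC_DATA_KERNELS) move
into the branch that sets region = NULL, presumably because after
lowering they are plain runtime calls with nothing left to outline,
while OACC_SERIAL and the two OACC_PARALLEL_KERNELS_* kinds join the
region-forming group.  In addition, the combined
GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA kind is split into separate
ENTER_DATA and EXIT_DATA kinds, mirroring the two stand-alone
directives, as in this hypothetical example (not from this patch):

    /* Hypothetical example: 'enter data' and 'exit data' are
       unstructured, stand-alone data directives.  */
    void
    scale (float *a, int n)
    {
    #pragma acc enter data copyin (a[0:n])
    #pragma acc parallel loop present (a[0:n])
      for (int i = 0; i < n; i++)
        a[i] *= 2.0f;
    #pragma acc exit data copyout (a[0:n])
    }
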
@@ -8456,10 +10583,11 @@ omp_make_gimple_edges (basic_block bb, struct omp_region **region,
     case GIMPLE_OMP_SINGLE:
     case GIMPLE_OMP_TEAMS:
     case GIMPLE_OMP_MASTER:
+    case GIMPLE_OMP_MASKED:
+    case GIMPLE_OMP_SCOPE:
     case GIMPLE_OMP_TASKGROUP:
     case GIMPLE_OMP_CRITICAL:
     case GIMPLE_OMP_SECTION:
-    case GIMPLE_OMP_GRID_BODY:
       cur_region = new_omp_region (bb, code, cur_region);
       fallthru = true;
       break;
@@ -8486,17 +10614,22 @@ omp_make_gimple_edges (basic_block bb, struct omp_region **region,
       switch (gimple_omp_target_kind (last))
        {
        case GF_OMP_TARGET_KIND_REGION:
-       case GF_OMP_TARGET_KIND_DATA:
        case GF_OMP_TARGET_KIND_OACC_PARALLEL:
        case GF_OMP_TARGET_KIND_OACC_KERNELS:
-       case GF_OMP_TARGET_KIND_OACC_DATA:
-       case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+       case GF_OMP_TARGET_KIND_OACC_SERIAL:
+       case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_PARALLELIZED:
+       case GF_OMP_TARGET_KIND_OACC_PARALLEL_KERNELS_GANG_SINGLE:
          break;
        case GF_OMP_TARGET_KIND_UPDATE:
        case GF_OMP_TARGET_KIND_ENTER_DATA:
        case GF_OMP_TARGET_KIND_EXIT_DATA:
+       case GF_OMP_TARGET_KIND_DATA:
+       case GF_OMP_TARGET_KIND_OACC_DATA:
+       case GF_OMP_TARGET_KIND_OACC_HOST_DATA:
+       case GF_OMP_TARGET_KIND_OACC_DATA_KERNELS:
        case GF_OMP_TARGET_KIND_OACC_UPDATE:
-       case GF_OMP_TARGET_KIND_OACC_ENTER_EXIT_DATA:
+       case GF_OMP_TARGET_KIND_OACC_ENTER_DATA:
+       case GF_OMP_TARGET_KIND_OACC_EXIT_DATA:
        case GF_OMP_TARGET_KIND_OACC_DECLARE:
          cur_region = cur_region->outer;
          break;
@@ -8600,5 +10733,3 @@ omp_make_gimple_edges (basic_block bb, struct omp_region **region,
 
   return fallthru;
 }
-
-#include "gt-omp-expand.h"