git.ipfire.org Git - thirdparty/gcc.git/commitdiff
PR tree-optimization/41783
author matz <matz@138bc75d-0d04-0410-961f-82ee72b054a4>
Tue, 19 Jan 2010 16:05:57 +0000 (16:05 +0000)
committer matz <matz@138bc75d-0d04-0410-961f-82ee72b054a4>
Tue, 19 Jan 2010 16:05:57 +0000 (16:05 +0000)
* tree-data-ref.c (toplevel): Include flags.h.
(dump_data_dependence_relation):  Also dump the inputs if the
result will be unknown.
(split_constant_offset_1): Look through some conversions.
* tree-predcom.c (determine_roots_comp): Restart a new chain if
the offset from last element is too large.
(ref_at_iteration): Deal also with MISALIGNED_INDIRECT_REF.
(reassociate_to_the_same_stmt): Handle vector registers.
* tree-vect-data-refs.c (vect_equal_offsets): Handle unary operations
(e.g. conversions).
* tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop): Add
wide_prolog_niters argument, emit widening instructions.
(vect_do_peeling_for_alignment): Adjust caller, use widened
variant of the iteration count.
* Makefile.in (tree-data-ref.o): Add $(FLAGS_H).

testsuite/
* gfortran.dg/vect/fast-math-mgrid-resid.f: New.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@156043 138bc75d-0d04-0410-961f-82ee72b054a4

gcc/ChangeLog
gcc/Makefile.in
gcc/testsuite/ChangeLog
gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f [new file with mode: 0644]
gcc/tree-data-ref.c
gcc/tree-predcom.c
gcc/tree-vect-data-refs.c
gcc/tree-vect-loop-manip.c

index 1b5a0d3ec2632dad43046f12626739130f24da97..ca67d10d031c65194d058a4992e51c80cc2a3b2e 100644 (file)
@@ -1,10 +1,29 @@
+2010-01-19  Michael Matz  <matz@suse.de>
+
+       PR tree-optimization/41783
+       * tree-data-ref.c (toplevel): Include flags.h.
+       (dump_data_dependence_relation):  Also dump the inputs if the
+       result will be unknown.
+       (split_constant_offset_1): Look through some conversions.
+       * tree-predcom.c (determine_roots_comp): Restart a new chain if
+       the offset from last element is too large.
+       (ref_at_iteration): Deal also with MISALIGNED_INDIRECT_REF.
+       (reassociate_to_the_same_stmt): Handle vector registers.
+       * tree-vect-data-refs.c (vect_equal_offsets): Handle unary operations
+       (e.g. conversions).
+       * tree-vect-loop-manip.c (vect_gen_niters_for_prolog_loop): Add 
+       wide_prolog_niters argument, emit widening instructions.
+       (vect_do_peeling_for_alignment): Adjust caller, use widened
+       variant of the iteration count.
+       * Makefile.in (tree-data-ref.o): Add $(FLAGS_H).
+
 2010-01-19  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
 
-       PR target/38697
-       * config/arm/neon-testgen.m (emit_automatics): New parameter
+       PR target/38697
+       * config/arm/neon-testgen.m (emit_automatics): New parameter
        features. Adjust for Fixed_return_reg feature.
        (test_intrinsic): Call emit_automatics with new feature.
-       * config/arm/neon.ml: Update copyright years.
+       * config/arm/neon.ml: Update copyright years.
        (features): New Fixed_return_reg feature.
        (ops): Update feature for Vget_low.
 
index d6a57c459e744477cdd59ebb1c37730ae112dd36..7c08ea2a4de20801c2affdb7869abe9ccc9adc7c 100644 (file)
@@ -2548,7 +2548,7 @@ tree-scalar-evolution.o: tree-scalar-evolution.c $(CONFIG_H) $(SYSTEM_H) \
    $(TIMEVAR_H) $(CFGLOOP_H) $(SCEV_H) $(TREE_PASS_H) $(FLAGS_H) \
    gt-tree-scalar-evolution.h
 tree-data-ref.o: tree-data-ref.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
-   $(GGC_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) \
+   $(GGC_H) $(FLAGS_H) $(TREE_H) $(RTL_H) $(BASIC_BLOCK_H) $(DIAGNOSTIC_H) \
    $(TREE_FLOW_H) $(TREE_DUMP_H) $(TIMEVAR_H) $(CFGLOOP_H) \
    $(TREE_DATA_REF_H) $(TREE_PASS_H) langhooks.h
 sese.o: sese.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
index 8c02782de0ebf300ba08ac0447dbae9916c9b033..58666a60a77230a1b54c0cb2d1f956913b822d4b 100644 (file)
@@ -1,3 +1,8 @@
+2010-01-19  Michael Matz  <matz@suse.de>
+
+       PR tree-optimization/41783
+       * gfortran.dg/vect/fast-math-mgrid-resid.f: New.
+
 2010-01-19  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
 
        PR target/38697.
diff --git a/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f b/gcc/testsuite/gfortran.dg/vect/fast-math-mgrid-resid.f
new file mode 100644 (file)
index 0000000..2d1844b
--- /dev/null
@@ -0,0 +1,44 @@
+! { dg-do compile }
+! { dg-require-effective-target vect_double }
+! { dg-options "-O3 -ffast-math -fpredictive-commoning -ftree-vectorize -fdump-tree-optimized" }
+
+******* RESID COMPUTES THE RESIDUAL:  R = V - AU
+*
+*      THIS SIMPLE IMPLEMENTATION COSTS  27A + 4M PER RESULT, WHERE
+*      A AND M DENOTE THE COSTS OF ADDITION (OR SUBTRACTION) AND 
+*      MULTIPLICATION, RESPECTIVELY.  BY USING SEVERAL TWO-DIMENSIONAL 
+*      BUFFERS ONE CAN REDUCE THIS COST TO  13A + 4M IN THE GENERAL 
+*      CASE, OR  10A + 3M WHEN THE COEFFICIENT A(1) IS ZERO.
+*
+      SUBROUTINE RESID(U,V,R,N,A)
+      INTEGER N
+      REAL*8 U(N,N,N),V(N,N,N),R(N,N,N),A(0:3)
+      INTEGER I3, I2, I1
+C
+      DO 600 I3=2,N-1
+      DO 600 I2=2,N-1
+      DO 600 I1=2,N-1
+ 600  R(I1,I2,I3)=V(I1,I2,I3)
+     >      -A(0)*( U(I1,  I2,  I3  ) )
+     >      -A(1)*( U(I1-1,I2,  I3  ) + U(I1+1,I2,  I3  )
+     >                 +  U(I1,  I2-1,I3  ) + U(I1,  I2+1,I3  )
+     >                 +  U(I1,  I2,  I3-1) + U(I1,  I2,  I3+1) )
+     >      -A(2)*( U(I1-1,I2-1,I3  ) + U(I1+1,I2-1,I3  )
+     >                 +  U(I1-1,I2+1,I3  ) + U(I1+1,I2+1,I3  )
+     >                 +  U(I1,  I2-1,I3-1) + U(I1,  I2+1,I3-1)
+     >                 +  U(I1,  I2-1,I3+1) + U(I1,  I2+1,I3+1)
+     >                 +  U(I1-1,I2,  I3-1) + U(I1-1,I2,  I3+1)
+     >                 +  U(I1+1,I2,  I3-1) + U(I1+1,I2,  I3+1) )
+     >      -A(3)*( U(I1-1,I2-1,I3-1) + U(I1+1,I2-1,I3-1)
+     >                 +  U(I1-1,I2+1,I3-1) + U(I1+1,I2+1,I3-1)
+     >                 +  U(I1-1,I2-1,I3+1) + U(I1+1,I2-1,I3+1)
+     >                 +  U(I1-1,I2+1,I3+1) + U(I1+1,I2+1,I3+1) )
+C
+      RETURN
+      END
+! we want to check that predictive commoning did something on the
+! vectorized loop, which means we have to have exactly 13 vector
+! additions.
+! { dg-final { scan-tree-dump-times "vect_var\[^\\n\]*\\+ " 13 "optimized" } }
+! { dg-final { cleanup-tree-dump "vect" } }
+! { dg-final { cleanup-tree-dump "optimized" } }
index dbdd3237d5e02726e9600130d583f57ea2d44bc4..9f5a623b2fa9e161b85f7c0c6acfe9a0abe014d4 100644 (file)
@@ -79,6 +79,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "coretypes.h"
 #include "tm.h"
 #include "ggc.h"
+#include "flags.h"
 #include "tree.h"
 
 /* These RTL headers are needed for basic-block.h.  */
@@ -380,6 +381,19 @@ dump_data_dependence_relation (FILE *outf,
 
   if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
     {
+      if (ddr)
+       {
+         dra = DDR_A (ddr);
+         drb = DDR_B (ddr);
+         if (dra)
+           dump_data_reference (outf, dra);
+         else
+           fprintf (outf, "    (nil)\n");
+         if (drb)
+           dump_data_reference (outf, drb);
+         else
+           fprintf (outf, "    (nil)\n");
+       }
       fprintf (outf, "    (don't know)\n)\n");
       return;
     }
@@ -631,6 +645,24 @@ split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
 
        return split_constant_offset_1 (type, var0, subcode, var1, var, off);
       }
+    CASE_CONVERT:
+      {
+       /* We must not introduce undefined overflow, and we must not change the value.
+          Hence we're okay if the inner type doesn't overflow to start with
+          (pointer or signed), the outer type also is an integer or pointer
+          and the outer precision is at least as large as the inner.  */
+       tree itype = TREE_TYPE (op0);
+       if ((POINTER_TYPE_P (itype)
+            || (INTEGRAL_TYPE_P (itype) && TYPE_OVERFLOW_UNDEFINED (itype)))
+           && TYPE_PRECISION (type) >= TYPE_PRECISION (itype)
+           && (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)))
+         {
+           split_constant_offset (op0, &var0, off);
+           *var = fold_convert (type, var0);
+           return true;
+         }
+       return false;
+      }
 
     default:
       return false;
index 78d45b88364cf906d583c58ba14d090a443b619a..f31c39203667dce87fcf9c05283144dd3ccdc548 100644 (file)
@@ -1180,6 +1180,7 @@ determine_roots_comp (struct loop *loop,
   unsigned i;
   dref a;
   chain_p chain = NULL;
+  double_int last_ofs = double_int_zero;
 
   /* Invariants are handled specially.  */
   if (comp->comp_step == RS_INVARIANT)
@@ -1194,13 +1195,20 @@ determine_roots_comp (struct loop *loop,
 
   for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++)
     {
-      if (!chain || !DR_IS_READ (a->ref))
+      if (!chain || !DR_IS_READ (a->ref)
+         || double_int_ucmp (uhwi_to_double_int (MAX_DISTANCE),
+                             double_int_add (a->offset,
+                                             double_int_neg (last_ofs))) <= 0)
        {
          if (nontrivial_chain_p (chain))
-           VEC_safe_push (chain_p, heap, *chains, chain);
+           {
+             add_looparound_copies (loop, chain);
+             VEC_safe_push (chain_p, heap, *chains, chain);
+           }
          else
            release_chain (chain);
          chain = make_rooted_chain (a);
+         last_ofs = a->offset;
          continue;
        }
 
@@ -1338,9 +1346,11 @@ ref_at_iteration (struct loop *loop, tree ref, int iter)
   else if (!INDIRECT_REF_P (ref))
     return unshare_expr (ref);
 
-  if (TREE_CODE (ref) == INDIRECT_REF)
+  if (INDIRECT_REF_P (ref))
     {
-      ret = build1 (INDIRECT_REF, TREE_TYPE (ref), NULL_TREE);
+      /* Take care for INDIRECT_REF and MISALIGNED_INDIRECT_REF at
+         the same time.  */
+      ret = copy_node (ref);
       idx = TREE_OPERAND (ref, 0);
       idx_p = &TREE_OPERAND (ret, 0);
     }
@@ -2205,11 +2215,17 @@ reassociate_to_the_same_stmt (tree name1, tree name2)
   /* Insert the new statement combining NAME1 and NAME2 before S1, and
      combine it with the rhs of S1.  */
   var = create_tmp_var (type, "predreastmp");
+  if (TREE_CODE (type) == COMPLEX_TYPE
+      || TREE_CODE (type) == VECTOR_TYPE)
+    DECL_GIMPLE_REG_P (var) = 1;
   add_referenced_var (var);
   new_name = make_ssa_name (var, NULL);
   new_stmt = gimple_build_assign_with_ops (code, new_name, name1, name2);
 
   var = create_tmp_var (type, "predreastmp");
+  if (TREE_CODE (type) == COMPLEX_TYPE
+      || TREE_CODE (type) == VECTOR_TYPE)
+    DECL_GIMPLE_REG_P (var) = 1;
   add_referenced_var (var);
   tmp_name = make_ssa_name (var, NULL);
 
index 89918535372463f5f1a75333c5543b6638bfb46e..37ae9b5e3bf94153b4217c81196be4e968fe05e3 100644 (file)
@@ -294,7 +294,7 @@ vect_update_interleaving_chain (struct data_reference *drb,
 static bool
 vect_equal_offsets (tree offset1, tree offset2)
 {
-  bool res0, res1;
+  bool res;
 
   STRIP_NOPS (offset1);
   STRIP_NOPS (offset2);
@@ -303,16 +303,19 @@ vect_equal_offsets (tree offset1, tree offset2)
     return true;
 
   if (TREE_CODE (offset1) != TREE_CODE (offset2)
-      || !BINARY_CLASS_P (offset1)
-      || !BINARY_CLASS_P (offset2))
+      || (!BINARY_CLASS_P (offset1) && !UNARY_CLASS_P (offset1)))
     return false;
 
-  res0 = vect_equal_offsets (TREE_OPERAND (offset1, 0),
-                            TREE_OPERAND (offset2, 0));
-  res1 = vect_equal_offsets (TREE_OPERAND (offset1, 1),
-                            TREE_OPERAND (offset2, 1));
+  res = vect_equal_offsets (TREE_OPERAND (offset1, 0),
+                           TREE_OPERAND (offset2, 0));
 
-  return (res0 && res1);
+  if (!res || !BINARY_CLASS_P (offset1))
+    return res;
+
+  res = vect_equal_offsets (TREE_OPERAND (offset1, 1),
+                           TREE_OPERAND (offset2, 1));
+
+  return res;
 }
 
 
index a0d3ce57cab59679a5b7a6b374152dc13e1d3777..f4056b058912091abc85adf92a79d76b1db91e5b 100644 (file)
@@ -1961,7 +1961,8 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
    use TYPE_VECTOR_SUBPARTS.  */
 
 static tree
-vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
+vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters,
+                                tree *wide_prolog_niters)
 {
   struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
@@ -2045,6 +2046,19 @@ vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
   add_referenced_var (var);
   stmts = NULL;
   iters_name = force_gimple_operand (iters, &stmts, false, var);
+  if (types_compatible_p (sizetype, niters_type))
+    *wide_prolog_niters = iters_name;
+  else
+    {
+      gimple_seq seq = NULL;
+      tree wide_iters = fold_convert (sizetype, iters);
+      var = create_tmp_var (sizetype, "prolog_loop_niters");
+      add_referenced_var (var);
+      *wide_prolog_niters = force_gimple_operand (wide_iters, &seq, false,
+                                                 var);
+      if (seq)
+       gimple_seq_add_seq (&stmts, seq);
+    }
 
   /* Insert stmt on loop preheader edge.  */
   if (stmts)
@@ -2115,6 +2129,7 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
   struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
   tree niters_of_prolog_loop, ni_name;
   tree n_iters;
+  tree wide_prolog_niters;
   struct loop *new_loop;
   unsigned int th = 0;
   int min_profitable_iters;
@@ -2125,7 +2140,8 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
   initialize_original_copy_tables ();
 
   ni_name = vect_build_loop_niters (loop_vinfo, NULL);
-  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);
+  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name,
+                                                          &wide_prolog_niters);
 
 
   /* Get profitability threshold for vectorized loop.  */
@@ -2150,7 +2166,7 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
                TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);
 
   /* Update the init conditions of the access functions of all data refs.  */
-  vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);
+  vect_update_inits_of_drs (loop_vinfo, wide_prolog_niters);
 
   /* After peeling we have to reset scalar evolution analyzer.  */
   scev_reset ();