git.ipfire.org Git - thirdparty/gcc.git/commitdiff
l_fma_float_?.c: Update.
author Jan Hubicka <jh@suse.cz>
Tue, 6 Nov 2012 13:49:30 +0000 (14:49 +0100)
committer Jan Hubicka <hubicka@gcc.gnu.org>
Tue, 6 Nov 2012 13:49:30 +0000 (13:49 +0000)
* gcc.target/i386/l_fma_float_?.c: Update.
* gcc.target/i386/l_fma_double_?.c: Update.

* tree-vect-loop-manip.c (vect_do_peeling_for_loop_bound,
vect_do_peeling_for_alignment): Fix loop bound computation.
* tree-vect-loop.c (vect_transform_loop): Maintain loop bounds.

From-SVN: r193241

16 files changed:
gcc/ChangeLog
gcc/testsuite/ChangeLog
gcc/testsuite/gcc.target/i386/l_fma_double_1.c
gcc/testsuite/gcc.target/i386/l_fma_double_2.c
gcc/testsuite/gcc.target/i386/l_fma_double_3.c
gcc/testsuite/gcc.target/i386/l_fma_double_4.c
gcc/testsuite/gcc.target/i386/l_fma_double_5.c
gcc/testsuite/gcc.target/i386/l_fma_double_6.c
gcc/testsuite/gcc.target/i386/l_fma_float_1.c
gcc/testsuite/gcc.target/i386/l_fma_float_2.c
gcc/testsuite/gcc.target/i386/l_fma_float_3.c
gcc/testsuite/gcc.target/i386/l_fma_float_4.c
gcc/testsuite/gcc.target/i386/l_fma_float_5.c
gcc/testsuite/gcc.target/i386/l_fma_float_6.c
gcc/tree-vect-loop-manip.c
gcc/tree-vect-loop.c

index b22409ec0a37dd3ef6ca304b8e6eedc98dd2ca72..ad525d3d2d65cc33987eafc47161386d9de1fac6 100644 (file)
@@ -1,3 +1,9 @@
+2012-11-06  Jan Hubicka  <jh@suse.cz>
+
+       * tree-vect-loop-manip.c (vect_do_peeling_for_loop_bound,
+       vect_do_peeling_for_alignment): Fix loop bound computation.
+       * tree-vect-loop.c (vect_transform_loop): Maintain loop bounds.
+
 2012-11-06  Oleg Endo  <olegendo@gcc.gnu.org>
 
        PR target/54089
index 9ff850068308c6bac8f522067dc34b4b6e14edef..62f9f0c2783a35ca7ffc6b2e832c97e8c5a5bf01 100644 (file)
@@ -1,3 +1,8 @@
+2012-11-06  Jan Hubicka  <jh@suse.cz>
+
+       * gcc.target/i386/l_fma_float_?.c: Update.
+       * gcc.target/i386/l_fma_double_?.c: Update.
+
 2012-11-06  Oleg Endo  <olegendo@gcc.gnu.org>
 
        PR target/54089
index 716acfef65ceef0826049f9ab0135c26ca01a00f..270659359f497c0d4e3e3a46385ed0bd9e1896b0 100644 (file)
 /* { dg-final { scan-assembler-times "vfnmadd231pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub231pd" 4  } } */
-/* { dg-final { scan-assembler-times "vfmadd132sd" 20  } } */
-/* { dg-final { scan-assembler-times "vfmadd213sd" 20  } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 20  } } */
-/* { dg-final { scan-assembler-times "vfmsub213sd" 20  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 20  } } */
-/* { dg-final { scan-assembler-times "vfnmadd213sd" 20  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 20  } } */
-/* { dg-final { scan-assembler-times "vfnmsub213sd" 20  } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 16  } } */
+/* { dg-final { scan-assembler-times "vfmadd213sd" 16  } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 16  } } */
+/* { dg-final { scan-assembler-times "vfmsub213sd" 16  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 16  } } */
+/* { dg-final { scan-assembler-times "vfnmadd213sd" 16  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 16  } } */
+/* { dg-final { scan-assembler-times "vfnmsub213sd" 16  } } */
index 01173afb223176842f3003b8e055e49c3a68e3bf..e8933e25d534d67666ea6aa2a074c6b707591b8e 100644 (file)
@@ -12,7 +12,7 @@
 /* { dg-final { scan-assembler-times "vfmsub132pd" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132pd" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132pd" 8  } } */
-/* { dg-final { scan-assembler-times "vfmadd132sd" 40  } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 40  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 40  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 40  } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 32  } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 32  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 32  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 32  } } */
index 8cda521a8701d0fb79737df9401dabe1532f3163..00c756775c81ea1c30b0e9a30fea2df5fccb43e1 100644 (file)
 /* { dg-final { scan-assembler-times "vfnmadd231pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132pd" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub231pd" 4  } } */
-/* { dg-final { scan-assembler-times "vfmadd132sd" 20  } } */
-/* { dg-final { scan-assembler-times "vfmadd213sd" 20  } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 20  } } */
-/* { dg-final { scan-assembler-times "vfmsub213sd" 20  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 20  } } */
-/* { dg-final { scan-assembler-times "vfnmadd213sd" 20  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 20  } } */
-/* { dg-final { scan-assembler-times "vfnmsub213sd" 20  } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 16  } } */
+/* { dg-final { scan-assembler-times "vfmadd213sd" 16  } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 16  } } */
+/* { dg-final { scan-assembler-times "vfmsub213sd" 16  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 16  } } */
+/* { dg-final { scan-assembler-times "vfnmadd213sd" 16  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 16  } } */
+/* { dg-final { scan-assembler-times "vfnmsub213sd" 16  } } */
index 9f2331b51e827333a01326017b84255da3270b75..09970bdb5c64d469b590b4df7d164180fc5d8710 100644 (file)
@@ -12,7 +12,7 @@
 /* { dg-final { scan-assembler-times "vfmsub132pd" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132pd" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132pd" 8  } } */
-/* { dg-final { scan-assembler-times "vfmadd132sd" 40  } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 40  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 40  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 40  } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 32  } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 32  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 32  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 32  } } */
index 9e33975b1e454c4f8ca720ce73560f5ad308e502..2a1428e4d9c9c0d09a2e4257a15215ed39c41b95 100644 (file)
@@ -12,7 +12,7 @@
 /* { dg-final { scan-assembler-times "vfmsub132pd" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132pd" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132pd" 8  } } */
-/* { dg-final { scan-assembler-times "vfmadd132sd" 40  } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 40  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 40  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 40  } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 32  } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 32  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 32  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 32  } } */
index 28d264dd20d2528deb04efb0af7c5510466ed9b4..092032aa0b5bcd0c604e72f203bcc5b82beb2f51 100644 (file)
@@ -12,7 +12,7 @@
 /* { dg-final { scan-assembler-times "vfmsub132pd" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132pd" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132pd" 8  } } */
-/* { dg-final { scan-assembler-times "vfmadd132sd" 40 } } */
-/* { dg-final { scan-assembler-times "vfmsub132sd" 40  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132sd" 40  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132sd" 40  } } */
+/* { dg-final { scan-assembler-times "vfmadd132sd" 32 } } */
+/* { dg-final { scan-assembler-times "vfmsub132sd" 32  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132sd" 32  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132sd" 32  } } */
index fea0b20619d858e104206184dc76a76ef1010b20..4bcd81de9da4e234d91f43246100395754b7d324 100644 (file)
 /* { dg-final { scan-assembler-times "vfnmadd231ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub231ps" 4  } } */
-/* { dg-final { scan-assembler-times "vfmadd132ss" 36  } } */
-/* { dg-final { scan-assembler-times "vfmadd213ss" 36  } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 36  } } */
-/* { dg-final { scan-assembler-times "vfmsub213ss" 36  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 36  } } */
-/* { dg-final { scan-assembler-times "vfnmadd213ss" 36  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 36  } } */
-/* { dg-final { scan-assembler-times "vfnmsub213ss" 36  } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 32  } } */
+/* { dg-final { scan-assembler-times "vfmadd213ss" 32  } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 32  } } */
+/* { dg-final { scan-assembler-times "vfmsub213ss" 32  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 32  } } */
+/* { dg-final { scan-assembler-times "vfnmadd213ss" 32  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 32  } } */
+/* { dg-final { scan-assembler-times "vfnmsub213ss" 32  } } */
index dd5f543f58c6d4bf15f563c99f587835fa32beba..34b7fcb6dd5b89f0320ac756bbae1ce62d0bd1e5 100644 (file)
@@ -12,7 +12,7 @@
 /* { dg-final { scan-assembler-times "vfmsub132ps" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132ps" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132ps" 8  } } */
-/* { dg-final { scan-assembler-times "vfmadd132ss" 72  } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 72  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 72  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 72  } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 64  } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 64  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 64  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 64  } } */
index 38853353b017c8b370c00f1ebcec351fb7e5c13c..6ff2c6eacd54ea9048e750a7f5b60b74dcd05039 100644 (file)
 /* { dg-final { scan-assembler-times "vfnmadd231ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132ps" 4  } } */
 /* { dg-final { scan-assembler-times "vfnmsub231ps" 4  } } */
-/* { dg-final { scan-assembler-times "vfmadd132ss" 36  } } */
-/* { dg-final { scan-assembler-times "vfmadd213ss" 36  } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 36  } } */
-/* { dg-final { scan-assembler-times "vfmsub213ss" 36  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 36  } } */
-/* { dg-final { scan-assembler-times "vfnmadd213ss" 36  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 36  } } */
-/* { dg-final { scan-assembler-times "vfnmsub213ss" 36  } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 32  } } */
+/* { dg-final { scan-assembler-times "vfmadd213ss" 32  } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 32  } } */
+/* { dg-final { scan-assembler-times "vfmsub213ss" 32  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 32  } } */
+/* { dg-final { scan-assembler-times "vfnmadd213ss" 32  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 32  } } */
+/* { dg-final { scan-assembler-times "vfnmsub213ss" 32  } } */
index 5a7bb217836041ba276d9b9d766e4d331fe2ba24..39548bfa76b74f6e83e8139e92b535ad727d9a24 100644 (file)
@@ -12,7 +12,7 @@
 /* { dg-final { scan-assembler-times "vfmsub132ps" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132ps" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132ps" 8  } } */
-/* { dg-final { scan-assembler-times "vfmadd132ss" 72  } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 72  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 72  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 72  } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 64  } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 64  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 64  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 64  } } */
index 0b0454ed336d3d11263153ee4c5271b2e67280af..83d795125921b5cdc2a8216c7ed140fca3d3aaf0 100644 (file)
@@ -12,7 +12,7 @@
 /* { dg-final { scan-assembler-times "vfmsub132ps" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132ps" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132ps" 8  } } */
-/* { dg-final { scan-assembler-times "vfmadd132ss" 72  } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 72  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 72  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 72  } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 64  } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 64  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 64  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 64  } } */
index 03bf8e84835816ccf3dfd0a918b2ea0455664fb1..1eefc817c3681325d53266c1d62c46962820fe06 100644 (file)
@@ -12,7 +12,7 @@
 /* { dg-final { scan-assembler-times "vfmsub132ps" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmadd132ps" 8  } } */
 /* { dg-final { scan-assembler-times "vfnmsub132ps" 8  } } */
-/* { dg-final { scan-assembler-times "vfmadd132ss" 72  } } */
-/* { dg-final { scan-assembler-times "vfmsub132ss" 72  } } */
-/* { dg-final { scan-assembler-times "vfnmadd132ss" 72  } } */
-/* { dg-final { scan-assembler-times "vfnmsub132ss" 72  } } */
+/* { dg-final { scan-assembler-times "vfmadd132ss" 64  } } */
+/* { dg-final { scan-assembler-times "vfmsub132ss" 64  } } */
+/* { dg-final { scan-assembler-times "vfnmadd132ss" 64  } } */
+/* { dg-final { scan-assembler-times "vfnmsub132ss" 64  } } */
index 3c356e35b6fd44206ac4647f6d152e87d881cd2c..58ded23399e16f9fe3b1cbc01b0221eff4a2a099 100644 (file)
@@ -1954,9 +1954,16 @@ vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
      by ratio_mult_vf_name steps.  */
   vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);
 
-  max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
+  /* For vectorization factor N, we need to copy last N-1 values in epilogue
+     and this means N-2 loopback edge executions.
+
+     PEELING_FOR_GAPS works by subtracting last iteration and thus the epilogue
+     will execute at least LOOP_VINFO_VECT_FACTOR times.  */
+  max_iter = (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+             ? LOOP_VINFO_VECT_FACTOR (loop_vinfo) * 2
+             : LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 2;
   if (check_profitability)
-    max_iter = MAX (max_iter, (int) th);
+    max_iter = MAX (max_iter, (int) th - 1);
   record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
   dump_printf (MSG_OPTIMIZED_LOCATIONS,
                "Setting upper bound of nb iterations for epilogue "
@@ -2186,9 +2193,11 @@ vect_do_peeling_for_alignment (loop_vec_info loop_vinfo,
 #ifdef ENABLE_CHECKING
   slpeel_verify_cfg_after_peeling (new_loop, loop);
 #endif
-  max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 1;
+  /* For vectorization factor N, we need to copy at most N-1 values 
+     for alignment and this means N-2 loopback edge executions.  */
+  max_iter = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - 2;
   if (check_profitability)
-    max_iter = MAX (max_iter, (int) th);
+    max_iter = MAX (max_iter, (int) th - 1);
   record_niter_bound (new_loop, double_int::from_shwi (max_iter), false, true);
   dump_printf (MSG_OPTIMIZED_LOCATIONS,
                "Setting upper bound of nb iterations for prologue "
index 908caed0b57137b36a12447401e73d92e1ee535b..5e99857efd6f40bbb66c0516da07a9b7b01f6bc3 100644 (file)
@@ -5448,10 +5448,16 @@ vect_transform_loop (loop_vec_info loop_vinfo)
   bool transform_pattern_stmt = false;
   bool check_profitability = false;
   int th;
+  /* Record number of iterations before we started tampering with the profile. */
+  gcov_type expected_iterations = expected_loop_iterations_unbounded (loop);
 
   if (dump_enabled_p ())
     dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===");
 
+  /* If profile is imprecise, we have a chance to fix it up.  */
+  if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+    expected_iterations = LOOP_VINFO_INT_NITERS (loop_vinfo);
+
   /* Use the more conservative vectorization threshold.  If the number
      of iterations is constant assume the cost check has been performed
      by our caller.  If the threshold makes all loops profitable that
@@ -5735,6 +5741,25 @@ vect_transform_loop (loop_vec_info loop_vinfo)
 
   slpeel_make_loop_iterate_ntimes (loop, ratio);
 
+  /* Reduce loop iterations by the vectorization factor.  */
+  scale_loop_profile (loop, RDIV (REG_BR_PROB_BASE , vectorization_factor),
+                     expected_iterations / vectorization_factor);
+  loop->nb_iterations_upper_bound
+    = loop->nb_iterations_upper_bound.udiv (double_int::from_uhwi (vectorization_factor),
+                                           FLOOR_DIV_EXPR);
+  if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+      && loop->nb_iterations_upper_bound != double_int_zero)
+    loop->nb_iterations_upper_bound = loop->nb_iterations_upper_bound - double_int_one;
+  if (loop->any_estimate)
+    {
+      loop->nb_iterations_estimate
+        = loop->nb_iterations_estimate.udiv (double_int::from_uhwi (vectorization_factor),
+                                            FLOOR_DIV_EXPR);
+       if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
+          && loop->nb_iterations_estimate != double_int_zero)
+        loop->nb_iterations_estimate = loop->nb_iterations_estimate - double_int_one;
+    }
+
   /* The memory tags and pointers in vectorized statements need to
      have their SSA forms updated.  FIXME, why can't this be delayed
      until all the loops have been transformed?  */