Rename the tuning option and related functions to enable the Newton series for the reciprocal square root to reflect its approximative characteristic.

author Evandro Menezes <e.menezes@samsung.com>

Fri, 26 Feb 2016 23:41:53 +0000 (23:41 +0000)

committer Evandro Menezes <evandro@gcc.gnu.org>

Fri, 26 Feb 2016 23:41:53 +0000 (23:41 +0000)
author Evandro Menezes <e.menezes@samsung.com>
Fri, 26 Feb 2016 23:41:53 +0000 (23:41 +0000)
committer Evandro Menezes <evandro@gcc.gnu.org>
Fri, 26 Feb 2016 23:41:53 +0000 (23:41 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index 471fea533e993ba72aadc790fa1f7829ae2ce8bf..8cece6bde41797d062ee64ef905bdbd23d7a4e14 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,23 @@
+2016-02-26  Evandro Menezes  <e.menezes@samsung.com>
+
+       Rename the AArch64 tuning option and related functions to enable the
+       Newton series for the reciprocal square root to reflect its
+       approximative characteristic.
+
+       gcc/
+       * config/aarch64/aarch64-protos.h (aarch64_emit_swrsqrt): Rename
+       function to "aarch64_emit_approx_rsqrt".
+       * config/aarch64/aarch64-tuning-flags.def: Rename tuning flag to
+       AARCH64_EXTRA_TUNE_APPROX_RSQRT.
+       * config/aarch64/aarch64.c (exynosm1_tunings): Use new flag name.
+       (xgene1_tunings): Likewise.
+       (use_rsqrt_p): Likewise.
+       (aarch64_emit_swrsqrt): Use new function name.
+       * config/aarch64/aarch64-simd.md (aarch64_rsqrts_*): Likewise.
+       * config/aarch64/aarch64.opt (mlow-precision-recip-sqrt): Reword the
+       text explaining this option.
+       * doc/invoke.texi (-mlow-precision-recip-sqrt): Likewise.
+
  2016-02-26  Jakub Jelinek  <jakub@redhat.com>
  
         PR target/69969
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h

index 78870e275279b1409dfc98b8e46b89b97c76274e..acf2062245f12e049e0a8639ab6a5d95ca842e6d 100644 (file)
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -360,8 +360,7 @@ void aarch64_emit_call_insn (rtx);
  void aarch64_register_pragmas (void);
  void aarch64_relayout_simd_types (void);
  void aarch64_reset_previous_fndecl (void);
-
-void aarch64_emit_swrsqrt (rtx, rtx);
+void aarch64_emit_approx_rsqrt (rtx, rtx);
  
  /* Initialize builtins for SIMD intrinsics.  */
  void init_aarch64_simd_builtins (void);
@@ -413,9 +412,7 @@ rtx aarch64_expand_builtin (tree exp,
                             machine_mode mode ATTRIBUTE_UNUSED,
                             int ignore ATTRIBUTE_UNUSED);
  tree aarch64_builtin_decl (unsigned, bool ATTRIBUTE_UNUSED);
-
  tree aarch64_builtin_rsqrt (unsigned int);
-
  tree aarch64_builtin_vectorized_function (unsigned int, tree, tree);
  
  extern void aarch64_split_combinev16qi (rtx operands[3]);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md

index d8497abdb515cdb048ee13a06e4decfc16cdcb36..bd73bce64414e8bc01732d14311d742cf28f4586 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -405,7 +405,7 @@
                      UNSPEC_RSQRT))]
    "TARGET_SIMD"
  {
-  aarch64_emit_swrsqrt (operands[0], operands[1]);
+  aarch64_emit_approx_rsqrt (operands[0], operands[1]);
    DONE;
  })
  
diff --git a/gcc/config/aarch64/aarch64-tuning-flags.def b/gcc/config/aarch64/aarch64-tuning-flags.def

index 8036cfe1ee7c71868e5244d3ceb8c64a22e3cbbd..7e45a0c735dfbaaa8d078fd49ee178daa44ac908 100644 (file)
--- a/gcc/config/aarch64/aarch64-tuning-flags.def
+++ b/gcc/config/aarch64/aarch64-tuning-flags.def
@@ -29,5 +29,5 @@
       AARCH64_TUNE_ to give an enum name. */
  
  AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
-AARCH64_EXTRA_TUNING_OPTION ("recip_sqrt", RECIP_SQRT)
+AARCH64_EXTRA_TUNING_OPTION ("approx_rsqrt", APPROX_RSQRT)
  
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c

index 3519c7bf3abd5c6016f21930b209f067ac6adc09..801f95ab7fd6dd88a9ae44e81b980100e3dcbcf0 100644 (file)
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -538,7 +538,7 @@ static const struct tune_params exynosm1_tunings =
    48,  /* max_case_values.  */
    64,  /* cache_line_size.  */
    tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_RECIP_SQRT) /* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_APPROX_RSQRT) /* tune_flags.  */
  };
  
  static const struct tune_params thunderx_tunings =
@@ -586,7 +586,7 @@ static const struct tune_params xgene1_tunings =
    0,   /* max_case_values.  */
    0,   /* cache_line_size.  */
    tune_params::AUTOPREFETCHER_OFF,     /* autoprefetcher_model.  */
-  (AARCH64_EXTRA_TUNE_RECIP_SQRT)      /* tune_flags.  */
+  (AARCH64_EXTRA_TUNE_APPROX_RSQRT)    /* tune_flags.  */
  };
  
  /* Support for fine-grained override of the tuning structures.  */
@@ -7460,8 +7460,8 @@ aarch64_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
    return aarch64_tune_params.memmov_cost;
  }
  
-/* Return true if it is safe and beneficial to use the rsqrt optabs to
-   optimize 1.0/sqrt.  */
+/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
+   to optimize 1.0/sqrt.  */
  
  static bool
  use_rsqrt_p (void)
@@ -7469,12 +7469,12 @@ use_rsqrt_p (void)
    return (!flag_trapping_math
           && flag_unsafe_math_optimizations
           && ((aarch64_tune_params.extra_tuning_flags
-              & AARCH64_EXTRA_TUNE_RECIP_SQRT)
+              & AARCH64_EXTRA_TUNE_APPROX_RSQRT)
               || flag_mrecip_low_precision_sqrt));
  }
  
-/* Function to decide when to use
-   reciprocal square root builtins.  */
+/* Function to decide when to use the approximate reciprocal square root
+   builtin.  */
  
  static tree
  aarch64_builtin_reciprocal (tree fndecl)
@@ -7522,12 +7522,12 @@ get_rsqrts_type (machine_mode mode)
    }
  }
  
-/* Emit instruction sequence to compute
-   reciprocal square root.  Use two Newton-Raphson steps
-   for single precision and three for double precision.  */
+/* Emit instruction sequence to compute the reciprocal square root using the
+   Newton-Raphson series.  Iterate over the series twice for SF
+   and thrice for DF.  */
  
  void
-aarch64_emit_swrsqrt (rtx dst, rtx src)
+aarch64_emit_approx_rsqrt (rtx dst, rtx src)
  {
    machine_mode mode = GET_MODE (src);
    gcc_assert (
@@ -7544,6 +7544,7 @@ aarch64_emit_swrsqrt (rtx dst, rtx src)
  
    int iterations = double_mode ? 3 : 2;
  
+  /* Optionally iterate over the series one less time than otherwise.  */
    if (flag_mrecip_low_precision_sqrt)
      iterations--;
  
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt

index 5cbd4cd04b9d1a55c6b98db580a4969b3c27565c..49ef0c64e2b05261534d445fecb1d6a8d5946f4d 100644 (file)
--- a/gcc/config/aarch64/aarch64.opt
+++ b/gcc/config/aarch64/aarch64.opt
@@ -151,5 +151,5 @@ PC relative literal loads.
  
  mlow-precision-recip-sqrt
  Common Var(flag_mrecip_low_precision_sqrt) Optimization
-When calculating a sqrt approximation, run fewer steps.
-This reduces precision, but can result in faster computation.
+When calculating the reciprocal square root approximation,
+use one fewer step than otherwise, thus reducing latency and precision.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi

index 18b2b8f31075b6286d63d1f489c2769daac6cec5..4b5df0b7edd03a8a969258dcf3432c73596e2968 100644 (file)
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -12884,12 +12884,10 @@ corresponding flag to the linker.
  @item -mno-low-precision-recip-sqrt
  @opindex -mlow-precision-recip-sqrt
  @opindex -mno-low-precision-recip-sqrt
-The square root estimate uses two steps instead of three for double-precision,
-and one step instead of two for single-precision.
-Thus reducing latency and precision.
-This is only relevant if @option{-ffast-math} activates
-reciprocal square root estimate instructions.
-Which in turn depends on the target processor.
+When calculating the reciprocal square root approximation,
+use one fewer step than otherwise, thus reducing latency and precision.
+This is only relevant if @option{-ffast-math} enables the reciprocal square root
+approximation, which in turn depends on the target processor.
  
  @item -march=@var{name}
  @opindex march
author	Evandro Menezes <e.menezes@samsung.com>
	Fri, 26 Feb 2016 23:41:53 +0000 (23:41 +0000)
committer	Evandro Menezes <evandro@gcc.gnu.org>
	Fri, 26 Feb 2016 23:41:53 +0000 (23:41 +0000)
gcc/ChangeLog		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64-protos.h		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64-simd.md		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64-tuning-flags.def		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64.c		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64.opt		patch \| blob \| blame \| history
gcc/doc/invoke.texi		patch \| blob \| blame \| history