+2016-02-26 Evandro Menezes <e.menezes@samsung.com>
+
+ Rename the AArch64 tuning option and related functions that enable the
+ Newton series for the reciprocal square root, to reflect its
+ approximate nature.
+
+ gcc/
+ * config/aarch64/aarch64-protos.h (aarch64_emit_swrsqrt): Rename
+ function to "aarch64_emit_approx_rsqrt".
+ * config/aarch64/aarch64-tuning-flags.def: Rename tuning flag to
+ AARCH64_EXTRA_TUNE_APPROX_RSQRT.
+ * config/aarch64/aarch64.c (exynosm1_tunings): Use new flag name.
+ (xgene1_tunings): Likewise.
+ (use_rsqrt_p): Likewise.
+ (aarch64_emit_swrsqrt): Use new function name.
+ * config/aarch64/aarch64-simd.md (aarch64_rsqrts_*): Likewise.
+ * config/aarch64/aarch64.opt (mlow-precision-recip-sqrt): Reword the
+ text explaining this option.
+ * doc/invoke.texi (-mlow-precision-recip-sqrt): Likewise.
+
2016-02-26 Jakub Jelinek <jakub@redhat.com>
PR target/69969
void aarch64_register_pragmas (void);
void aarch64_relayout_simd_types (void);
void aarch64_reset_previous_fndecl (void);
-
-void aarch64_emit_swrsqrt (rtx, rtx);
+void aarch64_emit_approx_rsqrt (rtx, rtx);
/* Initialize builtins for SIMD intrinsics. */
void init_aarch64_simd_builtins (void);
machine_mode mode ATTRIBUTE_UNUSED,
int ignore ATTRIBUTE_UNUSED);
tree aarch64_builtin_decl (unsigned, bool ATTRIBUTE_UNUSED);
-
tree aarch64_builtin_rsqrt (unsigned int);
-
tree aarch64_builtin_vectorized_function (unsigned int, tree, tree);
extern void aarch64_split_combinev16qi (rtx operands[3]);
UNSPEC_RSQRT))]
"TARGET_SIMD"
{
- aarch64_emit_swrsqrt (operands[0], operands[1]);
+ aarch64_emit_approx_rsqrt (operands[0], operands[1]);
DONE;
})
AARCH64_TUNE_ to give an enum name. */
AARCH64_EXTRA_TUNING_OPTION ("rename_fma_regs", RENAME_FMA_REGS)
-AARCH64_EXTRA_TUNING_OPTION ("recip_sqrt", RECIP_SQRT)
+AARCH64_EXTRA_TUNING_OPTION ("approx_rsqrt", APPROX_RSQRT)
48, /* max_case_values. */
64, /* cache_line_size. */
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_RECIP_SQRT) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_APPROX_RSQRT) /* tune_flags. */
};
static const struct tune_params thunderx_tunings =
0, /* max_case_values. */
0, /* cache_line_size. */
tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
- (AARCH64_EXTRA_TUNE_RECIP_SQRT) /* tune_flags. */
+ (AARCH64_EXTRA_TUNE_APPROX_RSQRT) /* tune_flags. */
};
/* Support for fine-grained override of the tuning structures. */
return aarch64_tune_params.memmov_cost;
}
-/* Return true if it is safe and beneficial to use the rsqrt optabs to
- optimize 1.0/sqrt. */
+/* Return true if it is safe and beneficial to use the approximate rsqrt optabs
+ to optimize 1.0/sqrt. */
static bool
use_rsqrt_p (void)
return (!flag_trapping_math
&& flag_unsafe_math_optimizations
&& ((aarch64_tune_params.extra_tuning_flags
- & AARCH64_EXTRA_TUNE_RECIP_SQRT)
+ & AARCH64_EXTRA_TUNE_APPROX_RSQRT)
|| flag_mrecip_low_precision_sqrt));
}
-/* Function to decide when to use
- reciprocal square root builtins. */
+/* Function to decide when to use the approximate reciprocal square root
+ builtin. */
static tree
aarch64_builtin_reciprocal (tree fndecl)
}
}
-/* Emit instruction sequence to compute
- reciprocal square root. Use two Newton-Raphson steps
- for single precision and three for double precision. */
+/* Emit instruction sequence to compute the reciprocal square root using the
+ Newton-Raphson series. Iterate over the series twice for SF
+ and thrice for DF. */
void
-aarch64_emit_swrsqrt (rtx dst, rtx src)
+aarch64_emit_approx_rsqrt (rtx dst, rtx src)
{
machine_mode mode = GET_MODE (src);
gcc_assert (
int iterations = double_mode ? 3 : 2;
+ /* Optionally iterate over the series one less time than otherwise. */
if (flag_mrecip_low_precision_sqrt)
iterations--;
mlow-precision-recip-sqrt
Common Var(flag_mrecip_low_precision_sqrt) Optimization
-When calculating a sqrt approximation, run fewer steps.
-This reduces precision, but can result in faster computation.
+When calculating the reciprocal square root approximation,
+uses one less step than otherwise, thus reducing latency and precision.
@item -mno-low-precision-recip-sqrt
@opindex -mlow-precision-recip-sqrt
@opindex -mno-low-precision-recip-sqrt
-The square root estimate uses two steps instead of three for double-precision,
-and one step instead of two for single-precision.
-Thus reducing latency and precision.
-This is only relevant if @option{-ffast-math} activates
-reciprocal square root estimate instructions.
-Which in turn depends on the target processor.
+When calculating the reciprocal square root approximation,
+use one fewer step than otherwise, thus reducing latency and precision.
+This is only relevant if @option{-ffast-math} enables the reciprocal square root
+approximation, which in turn depends on the target processor.
@item -march=@var{name}
@opindex march