From 0a2b23511f6a851818b615ac45b56e96b9779afa Mon Sep 17 00:00:00 2001 From: Roger Sayle Date: Tue, 28 Jul 2020 15:55:47 +0200 Subject: [PATCH] nvptx: Support floating point reciprocal instructions The following patch adds support for PTX's rcp.rn.f32 and rcp.rn.f64 instructions. Note that the "rcp.rn" forms of this instruction calculate the fully IEEE compliant result for the reciprocal, unlike the rcp.approx variants that just provide fast approximations. This patch has been tested on nvptx-none hosted on x86_64-pc-linux-gnu with "make" and "make check" with no new regressions. 2020-07-12 Roger Sayle gcc/ChangeLog: * config/nvptx/nvptx.md (recip2): New instruction. gcc/testsuite/ChangeLog: * gcc.target/nvptx/recip-1.c: New test. --- gcc/config/nvptx/nvptx.md | 9 +++++++++ gcc/testsuite/gcc.target/nvptx/recip-1.c | 18 ++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 gcc/testsuite/gcc.target/nvptx/recip-1.c diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 0538e834a4c2..746d6ec4124a 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -879,6 +879,15 @@ "" "%.\\tfma%#%t0\\t%0, %1, %2, %3;") +(define_insn "*recip2" + [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") + (div:SDFM + (match_operand:SDFM 2 "const_double_operand" "F") + (match_operand:SDFM 1 "nvptx_register_operand" "R")))] + "CONST_DOUBLE_P (operands[2]) + && real_identical (CONST_DOUBLE_REAL_VALUE (operands[2]), &dconst1)" + "%.\\trcp%#%t0\\t%0, %1;") + (define_insn "div3" [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R") (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R") diff --git a/gcc/testsuite/gcc.target/nvptx/recip-1.c b/gcc/testsuite/gcc.target/nvptx/recip-1.c new file mode 100644 index 000000000000..18127876046f --- /dev/null +++ b/gcc/testsuite/gcc.target/nvptx/recip-1.c @@ -0,0 +1,18 @@ +/* { dg-do assemble } */ +/* { dg-options "-O2 -save-temps" } */ + +double +foo (double x) +{ + return 1.0 / x; 
+} + +float +foof (float x) +{ + return 1.0f / x; +} + +/* { dg-final { scan-assembler-times "rcp.rn.f64" 1 } } */ +/* { dg-final { scan-assembler-times "rcp.rn.f32" 1 } } */ + -- 2.47.2