Add support for vectorized fma.

author Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>

Tue, 11 Sep 2012 12:53:00 +0000 (12:53 +0000)

committer Ramana Radhakrishnan <ramana@gcc.gnu.org>

Tue, 11 Sep 2012 12:53:00 +0000 (12:53 +0000)
author Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
Tue, 11 Sep 2012 12:53:00 +0000 (12:53 +0000)
committer Ramana Radhakrishnan <ramana@gcc.gnu.org>
Tue, 11 Sep 2012 12:53:00 +0000 (12:53 +0000)
diff --git a/gcc/ChangeLog b/gcc/ChangeLog

index b1ac091e2bc6311a02e1a67a9c9f05cc1368b566..e65785a9e3121f70c3b4d5c2101fbd7e05e38fe2 100644 (file)
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,10 @@
+2012-09-11  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+            Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>
+
+       * config/arm/neon.md (fma<VCVTF:mode>4): New pattern.
+       (*fmsub<VCVTF:mode>4): Likewise.
+       * doc/sourcebuild.texi (arm_neon_v2_ok, arm_neon_v2_hw):  Document it.
+
  2012-09-11  Aldy Hernandez  <aldyh@redhat.com>
  
         PR middle-end/54149
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md

index faf909078b44999b4f6b8cb93f9a42e7f9bd2010..fe0618c225858624d697c981f655bfe776a6a9b0 100644 (file)
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -707,6 +707,33 @@
                                      (const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
  )
  
+;; Fused multiply-accumulate
+(define_insn "fma<VCVTF:mode>4"
+  [(set (match_operand:VCVTF 0 "register_operand" "=w")
+        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
+                (match_operand:VCVTF 2 "register_operand" "w")
+                (match_operand:VCVTF 3 "register_operand" "0")))]
+  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
+  "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+       (if_then_else (match_test "<Is_d_reg>")
+                     (const_string "neon_fp_vmla_ddd")
+                     (const_string "neon_fp_vmla_qqq")))]
+)
+
+(define_insn "*fmsub<VCVTF:mode>4"
+  [(set (match_operand:VCVTF 0 "register_operand" "=w")
+        (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
+                  (match_operand:VCVTF 2 "register_operand" "w")
+                  (match_operand:VCVTF 3 "register_operand" "0")))]
+  "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
+  "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+       (if_then_else (match_test "<Is_d_reg>")
+                     (const_string "neon_fp_vmla_ddd")
+                     (const_string "neon_fp_vmla_qqq")))]
+)
+
  (define_insn "ior<mode>3"
    [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
         (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi

index 7e9dbe31c2d96b10ece1383af2746f0574435cbc..3fe52ad2356448dd16d5315164356f511759f238 100644 (file)
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -1525,11 +1525,19 @@ ARM target supports generating NEON instructions.
  @item arm_neon_hw
  Test system supports executing NEON instructions.
  
+@item arm_neonv2_hw
+Test system supports executing NEON v2 instructions.
+
  @item arm_neon_ok
  @anchor{arm_neon_ok}
  ARM Target supports @code{-mfpu=neon -mfloat-abi=softfp} or compatible
  options.  Some multilibs may be incompatible with these options.
  
+@item arm_neonv2_ok
+@anchor{arm_neon_ok}
+ARM Target supports @code{-mfpu=neon -mfloat-abi=softfp} or compatible
+options.  Some multilibs may be incompatible with these options.
+
  @item arm_neon_fp16_ok
  @anchor{arm_neon_fp16_ok}
  ARM Target supports @code{-mfpu=neon-fp16 -mfloat-abi=softfp} or compatible
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog

index a68b6230fe6843a0d0c86fd92ae44a8243836837..47def6320280526c7cade82d32d952e7673eb48d 100644 (file)
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,18 @@
+2012-09-11  Ramana Radhakrishnan  <ramana.radhakrishnan@arm.com>
+           Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>
+
+       * gcc.target/arm/neon-vfma-1.c: New testcase.
+       * gcc.target/arm/neon-vfms-1.c: Likewise.
+       * gcc.target/arm/neon-vmla-1.c: Update test to use int instead
+       of float.
+       * gcc.target/arm/neon-vmls-1.c: Likewise.
+       * lib/target-supports.exp (add_options_for_arm_neonv2): New
+       function.
+       (check_effective_target_arm_neonv2_ok_nocache): Likewise.
+       (check_effective_target_arm_neonv2_ok): Likewise.
+       (check_effective_target_arm_neonv2_hw): Likewise.
+       (check_effective_target_arm_neonv2): Likewise.
+
  2012-09-11  Richard Guenther  <rguenther@suse.de>
  
         PR middle-end/54515
diff --git a/gcc/testsuite/gcc.target/arm/neon-vfma-1.c b/gcc/testsuite/gcc.target/arm/neon-vfma-1.c

new file mode 100644 (file)

index 0000000..a003a82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vfma-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neonv2_ok } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-add-options arm_neonv2 } */
+/* { dg-final { scan-assembler "vfma\\.f32\[   \]+\[dDqQ]" } } */
+
+/* Verify that VFMA is used.  */
+void f1(int n, float a, float x[], float y[]) {
+  int i;
+  for (i = 0; i < n; ++i)
+    y[i] = a * x[i] + y[i];
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon-vfms-1.c b/gcc/testsuite/gcc.target/arm/neon-vfms-1.c

new file mode 100644 (file)

index 0000000..8cefd8a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vfms-1.c
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neonv2_ok } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-add-options arm_neonv2 } */
+/* { dg-final { scan-assembler "vfms\\.f32\[   \]+\[dDqQ]" } } */
+
+/* Verify that VFMS is used.  */
+void f1(int n, float a, float x[], float y[]) {
+  int i;
+  for (i = 0; i < n; ++i)
+    y[i] = a * -x[i] + y[i];
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon-vmla-1.c b/gcc/testsuite/gcc.target/arm/neon-vmla-1.c

index 9d239ed47d09c95a57cb68545e34eaa322d92f3b..c60c014e0c2fd3dee903a56f5af3029bb329e546 100644 (file)
--- a/gcc/testsuite/gcc.target/arm/neon-vmla-1.c
+++ b/gcc/testsuite/gcc.target/arm/neon-vmla-1.c
@@ -1,10 +1,10 @@
  /* { dg-require-effective-target arm_neon_hw } */
  /* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
  /* { dg-add-options arm_neon } */
-/* { dg-final { scan-assembler "vmla\\.f32" } } */
+/* { dg-final { scan-assembler "vmla\\.i32" } } */
  
  /* Verify that VMLA is used.  */
-void f1(int n, float a, float x[], float y[]) {
+void f1(int n, int a, int x[], int y[]) {
    int i;
    for (i = 0; i < n; ++i)
      y[i] = a * x[i] + y[i];
diff --git a/gcc/testsuite/gcc.target/arm/neon-vmls-1.c b/gcc/testsuite/gcc.target/arm/neon-vmls-1.c

index 2beaebe17cf4ce0c210c3e04a31091aeb505982c..89ee82b0fe81093722bbefd7fa1d410cceeb0ee1 100644 (file)
--- a/gcc/testsuite/gcc.target/arm/neon-vmls-1.c
+++ b/gcc/testsuite/gcc.target/arm/neon-vmls-1.c
@@ -1,10 +1,10 @@
  /* { dg-require-effective-target arm_neon_hw } */
  /* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
  /* { dg-add-options arm_neon } */
-/* { dg-final { scan-assembler "vmls\\.f32" } } */
+/* { dg-final { scan-assembler "vmls\\.i32" } } */
  
  /* Verify that VMLS is used.  */
-void f1(int n, float a, float x[], float y[]) {
+void f1(int n, int a, int x[], int y[]) {
    int i;
    for (i = 0; i < n; ++i)
      y[i] = y[i] - a * x[i];
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp

index f597316ac4dbcd73bcecdf1179bb4485cbe1444a..8f793b7e50995b45d3ae5ef4d232afca159980af 100644 (file)
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2099,6 +2099,19 @@ proc add_options_for_arm_neon { flags } {
      return "$flags $et_arm_neon_flags"
  }
  
+# Add the options needed for NEON.  We need either -mfloat-abi=softfp
+# or -mfloat-abi=hard, but if one is already specified by the
+# multilib, use it.  Similarly, if a -mfpu option already enables
+# NEON, do not add -mfpu=neon.
+
+proc add_options_for_arm_neonv2 { flags } {
+    if { ! [check_effective_target_arm_neonv2_ok] } {
+       return "$flags"
+    }
+    global et_arm_neonv2_flags
+    return "$flags $et_arm_neonv2_flags"
+}
+
  # Return 1 if this is an ARM target supporting -mfpu=neon
  # -mfloat-abi=softfp or equivalent options.  Some multilibs may be
  # incompatible with these options.  Also set et_arm_neon_flags to the
@@ -2127,6 +2140,38 @@ proc check_effective_target_arm_neon_ok { } {
                 check_effective_target_arm_neon_ok_nocache]
  }
  
+# Return 1 if this is an ARM target supporting -mfpu=neon-vfpv4
+# -mfloat-abi=softfp or equivalent options.  Some multilibs may be
+# incompatible with these options.  Also set et_arm_neonv2_flags to the
+# best options to add.
+
+proc check_effective_target_arm_neonv2_ok_nocache { } {
+    global et_arm_neonv2_flags
+    set et_arm_neonv2_flags ""
+    if { [check_effective_target_arm32] } {
+       foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-vfpv4" "-mfpu=neon-vfpv4 -mfloat-abi=softfp"} {
+           if { [check_no_compiler_messages_nocache arm_neonv2_ok object {
+               #include "arm_neon.h"
+               float32x2_t 
+               foo (float32x2_t a, float32x2_t b, float32x2_t c)
+                {
+                  return vfma_f32 (a, b, c);
+                }
+           } "$flags"] } {
+               set et_arm_neonv2_flags $flags
+               return 1
+           }
+       }
+    }
+
+    return 0
+}
+
+proc check_effective_target_arm_neonv2_ok { } {
+    return [check_cached_effective_target arm_neonv2_ok \
+               check_effective_target_arm_neonv2_ok_nocache]
+}
+
  # Add the options needed for NEON.  We need either -mfloat-abi=softfp
  # or -mfloat-abi=hard, but if one is already specified by the
  # multilib, use it.
@@ -2318,6 +2363,21 @@ proc check_effective_target_arm_neon_hw { } {
      } [add_options_for_arm_neon ""]]
  }
  
+proc check_effective_target_arm_neonv2_hw { } {
+    return [check_runtime arm_neon_hwv2_available {
+       #include "arm_neon.h"
+       int
+       main (void)
+       {
+         float32x2_t a, b, c;
+         asm ("vfma.f32 %P0, %P1, %P2"
+              : "=w" (a)
+              : "w" (b), "w" (c));
+         return 0;
+       }
+    } [add_options_for_arm_neonv2 ""]]
+}
+
  # Return 1 if this is a ARM target with NEON enabled.
  
  proc check_effective_target_arm_neon { } {
@@ -2334,6 +2394,24 @@ proc check_effective_target_arm_neon { } {
      }
  }
  
+proc check_effective_target_arm_neonv2 { } {
+    if { [check_effective_target_arm32] } {
+       return [check_no_compiler_messages arm_neon object {
+           #ifndef __ARM_NEON__
+           #error not NEON
+           #else
+           #ifndef __ARM_FEATURE_FMA
+           #error not NEONv2
+            #else
+           int dummy;
+           #endif
+           #endif
+       }]
+    } else {
+       return 0
+    }
+}
+
  # Return 1 if this a Loongson-2E or -2F target using an ABI that supports
  # the Loongson vector modes.
author	Ramana Radhakrishnan <ramana.radhakrishnan@arm.com>
	Tue, 11 Sep 2012 12:53:00 +0000 (12:53 +0000)
committer	Ramana Radhakrishnan <ramana@gcc.gnu.org>
	Tue, 11 Sep 2012 12:53:00 +0000 (12:53 +0000)
gcc/ChangeLog		patch \| blob \| blame \| history
gcc/config/arm/neon.md		patch \| blob \| blame \| history
gcc/doc/sourcebuild.texi		patch \| blob \| blame \| history
gcc/testsuite/ChangeLog		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/arm/neon-vfma-1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/arm/neon-vfms-1.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/arm/neon-vmla-1.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/arm/neon-vmls-1.c		patch \| blob \| blame \| history
gcc/testsuite/lib/target-supports.exp		patch \| blob \| blame \| history