aarch64: Optimize AND with certain vector of immediates as FMOV [PR100165]

author Pengxuan Zheng <quic_pzheng@quicinc.com>

Mon, 12 May 2025 17:12:11 +0000 (10:12 -0700)

committer Pengxuan Zheng <quic_pzheng@quicinc.com>

Fri, 16 May 2025 18:25:08 +0000 (11:25 -0700)
author Pengxuan Zheng <quic_pzheng@quicinc.com>
Mon, 12 May 2025 17:12:11 +0000 (10:12 -0700)
committer Pengxuan Zheng <quic_pzheng@quicinc.com>
Fri, 16 May 2025 18:25:08 +0000 (11:25 -0700)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h

index b59eecf5bdffe5add0eacc4346e34d23941a4e98..8f37e56d440e950d331e61484004f215a7802612 100644 (file)
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -933,6 +933,7 @@ char *aarch64_output_simd_mov_imm (rtx, unsigned);
  char *aarch64_output_simd_orr_imm (rtx, unsigned);
  char *aarch64_output_simd_and_imm (rtx, unsigned);
  char *aarch64_output_simd_xor_imm (rtx, unsigned);
+char *aarch64_output_fmov (rtx);
  
  char *aarch64_output_sve_mov_immediate (rtx);
  char *aarch64_output_sve_ptrues (rtx);
@@ -948,6 +949,7 @@ bool aarch64_simd_scalar_immediate_valid_for_move (rtx, scalar_int_mode);
  bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
  bool aarch64_sve_ptrue_svpattern_p (rtx, struct simd_immediate_info *);
  bool aarch64_simd_valid_and_imm (rtx);
+bool aarch64_simd_valid_and_imm_fmov (rtx, unsigned int * = NULL);
  bool aarch64_simd_valid_mov_imm (rtx);
  bool aarch64_simd_valid_orr_imm (rtx);
  bool aarch64_simd_valid_xor_imm (rtx);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md

index 1099e742cbf7418b75a16ec68f4243fc55e96506..6e30dc48934c3094afaf70d99d0fc0f4fb771f8d 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1117,17 +1117,17 @@
    [(set_attr "type" "neon_fp_abd_<stype><q>")]
  )
  
-;; For AND (vector, register) and BIC (vector, immediate)
+;; For AND (vector, register), BIC (vector, immediate) and FMOV (register)
  (define_insn "and<mode>3<vczle><vczbe>"
    [(set (match_operand:VDQ_I 0 "register_operand")
         (and:VDQ_I (match_operand:VDQ_I 1 "register_operand")
                    (match_operand:VDQ_I 2 "aarch64_reg_or_and_imm")))]
    "TARGET_SIMD"
-  {@ [ cons: =0 , 1 , 2   ]
-     [ w        , w , w   ] and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
-     [ w        , 0 , Db  ] << aarch64_output_simd_and_imm (operands[2], <bitsize>);
+  {@ [ cons: =0 , 1 , 2  ; attrs: type   ]
+     [ w        , w , w  ; neon_logic<q> ] and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
+     [ w        , w , Df ; fmov          ] << aarch64_output_fmov (operands[2]);
+     [ w        , 0 , Db ; neon_logic<q> ] << aarch64_output_simd_and_imm (operands[2], <bitsize>);
    }
-  [(set_attr "type" "neon_logic<q>")]
  )
  
  ;; For ORR (vector, register) and ORR (vector, immediate)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc

index 34f9725485d2016f6268ee1cb22a1d6fd3f1b6fc..1da615c8955a4b7b4c5434f4ae67c517361a96aa 100644 (file)
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -23620,6 +23620,36 @@ aarch64_simd_valid_and_imm (rtx op)
    return aarch64_simd_valid_imm (op, NULL, AARCH64_CHECK_AND);
  }
  
+/* Return true if an AND with the SIMD immediate OP can be optimized as FMOV,
+   i.e. OP's low 16 (with TARGET_SIMD_F16INST), 32 or 64 bits are its only set
+   bits.  If ELT_BITSIZE is nonnull, use it to return that number of bits.  */
+bool
+aarch64_simd_valid_and_imm_fmov (rtx op, unsigned int *elt_bitsize)
+{
+  machine_mode mode = GET_MODE (op);
+  gcc_assert (!aarch64_sve_mode_p (mode));
+
+  auto_vec<target_unit, 16> buffer;
+  unsigned int n_bytes = GET_MODE_SIZE (mode).to_constant ();
+  buffer.reserve (n_bytes);
+
+  bool ok = native_encode_rtx (mode, op, buffer, 0, n_bytes);
+  gcc_assert (ok);
+
+  auto mask = native_decode_int (buffer, 0, n_bytes, n_bytes * BITS_PER_UNIT);
+  int set_bit = wi::exact_log2 (mask + 1);
+  if ((set_bit == 16 && TARGET_SIMD_F16INST)
+      || set_bit == 32
+      || set_bit == 64)
+    {
+      if (elt_bitsize)
+       *elt_bitsize = set_bit;
+      return true;
+    }
+
+  return false;
+}
+
  /* Return true if OP is a valid SIMD xor immediate for SVE.  */
  bool
  aarch64_simd_valid_xor_imm (rtx op)
@@ -25754,6 +25784,26 @@ aarch64_float_const_representable_p (rtx x)
    return aarch64_real_float_const_representable_p (r);
  }
  
+/* Return the template of the FMOV equivalent to an AND with the SIMD immediate
+   CONST_VECTOR.  The string lives in a static buffer reused by each call.  */
+char*
+aarch64_output_fmov (rtx const_vector)
+{
+  bool is_valid;
+  static char templ[40];
+  char element_char;
+  unsigned int elt_bitsize;
+
+  is_valid = aarch64_simd_valid_and_imm_fmov (const_vector, &elt_bitsize);
+  gcc_assert (is_valid);
+
+  element_char = sizetochar (elt_bitsize);
+  snprintf (templ, sizeof (templ), "fmov\t%%%c0, %%%c1", element_char,
+           element_char);
+
+  return templ;
+}
+
  /* Returns the string with the instruction for the SIMD immediate
   * CONST_VECTOR of MODE and WIDTH.  WHICH selects a move, and(bic) or orr.  */
  char*
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md

index e8321c4d2fbd923d0c0c65c1e714b95719ba9d84..e9f69f823a6bd9744d3c0efb1df74ac51e4e9557 100644 (file)
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -466,6 +466,13 @@
   (and (match_code "const_vector")
        (match_test "aarch64_simd_valid_orr_imm (op)")))
  
+(define_constraint "Df"
+  "@internal
+   A constraint that matches a vector of immediates for and which can be
+   optimized as fmov."
+ (and (match_code "const_vector")
+      (match_test "aarch64_simd_valid_and_imm_fmov (op)")))
+
  (define_constraint "Db"
    "@internal
     A constraint that matches vector of immediates for and/bic."
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md

index 1ab1c696c62c09d34cd3ca38390e36b1aa171a8c..2c6af831eae144a6f0f60bd59411c2f4e6209297 100644 (file)
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -123,7 +123,8 @@
  (define_predicate "aarch64_reg_or_and_imm"
     (ior (match_operand 0 "register_operand")
         (and (match_code "const_vector")
-            (match_test "aarch64_simd_valid_and_imm (op)"))))
+            (ior (match_test "aarch64_simd_valid_and_imm (op)")
+                 (match_test "aarch64_simd_valid_and_imm_fmov (op)")))))
  
  (define_predicate "aarch64_reg_or_xor_imm"
     (ior (match_operand 0 "register_operand")
diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-1-be.c b/gcc/testsuite/gcc.target/aarch64/fmov-1-be.c

new file mode 100644 (file)

index 0000000..4227c67
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fmov-1-be.c
@@ -0,0 +1,151 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbig-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target ("arch=armv8-a")
+
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef float v2sf __attribute__ ((vector_size (8)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef long v2di __attribute__ ((vector_size (16)));
+typedef double v2df __attribute__ ((vector_size (16)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef float v4sf __attribute__ ((vector_size (16)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef char v16qi __attribute__ ((vector_size (16)));
+
+/*
+** f_v4hi:
+**     fmov    s0, s0
+**     ret
+*/
+v4hi
+f_v4hi (v4hi x)
+{
+  return x & (v4hi){ 0, 0, 0xffff, 0xffff };
+}
+
+/*
+** g_v4hi:
+**     movi    d([0-9]+), 0xffff00000000ffff
+**     and     v0.8b, (?:v0.8b, v\1.8b|v\1.8b, v0.8b)
+**     ret
+*/
+v4hi
+g_v4hi (v4hi x)
+{
+  return x & (v4hi){ 0xffff, 0, 0, 0xffff };
+}
+
+/*
+** f_v8hi:
+**     fmov    s0, s0
+**     ret
+*/
+v8hi
+f_v8hi (v8hi x)
+{
+  return x & (v8hi){ 0, 0, 0, 0, 0, 0, 0xffff, 0xffff };
+}
+
+/*
+** g_v8hi:
+**     fmov    d0, d0
+**     ret
+*/
+v8hi
+g_v8hi (v8hi x)
+{
+  return x & (v8hi){ 0, 0, 0, 0, 0xffff, 0xffff, 0xffff, 0xffff };
+}
+
+/*
+** f_v2si:
+**     fmov    s0, s0
+**     ret
+*/
+v2si
+f_v2si (v2si x)
+{
+  return x & (v2si){ 0, 0xffffffff };
+}
+
+/*
+** f_v2di:
+**     fmov    d0, d0
+**     ret
+*/
+v2di
+f_v2di (v2di x)
+{
+  return x & (v2di){ 0, 0xffffffffffffffff };
+}
+
+/*
+** g_v2di:
+**     fmov    s0, s0
+**     ret
+*/
+v2di
+g_v2di (v2di x)
+{
+  return x & (v2di){ 0, 0xffffffff };
+}
+
+/*
+** f_v4si:
+**     fmov    s0, s0
+**     ret
+*/
+v4si
+f_v4si (v4si x)
+{
+  return x & (v4si){ 0, 0, 0, 0xffffffff };
+}
+
+/*
+** h_v4si:
+**     fmov    d0, d0
+**     ret
+*/
+v4si
+h_v4si (v4si x)
+{
+  return x & (v4si){ 0, 0, 0xffffffff, 0xffffffff };
+}
+
+/*
+** f_v8qi:
+**     fmov    s0, s0
+**     ret
+*/
+v8qi
+f_v8qi (v8qi x)
+{
+  return x & (v8qi){ 0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff };
+}
+
+/*
+** f_v16qi:
+**     fmov    d0, d0
+**     ret
+*/
+v16qi
+f_v16qi (v16qi x)
+{
+  return x & (v16qi){ 0,    0,    0,    0,    0,    0,    0,    0,
+                     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
+}
+
+/*
+** g_v16qi:
+**     fmov    s0, s0
+**     ret
+*/
+v16qi
+g_v16qi (v16qi x)
+{
+  return x & (v16qi){ 0, 0, 0, 0, 0,    0,    0,    0,
+                     0, 0, 0, 0, 0xff, 0xff, 0xff, 0xff };
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-1-le.c b/gcc/testsuite/gcc.target/aarch64/fmov-1-le.c

new file mode 100644 (file)

index 0000000..618702a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fmov-1-le.c
@@ -0,0 +1,151 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mlittle-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target ("arch=armv8-a")
+
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef float v2sf __attribute__ ((vector_size (8)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef long v2di __attribute__ ((vector_size (16)));
+typedef double v2df __attribute__ ((vector_size (16)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef float v4sf __attribute__ ((vector_size (16)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef char v16qi __attribute__ ((vector_size (16)));
+
+/*
+** f_v4hi:
+**     fmov    s0, s0
+**     ret
+*/
+v4hi
+f_v4hi (v4hi x)
+{
+  return x & (v4hi){ 0xffff, 0xffff, 0, 0 };
+}
+
+/*
+** g_v4hi:
+**     movi    d([0-9]+), 0xffff00000000ffff
+**     and     v0.8b, (?:v0.8b, v\1.8b|v\1.8b, v0.8b)
+**     ret
+*/
+v4hi
+g_v4hi (v4hi x)
+{
+  return x & (v4hi){ 0xffff, 0, 0, 0xffff };
+}
+
+/*
+** f_v8hi:
+**     fmov    s0, s0
+**     ret
+*/
+v8hi
+f_v8hi (v8hi x)
+{
+  return x & (v8hi){ 0xffff, 0xffff, 0, 0, 0, 0, 0, 0 };
+}
+
+/*
+** g_v8hi:
+**     fmov    d0, d0
+**     ret
+*/
+v8hi
+g_v8hi (v8hi x)
+{
+  return x & (v8hi){ 0xffff, 0xffff, 0xffff, 0xffff, 0, 0, 0, 0 };
+}
+
+/*
+** f_v2si:
+**     fmov    s0, s0
+**     ret
+*/
+v2si
+f_v2si (v2si x)
+{
+  return x & (v2si){ 0xffffffff, 0 };
+}
+
+/*
+** f_v2di:
+**     fmov    d0, d0
+**     ret
+*/
+v2di
+f_v2di (v2di x)
+{
+  return x & (v2di){ 0xffffffffffffffff, 0 };
+}
+
+/*
+** g_v2di:
+**     fmov    s0, s0
+**     ret
+*/
+v2di
+g_v2di (v2di x)
+{
+  return x & (v2di){ 0xffffffff, 0 };
+}
+
+/*
+** f_v4si:
+**     fmov    s0, s0
+**     ret
+*/
+v4si
+f_v4si (v4si x)
+{
+  return x & (v4si){ 0xffffffff, 0, 0, 0 };
+}
+
+/*
+** h_v4si:
+**     fmov    d0, d0
+**     ret
+*/
+v4si
+h_v4si (v4si x)
+{
+  return x & (v4si){ 0xffffffff, 0xffffffff, 0, 0 };
+}
+
+/*
+** f_v8qi:
+**     fmov    s0, s0
+**     ret
+*/
+v8qi
+f_v8qi (v8qi x)
+{
+  return x & (v8qi){ 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0 };
+}
+
+/*
+** f_v16qi:
+**     fmov    d0, d0
+**     ret
+*/
+v16qi
+f_v16qi (v16qi x)
+{
+  return x & (v16qi){ 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
+                     0,    0,    0,    0,    0,    0,    0,    0 };
+}
+
+/*
+** g_v16qi:
+**     fmov    s0, s0
+**     ret
+*/
+v16qi
+g_v16qi (v16qi x)
+{
+  return x & (v16qi){ 0xff, 0xff, 0xff, 0xff, 0, 0, 0, 0,
+                     0,    0,    0,    0,    0, 0, 0, 0 };
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-2-be.c b/gcc/testsuite/gcc.target/aarch64/fmov-2-be.c

new file mode 100644 (file)

index 0000000..1e38066
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fmov-2-be.c
@@ -0,0 +1,90 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mbig-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target ("arch=armv8.2-a+fp16")
+
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef long v2di __attribute__ ((vector_size (16)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef char v16qi __attribute__ ((vector_size (16)));
+
+/*
+** f_v2di:
+**     fmov    h0, h0
+**     ret
+*/
+v2di
+f_v2di (v2di x)
+{
+  return x & (v2di){ 0, 0xffff };
+}
+
+/*
+** f_v4si:
+**     fmov    h0, h0
+**     ret
+*/
+v4si
+f_v4si (v4si x)
+{
+  return x & (v4si){ 0, 0, 0, 0xffff };
+}
+
+/*
+** f_v2si:
+**     fmov    h0, h0
+**     ret
+*/
+v2si
+f_v2si (v2si x)
+{
+  return x & (v2si){ 0, 0xffff };
+}
+
+/*
+** f_v8hi:
+**     fmov    h0, h0
+**     ret
+*/
+v8hi
+f_v8hi (v8hi x)
+{
+  return x & (v8hi){ 0, 0, 0, 0, 0, 0, 0, 0xffff };
+}
+
+/*
+** f_v4hi:
+**     fmov    h0, h0
+**     ret
+*/
+v4hi
+f_v4hi (v4hi x)
+{
+  return x & (v4hi){ 0, 0, 0, 0xffff };
+}
+
+/*
+** f_v16qi:
+**     fmov    h0, h0
+**     ret
+*/
+v16qi
+f_v16qi (v16qi x)
+{
+  return x & (v16qi){ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff };
+}
+
+/*
+** f_v8qi:
+**     fmov    h0, h0
+**     ret
+*/
+v8qi
+f_v8qi (v8qi x)
+{
+  return x & (v8qi){ 0, 0, 0, 0, 0, 0, 0xff, 0xff };
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/fmov-2-le.c b/gcc/testsuite/gcc.target/aarch64/fmov-2-le.c

new file mode 100644 (file)

index 0000000..7627680
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/fmov-2-le.c
@@ -0,0 +1,90 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -mlittle-endian" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+#pragma GCC target ("arch=armv8.2-a+fp16")
+
+typedef int v2si __attribute__ ((vector_size (8)));
+typedef short v4hi __attribute__ ((vector_size (8)));
+typedef char v8qi __attribute__ ((vector_size (8)));
+typedef long v2di __attribute__ ((vector_size (16)));
+typedef int v4si __attribute__ ((vector_size (16)));
+typedef short v8hi __attribute__ ((vector_size (16)));
+typedef char v16qi __attribute__ ((vector_size (16)));
+
+/*
+** f_v2di:
+**     fmov    h0, h0
+**     ret
+*/
+v2di
+f_v2di (v2di x)
+{
+  return x & (v2di){ 0xffff, 0 };
+}
+
+/*
+** f_v4si:
+**     fmov    h0, h0
+**     ret
+*/
+v4si
+f_v4si (v4si x)
+{
+  return x & (v4si){ 0xffff, 0, 0, 0 };
+}
+
+/*
+** f_v2si:
+**     fmov    h0, h0
+**     ret
+*/
+v2si
+f_v2si (v2si x)
+{
+  return x & (v2si){ 0xffff, 0 };
+}
+
+/*
+** f_v8hi:
+**     fmov    h0, h0
+**     ret
+*/
+v8hi
+f_v8hi (v8hi x)
+{
+  return x & (v8hi){ 0xffff, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+/*
+** f_v4hi:
+**     fmov    h0, h0
+**     ret
+*/
+v4hi
+f_v4hi (v4hi x)
+{
+  return x & (v4hi){ 0xffff, 0, 0, 0 };
+}
+
+/*
+** f_v16qi:
+**     fmov    h0, h0
+**     ret
+*/
+v16qi
+f_v16qi (v16qi x)
+{
+  return x & (v16qi){ 0xff, 0xff, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+}
+
+/*
+** f_v8qi:
+**     fmov    h0, h0
+**     ret
+*/
+v8qi
+f_v8qi (v8qi x)
+{
+  return x & (v8qi){ 0xff, 0xff, 0, 0, 0, 0, 0, 0 };
+}
author	Pengxuan Zheng <quic_pzheng@quicinc.com>
	Mon, 12 May 2025 17:12:11 +0000 (10:12 -0700)
committer	Pengxuan Zheng <quic_pzheng@quicinc.com>
	Fri, 16 May 2025 18:25:08 +0000 (11:25 -0700)
gcc/config/aarch64/aarch64-protos.h		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64-simd.md		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64.cc		patch \| blob \| blame \| history
gcc/config/aarch64/constraints.md		patch \| blob \| blame \| history
gcc/config/aarch64/predicates.md		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/aarch64/fmov-1-be.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/fmov-1-le.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/fmov-2-be.c	[new file with mode: 0644]	patch \| blob
gcc/testsuite/gcc.target/aarch64/fmov-2-le.c	[new file with mode: 0644]	patch \| blob