aarch64: introduce partial AdvSIMD vector modes

author Artemiy Volkov <artemiy.volkov@arm.com>

Mon, 18 May 2026 10:21:18 +0000 (10:21 +0000)

committer Artemiy Volkov <artemiy.volkov@arm.com>

Thu, 28 May 2026 11:26:39 +0000 (11:26 +0000)
author Artemiy Volkov <artemiy.volkov@arm.com>
Mon, 18 May 2026 10:21:18 +0000 (10:21 +0000)
committer Artemiy Volkov <artemiy.volkov@arm.com>
Thu, 28 May 2026 11:26:39 +0000 (11:26 +0000)
diff --git a/gcc/config/aarch64/aarch64-modes.def b/gcc/config/aarch64/aarch64-modes.def

index d9bff61adec151a90e1a4c05c6b9feedec7b1564..d5a54689f7aab067038c6d185fb8f76a9cee33e2 100644 (file)
--- a/gcc/config/aarch64/aarch64-modes.def
+++ b/gcc/config/aarch64/aarch64-modes.def
@@ -79,8 +79,10 @@ VECTOR_MODES (FLOAT, 8);      /*                 V2SF.  */
  VECTOR_MODES (FLOAT, 16);     /*            V4SF V2DF.  */
  VECTOR_MODE (INT, DI, 1);     /*                 V1DI.  */
  VECTOR_MODE (FLOAT, DF, 1);   /*                 V1DF.  */
-VECTOR_MODE (FLOAT, HF, 2);   /*                 V2HF.  */
  
+VECTOR_MODES (INT, 2);        /*                 V2QI.  */
+VECTOR_MODES (INT, 4);        /*            V4QI V2HI.  */
+VECTOR_MODES (FLOAT, 4);      /*            V2BF V2HF.  */
  
  /* Integer vector modes used to represent intermediate widened values in some
     instructions.  Not intended to be moved to and from registers or memory.  */
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h

index 24da650da76fc5ceed8955338e47b8fb27d0109a..513b556398fac7c733de47c27587c3aaa07cbcf3 100644 (file)
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -872,6 +872,7 @@ bool aarch64_and_bitmask_imm (unsigned HOST_WIDE_INT val_in, machine_mode mode);
  int aarch64_branch_cost (bool, bool);
  enum aarch64_symbol_type aarch64_classify_symbolic_expression (rtx);
  bool aarch64_advsimd_struct_mode_p (machine_mode mode);
+bool aarch64_advsimd_sub_dword_mode_p (machine_mode mode);
  opt_machine_mode aarch64_v64_mode (scalar_mode);
  opt_machine_mode aarch64_v128_mode (scalar_mode);
  opt_machine_mode aarch64_full_sve_mode (scalar_mode);
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md

index 7496da3a70c1819d01b7fcd99c15b4e3f13d4e92..2b7f6b467c625bbd665a7931f6014c1620f2f976 100644 (file)
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -49,8 +49,8 @@
  (define_subst_attr "vczbe" "add_vec_concat_subst_be" "" "_vec_concatz_be")
  
  (define_expand "mov<mode>"
-  [(set (match_operand:VALL_F16 0 "nonimmediate_operand")
-       (match_operand:VALL_F16 1 "general_operand"))]
+  [(set (match_operand:VALL_F16_SUB64 0 "nonimmediate_operand")
+       (match_operand:VALL_F16_SUB64 1 "general_operand"))]
    "TARGET_FLOAT"
    "
    /* Force the operand into a register if it is not an
@@ -77,7 +77,8 @@
           aarch64_expand_vector_init (operands[0], operands[1]);
           DONE;
         }
-      else if (!aarch64_simd_imm_zero (operands[1], <MODE>mode)
+      else if (!aarch64_advsimd_sub_dword_mode_p (<MODE>mode)
+              && !aarch64_simd_imm_zero (operands[1], <MODE>mode)
                && !aarch64_simd_special_constant_p (operands[1], <MODE>mode)
                && !aarch64_simd_valid_mov_imm (operands[1])
                && !aarch64_const_vec_fmov_p (operands[1]))
@@ -244,6 +245,63 @@
    }
  )
  
+;; Move pattern for the sub-64-bit Advanced SIMD vector modes in VSUB64.
+;; The alternatives cover moves between general registers, FP/SIMD
+;; registers and memory, plus zero and constant-vector sources.  The two
+;; "#" alternatives (constant vector into a general register, and FP/SIMD
+;; register into a general register) have no direct template and are
+;; handled by the post-reload split below.
+(define_insn_and_split "*aarch64_simd_mov<mode>"
+  [(set (match_operand:VSUB64 0 "nonimmediate_operand")
+       (match_operand:VSUB64 1 "general_operand"))]
+  "TARGET_FLOAT
+   && (register_operand (operands[0], <MODE>mode)
+       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode)
+       || CONST_VECTOR_P (operands[1]))"
+   {@ [cons: =0, 1; attrs: type, arch]
+     [r , Dz ; mov_imm          , *    ] mov\t%w0, 0
+     [r , rZ ; mov_reg          , *    ] mov\t%w0, %w1
+     [r , Da ; mov_imm          , *    ] #
+     [r , w  ; mov_reg          , simd ] #
+     [r , m  ; load_4           , *    ] ldr<size>\t%w0, %1
+     [w , w  ; neon_logic       , simd ] mov\t%0.8b, %1.8b
+     [w , m  ; neon_load1_1reg  , simd ] ldr\t%<vstype>0, %1
+     [w , Dz ; neon_move        , simd ] movi\t%0.2d, #0
+     [m , rZ ; store_4          , *    ] str<size>\t%w1, %0
+     [m , w  ; neon_store1_1reg , simd ] str\t%<vstype>1, %0
+  }
+  "&& reload_completed
+   && REG_P (operands[0])"
+  [(const_int 0)]
+  {
+    if (CONST_VECTOR_P (operands[1]))
+      {
+        /* Pack the element values into a single integer image of the
+           vector and load it as an SImode immediate.  Accumulate in an
+           unsigned HOST_WIDE_INT so that shifting a set top bit into
+           bit 31 never goes through a signed 32-bit value.  */
+        unsigned int elt_bitsize
+          = GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (operands[1])));
+        int n_elts = CONST_VECTOR_NUNITS (operands[1]).to_constant ();
+        unsigned HOST_WIDE_INT elt_mask
+          = (HOST_WIDE_INT_1U << elt_bitsize) - 1;
+        unsigned HOST_WIDE_INT val = 0;
+        bool int_vector_p = CONST_INT_P (CONST_VECTOR_ELT (operands[1], 0));
+
+        for (int i = 0; i < n_elts; i++)
+          {
+            /* Visit the elements so that the last one shifted in lands in
+               the least significant bits: element 0 for little-endian,
+               element N-1 for big-endian.  */
+            rtx elt = CONST_VECTOR_ELT (operands[1], BYTES_BIG_ENDIAN
+                                                     ? i
+                                                     : n_elts - 1 - i);
+            unsigned HOST_WIDE_INT eltval;
+            if (int_vector_p)
+              eltval = INTVAL (elt);
+            else
+              {
+                bool res = aarch64_reinterpret_float_as_int (elt, &eltval);
+                gcc_assert (res);
+              }
+
+            /* Mask off any sign-extension bits in ELTVAL before merging.  */
+            val = (val << elt_bitsize) | (eltval & elt_mask);
+          }
+
+        /* gen_int_mode truncates and sign-extends VAL as required for a
+           canonical SImode CONST_INT.  */
+        emit_move_insn (gen_rtx_REG (SImode, REGNO (operands[0])),
+                        gen_int_mode (val, SImode));
+      }
+    else if (REG_P (operands[1]))
+      /* Register-to-register: redo the move in the equivalent scalar
+         mode.  */
+      aarch64_simd_emit_reg_reg_move (operands, <VSC>mode, 1);
+    DONE;
+  }
+  [(set_attr "type" "mov_reg")]
+)
+
  ;; When storing lane zero we can use the normal STR and its more permissive
  ;; addressing modes.
  
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc

index 40bbb92ed74018449e35a786688f5543f8e42a36..4ed24c86965217b410452433e2f0ba3bc227aec5 100644 (file)
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -1778,6 +1778,13 @@ aarch64_classify_vector_mode (machine_mode mode, bool any_target_p = false)
      case E_V4x2DFmode:
        return (TARGET_FLOAT || any_target_p) ? VEC_ADVSIMD | VEC_STRUCT : 0;
  
+    /* 16-bit Advanced SIMD vectors.  */
+    case E_V2QImode:
+    /* 32-bit Advanced SIMD vectors.  */
+    case E_V2HFmode:
+    case E_V2BFmode:
+    case E_V2HImode:
+    case E_V4QImode:
      /* 64-bit Advanced SIMD vectors.  */
      case E_V8QImode:
      case E_V4HImode:
@@ -1856,6 +1863,14 @@ aarch64_advsimd_full_struct_mode_p (machine_mode mode)
    return (aarch64_classify_vector_mode (mode) == (VEC_ADVSIMD | VEC_STRUCT));
  }
  
+/* Return true if MODE is classified as a plain Advanced SIMD vector mode
+   and is known to be narrower than 64 bits.  */
+bool
+aarch64_advsimd_sub_dword_mode_p (machine_mode mode)
+{
+  if (aarch64_classify_vector_mode (mode) != VEC_ADVSIMD)
+    return false;
+
+  return known_lt (GET_MODE_BITSIZE (mode), 64);
+}
+
  /* Return true if MODE is any of the data vector modes, including
     structure modes.  */
  static bool
@@ -28415,6 +28430,9 @@ aarch64_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
  {
    struct expand_vec_perm_d d;
  
+  if (aarch64_advsimd_sub_dword_mode_p (op_mode))
+    return false;
+
    /* Check whether the mask can be applied to a single vector.  */
    if (sel.ninputs () == 1
        || (op0 && rtx_equal_p (op0, op1)))
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md

index 8760220835b74e87497a696865a5ea7304b9d406..829b2c949d071dd205d1f999a9de1d84366c36f2 100644 (file)
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -531,6 +531,11 @@
   (and (match_code "const_int")
        (match_test "aarch64_simd_scalar_immediate_valid_for_move (op,
                                                  QImode)")))
+;; "Da" accepts any CONST_VECTOR whose mode satisfies
+;; aarch64_advsimd_sub_dword_mode_p.  Only the mode is tested; the element
+;; values are not inspected.
+(define_constraint "Da"
+  "@internal
+  A constraint that matches all sub-64-bit AdvSIMD vectors."
+  (and (match_code "const_vector")
+       (match_test "aarch64_advsimd_sub_dword_mode_p (GET_MODE (op))")))
  
  (define_constraint "Dt"
    "@internal
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md

index 39b1e84edcc291bc4505e77659e50e4f9d75529e..dfca3327f1fa571c7844af03b28c9b7b47422d6f 100644 (file)
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -227,10 +227,17 @@
  ;; All Advanced SIMD integer modes
  (define_mode_iterator VALLI [VDQ_BHSI V2DI])
  
+;; All sub-64-bit vector modes.
+(define_mode_iterator VSUB64 [V2QI V4QI V2HI V2HF V2BF])
+
  ;; All Advanced SIMD modes suitable for moving, loading, and storing.
  (define_mode_iterator VALL_F16 [V8QI V16QI V4HI V8HI V2SI V4SI V2DI
                                 V4HF V8HF V4BF V8BF V2SF V4SF V2DF])
  
+;; All Advanced SIMD modes suitable for moving, loading, and storing,
+;; plus all sub-64-bit vector modes.
+(define_mode_iterator VALL_F16_SUB64 [VALL_F16 VSUB64])
+
  ;; The VALL_F16 modes except the 128-bit 2-element ones.
  (define_mode_iterator VALL_F16_NO_V2Q [V8QI V16QI V4HI V8HI V2SI V4SI
                                 V4HF V8HF V2SF V4SF])
@@ -1466,7 +1473,9 @@
  (define_mode_attr s [(HF "h") (SF "s") (DF "d") (SI "s") (DI "d")])
  
  ;; Give the length suffix letter for a sign- or zero-extension.
-(define_mode_attr size [(QI "b") (HI "h") (SI "w")])
+(define_mode_attr size [(QI "b") (HI "h") (SI "w") (HF "") (BF "") (SF "")
+                       (V2QI "h") (V4QI "") (V2HI "")
+                       (V2HF "") (V2BF "")])
  
  ;; Give the number of bits in the mode
  (define_mode_attr sizen [(QI "8") (HI "16") (SI "32") (DI "64")])
@@ -1883,6 +1892,10 @@
                         (VNx4SI  "v2si") (VNx4SF "v2sf")
                         (VNx2DI  "di") (VNx2DF "df")])
  
+;; Sub-64-bit vector mode to equivalent scalar mode.
+(define_mode_attr VSC [(V4QI "SI") (V2QI "HI")
+                      (V2HI "SI") (V2HF "SF") (V2BF "SF")])
+
  (define_mode_attr vnx [(V4SI "vnx4si") (V2DI "vnx2di")])
  
  ;; 64-bit container modes the inner or scalar source mode.
@@ -2169,6 +2182,10 @@
                                 (V2SI "q") (V2SF "q")
                                 (DI   "q") (DF   "q")])
  
+;; Scalar size of a sub-64-bit vector mode.
+(define_mode_attr vstype [(V4QI "s") (V2QI "h")
+                         (V2HI "s") (V2BF "s") (V2HF "s")])
+
  ;; Define corresponding core/FP element mode for each vector mode.
  (define_mode_attr vw [(V8QI "w") (V16QI "w")
                       (V4HI "w") (V8HI "w")
diff --git a/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-add-half-float.c b/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-add-half-float.c

index 3f1cce5695581fafc8d0b2a4c7962faa1c2d5707..2cd2d9112cc1af63a784760309594e678c7d7572 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-add-half-float.c
+++ b/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-add-half-float.c
@@ -12,3 +12,6 @@
  
  /* { dg-final { scan-tree-dump "add new stmt: \[^\n\r]*COMPLEX_ADD_ROT270" "slp1" { xfail *-*-* } } } */
  /* { dg-final { scan-tree-dump "add new stmt: \[^\n\r]*COMPLEX_ADD_ROT90" "slp1" { xfail *-*-* } } } */
+
+/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT90" "slp1" { xfail arm*-*-* } } } */
+/* { dg-final { scan-tree-dump "Found COMPLEX_ADD_ROT270" "slp1" { xfail arm*-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-mla-half-float.c b/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-mla-half-float.c

index 33e500f3f4cd05c829be5f261785d1f0c7e05c3c..e7a349b49c69a3f34bf393a80837a322a821b3a7 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-mla-half-float.c
+++ b/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-mla-half-float.c
@@ -8,5 +8,7 @@
  #define N 16
  #include "complex-mla-template.c"
  
+/* { dg-final { scan-tree-dump-times "add new stmt:\[^\n\r]*COMPLEX_FMA" 1 "slp1" { xfail *-*-* } } } */
+
  /* { dg-final { scan-tree-dump "Found COMPLEX_FMA_CONJ" "slp1" { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump "Found COMPLEX_FMA" "slp1"  { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump "Found COMPLEX_FMA" "slp1" { xfail arm*-*-* } } } */
diff --git a/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-mul-half-float.c b/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-mul-half-float.c

index 259dd6b2e0676461de594cc7fa66469690674271..06d08da41ad6fee09facfdc1de95b3386b6ad4b6 100644 (file)
--- a/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-mul-half-float.c
+++ b/gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-mul-half-float.c
@@ -8,5 +8,8 @@
  #define N 16
  #include "complex-mul-template.c"
  
-/* { dg-final { scan-tree-dump "Found COMPLEX_MUL_CONJ" "slp1"  { xfail *-*-* } } } */
-/* { dg-final { scan-tree-dump "Found COMPLEX_MUL" "slp1"  { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "add new stmt:\[^\n\r]*COMPLEX_MUL_CONJ" 1 "slp1" { xfail *-*-* } } } */
+/* { dg-final { scan-tree-dump-times "add new stmt:\[^\n\r]*COMPLEX_MUL" 1 "slp1" { xfail *-*-* } } } */
+
+/* { dg-final { scan-tree-dump "Found COMPLEX_MUL_CONJ" "slp1" { xfail arm*-*-* } } } */
+/* { dg-final { scan-tree-dump "Found COMPLEX_MUL" "slp1" { xfail arm*-*-* } } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c

index 07d71a63414b1066ea431e287286ad048515711a..739e63a96a1c5c0e4a2f25e8f722fbf1f1e5989e 100644 (file)
--- a/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/slp_1.c
@@ -30,12 +30,14 @@ vec_slp_##TYPE (TYPE *restrict a, TYPE b, TYPE c, int n)    \
  TEST_ALL (VEC_PERM)
  
  /* We should use one DUP for each of the 8-, 16- and 32-bit types,
-   although we currently use LD1RW for _Float16.  We should use two
+   although for now we insert both elements with umov + ins for _Float16.  We should use two
     DUPs for each of the three 64-bit types.  */
  /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.h, [hw]} 2 } } */
-/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, [sw]} 2 } } */
-/* { dg-final { scan-assembler-times {\tld1rw\tz[0-9]+\.s, } 1 } } */
+/* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.s, [sw]} 3 } } */
  /* { dg-final { scan-assembler-times {\tmov\tz[0-9]+\.d, [dx]} 9 } } */
+/* { dg-final { scan-assembler-times {\tumov\tw[0-9]+, v[0-9]+\.h} 2 } } */
+/* { dg-final { scan-assembler-times {\tins\tv[0-9]+\.h\[0\], w[0-9]+} 3 } } */
+/* { dg-final { scan-assembler-times {\tins\tv[0-9]+\.h\[1\], w[0-9]+} 3 } } */
  /* { dg-final { scan-assembler-times {\tzip1\tz[0-9]+\.d, z[0-9]+\.d, z[0-9]+\.d\n} 3 } } */
  /* { dg-final { scan-assembler-not {\tzip2\t} } } */
  
@@ -53,7 +55,6 @@ TEST_ALL (VEC_PERM)
  /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.s} 6 } } */
  /* { dg-final { scan-assembler-times {\twhilelo\tp[0-7]\.d} 6 } } */
  /* { dg-final { scan-assembler-not {\tldr} } } */
-/* { dg-final { scan-assembler-times {\tstr} 2 } } */
-/* { dg-final { scan-assembler-times {\tstr\th[0-9]+} 2 } } */
+/* { dg-final { scan-assembler-not {\tstr} } } */
  
  /* { dg-final { scan-assembler-not {\tuqdec} } } */
author	Artemiy Volkov <artemiy.volkov@arm.com>
	Mon, 18 May 2026 10:21:18 +0000 (10:21 +0000)
committer	Artemiy Volkov <artemiy.volkov@arm.com>
	Thu, 28 May 2026 11:26:39 +0000 (11:26 +0000)
gcc/config/aarch64/aarch64-modes.def		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64-protos.h		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64-simd.md		patch \| blob \| blame \| history
gcc/config/aarch64/aarch64.cc		patch \| blob \| blame \| history
gcc/config/aarch64/constraints.md		patch \| blob \| blame \| history
gcc/config/aarch64/iterators.md		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-add-half-float.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-mla-half-float.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.dg/vect/complex/bb-slp-complex-mul-half-float.c		patch \| blob \| blame \| history
gcc/testsuite/gcc.target/aarch64/sve/slp_1.c		patch \| blob \| blame \| history