]> git.ipfire.org Git - thirdparty/gcc.git/commitdiff
Aarch64: Add support for addhn vectorizer optabs for Adv.SIMD
author: Tamar Christina <tamar.christina@arm.com>
Fri, 12 Sep 2025 07:29:46 +0000 (08:29 +0100)
committer: Tamar Christina <tamar.christina@arm.com>
Fri, 12 Sep 2025 07:29:46 +0000 (08:29 +0100)
This implements the new vector optab vec_trunc_add_high<mode>,
adding support for in-vectorizer use for early break.

gcc/ChangeLog:

* config/aarch64/aarch64-simd.md (vec_trunc_add_high<mode>): New.

gcc/testsuite/ChangeLog:

* gcc.target/aarch64/vect-addhn_1.c: New test.

gcc/config/aarch64/aarch64-simd.md
gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c [new file with mode: 0644]

index 82db2402f135cf6cede7c59a46cb9fa27dc49297..0d5b02a739fa74724d6dc8b658638d55b8db6890 100644 (file)
   }
 )
 
+;; Expander for the vec_trunc_add_high<mode> pattern.  Operands 1 and 2 are
+;; VQN registers and operand 0 is a register in the narrow mode <VNARROWQ>.
+;; Enabled when TARGET_SIMD holds.  The work is delegated to
+;; gen_aarch64_addhn<mode>; because the preparation code ends in DONE, only
+;; that instruction is emitted and the RTL template below is not used as the
+;; generated pattern.
+(define_expand "vec_trunc_add_high<mode>"
+  [(set (match_operand:<VNARROWQ> 0 "register_operand")
+       (plus:VQN (match_operand:VQN 1 "register_operand")
+                 (match_operand:VQN 2 "register_operand")))]
+  "TARGET_SIMD"
+  {
+    emit_insn (gen_aarch64_addhn<mode> (operands[0], operands[1], operands[2]));
+    DONE;
+  }
+)
+
 (define_insn "aarch64_<su>abal<mode>"
   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
diff --git a/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c b/gcc/testsuite/gcc.target/aarch64/vect-addhn_1.c
new file mode 100644 (file)
index 0000000..0ea673e
--- /dev/null
@@ -0,0 +1,86 @@
+/* { dg-require-effective-target vect_int } */
+/* { dg-options "-O3 -std=c99" } */
+
+#include <stdint.h>
+#include <stdio.h>
+
+#define N 1000  /* Number of elements in every test array.  */
+#define CHECK_ERROR(cond, fmt, ...) /* Print fmt and abort when cond is true.  NOTE(review): not referenced in the visible test.  */ \
+  do { if (cond) { printf(fmt "\n", ##__VA_ARGS__); __builtin_abort (); } } while (0)
+
+// Generates the arrays (a, b, c, ref) and the init/foo/ref/validate functions for one A_TYPE -> C_TYPE combo
+#define TEST_COMBO(A_TYPE, C_TYPE, CAST_TYPE, SHIFT)                                  \
+  A_TYPE a_##A_TYPE##_##C_TYPE[N];                                                    \
+  A_TYPE b_##A_TYPE##_##C_TYPE[N];                                                    \
+  C_TYPE c_##A_TYPE##_##C_TYPE[N];                                                    \
+  C_TYPE ref_##A_TYPE##_##C_TYPE[N];                                                  \
+  /* init: fill a and b with i*3 and i*7 (cast to A_TYPE); loop marked novector.  */  \
+  void init_##A_TYPE##_##C_TYPE() {                                                   \
+    _Pragma ("GCC novector")                                                         \
+    for (int i = 0; i < N; i++) {                                                     \
+      a_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 3);                                     \
+      b_##A_TYPE##_##C_TYPE[i] = (A_TYPE)(i * 7);                                     \
+    }                                                                                 \
+  }                                                                                   \
+  /* foo: cast to CAST_TYPE, add, shift right by SHIFT, store to C_TYPE.  */          \
+  void foo_##A_TYPE##_##C_TYPE() {                                                    \
+    for (int i = 0; i < N; i++)                                                       \
+      c_##A_TYPE##_##C_TYPE[i] =                                                      \
+        ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] +                                        \
+         (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT;                               \
+  }                                                                                   \
+  /* ref: same expression as foo, but in a loop marked novector.  */                  \
+  void ref_##A_TYPE##_##C_TYPE##_compute() {                                          \
+    _Pragma ("GCC novector")                                                         \
+    for (int i = 0; i < N; i++)                                                       \
+      ref_##A_TYPE##_##C_TYPE[i] =                                                    \
+        ((CAST_TYPE)a_##A_TYPE##_##C_TYPE[i] +                                        \
+         (CAST_TYPE)b_##A_TYPE##_##C_TYPE[i]) >> SHIFT;                               \
+  }                                                                                   \
+  /* validate: print and abort on the first index where c differs from ref.  */       \
+  void validate_##A_TYPE##_##C_TYPE(const char* variant_name) {                       \
+    _Pragma ("GCC novector")                                                         \
+    for (int i = 0; i < N; i++) {                                                     \
+      if (c_##A_TYPE##_##C_TYPE[i] != ref_##A_TYPE##_##C_TYPE[i]) {                   \
+        printf("FAIL [%s]: Index %d: got %lld, expected %lld\n",                      \
+               variant_name, i,                                                       \
+               (long long)c_##A_TYPE##_##C_TYPE[i],                                   \
+               (long long)ref_##A_TYPE##_##C_TYPE[i]);                                \
+        __builtin_abort ();                                                           \
+      }                                                                               \
+    }                                                                                 \
+  }
+
+// Runs one combo: init the inputs, run foo and the reference, then validate using "A_TYPE -> C_TYPE" as the failure label
+#define RUN_COMBO(A_TYPE, C_TYPE)                          \
+  do {                                                     \
+    init_##A_TYPE##_##C_TYPE();                            \
+    foo_##A_TYPE##_##C_TYPE();                             \
+    ref_##A_TYPE##_##C_TYPE##_compute();                   \
+    validate_##A_TYPE##_##C_TYPE(#A_TYPE " -> " #C_TYPE);  \
+  } while (0)
+
+// Instantiate each (input type, output type, arithmetic type, shift) combo; the __int128-based 64-bit combos are only built when __aarch64__ is defined
+TEST_COMBO(int16_t, int8_t, int32_t, 8)
+TEST_COMBO(uint16_t, uint8_t, uint32_t, 8)
+TEST_COMBO(int32_t, int16_t, int64_t, 16)
+TEST_COMBO(uint32_t, uint16_t, uint64_t, 16)
+#if defined(__aarch64__)
+TEST_COMBO(int64_t, int32_t, __int128_t, 32)
+TEST_COMBO(uint64_t, uint32_t, unsigned __int128, 32)
+#endif
+
+int main() {
+  /* Run every instantiated combo; a mismatch aborts inside validate_*, so reaching the return means all combos matched.  */
+  RUN_COMBO(int16_t, int8_t);
+  RUN_COMBO(uint16_t, uint8_t);
+  RUN_COMBO(int32_t, int16_t);
+  RUN_COMBO(uint32_t, uint16_t);
+#if defined(__aarch64__)
+  RUN_COMBO(int64_t, int32_t);
+  RUN_COMBO(uint64_t, uint32_t);
+#endif
+
+  return 0;
+}
+