AArch64: Add vector cospi routines

author Joe Ramsay <Joe.Ramsay@arm.com>

Fri, 3 Jan 2025 20:12:10 +0000 (20:12 +0000)

committer Wilco Dijkstra <wilco.dijkstra@arm.com>

Fri, 3 Jan 2025 21:39:56 +0000 (21:39 +0000)
author Joe Ramsay <Joe.Ramsay@arm.com>
Fri, 3 Jan 2025 20:12:10 +0000 (20:12 +0000)
committer Wilco Dijkstra <wilco.dijkstra@arm.com>
Fri, 3 Jan 2025 21:39:56 +0000 (21:39 +0000)
diff --git a/bits/libm-simd-decl-stubs.h b/bits/libm-simd-decl-stubs.h

index fd5bae4c9868f7047701f35905b1966fd9bfa005..4b940e9115a5447bbcdcd2bb121cd6e4be166cc2 100644 (file)
--- a/bits/libm-simd-decl-stubs.h
+++ b/bits/libm-simd-decl-stubs.h
@@ -351,4 +351,15 @@
  #define __DECL_SIMD_sinpif32x
  #define __DECL_SIMD_sinpif64x
  #define __DECL_SIMD_sinpif128x
+
+#define __DECL_SIMD_cospi
+#define __DECL_SIMD_cospif
+#define __DECL_SIMD_cospil
+#define __DECL_SIMD_cospif16
+#define __DECL_SIMD_cospif32
+#define __DECL_SIMD_cospif64
+#define __DECL_SIMD_cospif128
+#define __DECL_SIMD_cospif32x
+#define __DECL_SIMD_cospif64x
+#define __DECL_SIMD_cospif128x
  #endif
diff --git a/math/bits/mathcalls.h b/math/bits/mathcalls.h

index 4bd48dc13f85ab19afd14bdaf9177699f66c4084..2f63a3beef47972d59306feaf08cc13ebbb242f3 100644 (file)
--- a/math/bits/mathcalls.h
+++ b/math/bits/mathcalls.h
@@ -76,7 +76,7 @@ __MATHCALL (atanpi,, (_Mdouble_ __x));
  __MATHCALL (atan2pi,, (_Mdouble_ __y, _Mdouble_ __x));
  
  /* Cosine of pi * X.  */
-__MATHCALL (cospi,, (_Mdouble_ __x));
+__MATHCALL_VEC (cospi,, (_Mdouble_ __x));
  /* Sine of pi * X.  */
  __MATHCALL_VEC (sinpi,, (_Mdouble_ __x));
  /* Tangent of pi * X.  */
diff --git a/sysdeps/aarch64/fpu/Makefile b/sysdeps/aarch64/fpu/Makefile

index 915da37a06292b89e3b46f9f9b6c66aed17a17bd..6d1e55c4e655426869728c7142bf44d77dab687a 100644 (file)
--- a/sysdeps/aarch64/fpu/Makefile
+++ b/sysdeps/aarch64/fpu/Makefile
@@ -8,6 +8,7 @@ libmvec-supported-funcs = acos \
                            cbrt \
                            cos \
                            cosh \
+                          cospi \
                            erf \
                            erfc \
                            exp \
diff --git a/sysdeps/aarch64/fpu/Versions b/sysdeps/aarch64/fpu/Versions

index 4cbb90602277f03d00c1ba7da482c1647e5b03e3..f8581cf881e53a1c6799fd15bb6173d79896fc09 100644 (file)
--- a/sysdeps/aarch64/fpu/Versions
+++ b/sysdeps/aarch64/fpu/Versions
@@ -136,6 +136,11 @@ libmvec {
      _ZGVsMxv_tanhf;
    }
    GLIBC_2.41 {
+    _ZGVnN2v_cospi;
+    _ZGVnN2v_cospif;
+    _ZGVnN4v_cospif;
+    _ZGVsMxv_cospi;
+    _ZGVsMxv_cospif;
      _ZGVnN2v_logp1;
      _ZGVnN2v_logp1f;
      _ZGVnN4v_logp1f;
diff --git a/sysdeps/aarch64/fpu/advsimd_f32_protos.h b/sysdeps/aarch64/fpu/advsimd_f32_protos.h

index 1b3040719002df1b0934701fb854009dff4da762..8e9c064b5bdee0105f72d0665a154c358592f2f2 100644 (file)
--- a/sysdeps/aarch64/fpu/advsimd_f32_protos.h
+++ b/sysdeps/aarch64/fpu/advsimd_f32_protos.h
@@ -26,6 +26,7 @@ libmvec_hidden_proto (V_NAME_F1(atanh));
  libmvec_hidden_proto (V_NAME_F1(cbrt));
  libmvec_hidden_proto (V_NAME_F1(cos));
  libmvec_hidden_proto (V_NAME_F1(cosh));
+libmvec_hidden_proto (V_NAME_F1(cospi));
  libmvec_hidden_proto (V_NAME_F1(erf));
  libmvec_hidden_proto (V_NAME_F1(erfc));
  libmvec_hidden_proto (V_NAME_F1(exp10));
diff --git a/sysdeps/aarch64/fpu/bits/math-vector.h b/sysdeps/aarch64/fpu/bits/math-vector.h

index a4ee9fbf76f56f572b1b8f1d99e9f8dfec240f16..2c3967489acf4268da2ca4c64479fb6def2e816f 100644 (file)
--- a/sysdeps/aarch64/fpu/bits/math-vector.h
+++ b/sysdeps/aarch64/fpu/bits/math-vector.h
@@ -69,6 +69,10 @@
  # define __DECL_SIMD_cosh __DECL_SIMD_aarch64
  # undef __DECL_SIMD_coshf
  # define __DECL_SIMD_coshf __DECL_SIMD_aarch64
+# undef __DECL_SIMD_cospi
+# define __DECL_SIMD_cospi __DECL_SIMD_aarch64
+# undef __DECL_SIMD_cospif
+# define __DECL_SIMD_cospif __DECL_SIMD_aarch64
  # undef __DECL_SIMD_erf
  # define __DECL_SIMD_erf __DECL_SIMD_aarch64
  # undef __DECL_SIMD_erff
@@ -177,6 +181,7 @@ __vpcs __f32x4_t _ZGVnN4v_atanhf (__f32x4_t);
  __vpcs __f32x4_t _ZGVnN4v_cbrtf (__f32x4_t);
  __vpcs __f32x4_t _ZGVnN4v_cosf (__f32x4_t);
  __vpcs __f32x4_t _ZGVnN4v_coshf (__f32x4_t);
+__vpcs __f32x4_t _ZGVnN4v_cospif (__f32x4_t);
  __vpcs __f32x4_t _ZGVnN4v_erff (__f32x4_t);
  __vpcs __f32x4_t _ZGVnN4v_erfcf (__f32x4_t);
  __vpcs __f32x4_t _ZGVnN4v_expf (__f32x4_t);
@@ -206,6 +211,7 @@ __vpcs __f64x2_t _ZGVnN2v_atanh (__f64x2_t);
  __vpcs __f64x2_t _ZGVnN2v_cbrt (__f64x2_t);
  __vpcs __f64x2_t _ZGVnN2v_cos (__f64x2_t);
  __vpcs __f64x2_t _ZGVnN2v_cosh (__f64x2_t);
+__vpcs __f64x2_t _ZGVnN2v_cospi (__f64x2_t);
  __vpcs __f64x2_t _ZGVnN2v_erf (__f64x2_t);
  __vpcs __f64x2_t _ZGVnN2v_erfc (__f64x2_t);
  __vpcs __f64x2_t _ZGVnN2v_exp (__f64x2_t);
@@ -240,6 +246,7 @@ __sv_f32_t _ZGVsMxv_atanhf (__sv_f32_t, __sv_bool_t);
  __sv_f32_t _ZGVsMxv_cbrtf (__sv_f32_t, __sv_bool_t);
  __sv_f32_t _ZGVsMxv_cosf (__sv_f32_t, __sv_bool_t);
  __sv_f32_t _ZGVsMxv_coshf (__sv_f32_t, __sv_bool_t);
+__sv_f32_t _ZGVsMxv_cospif (__sv_f32_t, __sv_bool_t);
  __sv_f32_t _ZGVsMxv_erff (__sv_f32_t, __sv_bool_t);
  __sv_f32_t _ZGVsMxv_erfcf (__sv_f32_t, __sv_bool_t);
  __sv_f32_t _ZGVsMxv_expf (__sv_f32_t, __sv_bool_t);
@@ -269,6 +276,7 @@ __sv_f64_t _ZGVsMxv_atanh (__sv_f64_t, __sv_bool_t);
  __sv_f64_t _ZGVsMxv_cbrt (__sv_f64_t, __sv_bool_t);
  __sv_f64_t _ZGVsMxv_cos (__sv_f64_t, __sv_bool_t);
  __sv_f64_t _ZGVsMxv_cosh (__sv_f64_t, __sv_bool_t);
+__sv_f64_t _ZGVsMxv_cospi (__sv_f64_t, __sv_bool_t);
  __sv_f64_t _ZGVsMxv_erf (__sv_f64_t, __sv_bool_t);
  __sv_f64_t _ZGVsMxv_erfc (__sv_f64_t, __sv_bool_t);
  __sv_f64_t _ZGVsMxv_exp (__sv_f64_t, __sv_bool_t);
diff --git a/sysdeps/aarch64/fpu/cospi_advsimd.c b/sysdeps/aarch64/fpu/cospi_advsimd.c

new file mode 100644 (file)

index 0000000..dcd12c8
--- /dev/null
+++ b/sysdeps/aarch64/fpu/cospi_advsimd.c
@@ -0,0 +1,87 @@
+/* Double-precision (Advanced SIMD) cospi function
+
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+#include "poly_advsimd_f64.h"
+
+static const struct data
+{
+  float64x2_t poly[10];
+  float64x2_t range_val;
+} data = {
+  /* Polynomial coefficients generated using Remez algorithm,
+     see sinpi.sollya for details.  */
+  .poly = { V2 (0x1.921fb54442d184p1), V2 (-0x1.4abbce625be53p2),
+           V2 (0x1.466bc6775ab16p1), V2 (-0x1.32d2cce62dc33p-1),
+           V2 (0x1.507834891188ep-4), V2 (-0x1.e30750a28c88ep-8),
+           V2 (0x1.e8f48308acda4p-12), V2 (-0x1.6fc0032b3c29fp-16),
+           V2 (0x1.af86ae521260bp-21), V2 (-0x1.012a9870eeb7dp-25) },
+  .range_val = V2 (0x1p63),
+};
+
+static float64x2_t VPCS_ATTR NOINLINE
+special_case (float64x2_t x, float64x2_t y, uint64x2_t odd, uint64x2_t cmp)
+{
+  /* Reintroduce the sign bit, then fall back to scalar code.  */
+  y = vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
+  return v_call_f64 (cospi, x, y, cmp);
+}
+
+/* Approximation for vector double-precision cospi(x).
+   Maximum Error 3.06 ULP:
+   _ZGVnN2v_cospi(0x1.7dd4c0b03cc66p-5) got 0x1.fa854babfb6bep-1
+                                       want 0x1.fa854babfb6c1p-1.  */
+float64x2_t VPCS_ATTR V_NAME_D1 (cospi) (float64x2_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+#if WANT_SIMD_EXCEPT
+  float64x2_t r = vabsq_f64 (x);
+  uint64x2_t cmp = vcaleq_f64 (v_f64 (0x1p64), x);
+
+  /* When WANT_SIMD_EXCEPT = 1, special lanes should be zeroed
+     to avoid them overflowing and throwing exceptions.  */
+  r = v_zerofy_f64 (r, cmp);
+  uint64x2_t odd = vshlq_n_u64 (vcvtnq_u64_f64 (r), 63);
+
+#else
+  float64x2_t r = x;
+  uint64x2_t cmp = vcageq_f64 (r, d->range_val);
+  uint64x2_t odd
+      = vshlq_n_u64 (vreinterpretq_u64_s64 (vcvtaq_s64_f64 (r)), 63);
+
+#endif
+
+  r = vsubq_f64 (r, vrndaq_f64 (r));
+
+  /* cospi(x) = sinpi(0.5 - abs(r)) for reduced r in -1/2 .. 1/2.  */
+  r = vsubq_f64 (v_f64 (0.5), vabsq_f64 (r));
+
+  /* y = sin(pi * r).  */
+  float64x2_t r2 = vmulq_f64 (r, r);
+  float64x2_t r4 = vmulq_f64 (r2, r2);
+  float64x2_t y = vmulq_f64 (v_pw_horner_9_f64 (r2, r4, d->poly), r);
+
+  /* Fall back to scalar code if any lane is flagged in cmp.  */
+  if (__glibc_unlikely (v_any_u64 (cmp)))
+    return special_case (x, y, odd, cmp);
+
+  /* Reintroduce the sign bit for inputs which round to odd.  */
+  return vreinterpretq_f64_u64 (veorq_u64 (vreinterpretq_u64_f64 (y), odd));
+}
diff --git a/sysdeps/aarch64/fpu/cospi_sve.c b/sysdeps/aarch64/fpu/cospi_sve.c

new file mode 100644 (file)

index 0000000..dd98815
--- /dev/null
+++ b/sysdeps/aarch64/fpu/cospi_sve.c
@@ -0,0 +1,65 @@
+/* Double-precision (SVE) cospi function
+
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+#include "poly_sve_f64.h"
+
+static const struct data
+{
+  double poly[10];
+  double range_val;
+} data = {
+  /* Polynomial coefficients generated using Remez algorithm,
+     see sinpi.sollya for details.  */
+  .poly = { 0x1.921fb54442d184p1, -0x1.4abbce625be53p2, 0x1.466bc6775ab16p1,
+           -0x1.32d2cce62dc33p-1, 0x1.507834891188ep-4, -0x1.e30750a28c88ep-8,
+           0x1.e8f48308acda4p-12, -0x1.6fc0032b3c29fp-16,
+           0x1.af86ae521260bp-21, -0x1.012a9870eeb7dp-25 },
+  .range_val = 0x1p53,
+};
+
+/* A fast SVE implementation of cospi.
+   Maximum error 3.20 ULP:
+   _ZGVsMxv_cospi(0x1.f18ba32c63159p-6) got 0x1.fdabf595f9763p-1
+                                       want 0x1.fdabf595f9766p-1.  */
+svfloat64_t SV_NAME_D1 (cospi) (svfloat64_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  /* Using cospi(x) = sinpi(0.5 - x)
+     range reduction and offset into sinpi range -1/2 .. 1/2
+     r = 0.5 - |x - round(x)|.  */
+  svfloat64_t n = svrinta_x (pg, x);
+  svfloat64_t r = svsub_x (pg, x, n);
+  r = svsub_x (pg, sv_f64 (0.5), svabs_x (pg, r));
+
+  /* Result should be negated if n is odd.
+     If |x| >= 2^53, the result will always be positive.  */
+  svbool_t cmp = svaclt (pg, x, d->range_val);
+  svuint64_t intn = svreinterpret_u64 (svcvt_s64_z (pg, n));
+  svuint64_t sign = svlsl_z (cmp, intn, 63);
+
+  /* y = sin(pi * r).  */
+  svfloat64_t r2 = svmul_x (pg, r, r);
+  svfloat64_t r4 = svmul_x (pg, r2, r2);
+  svfloat64_t y = sv_pw_horner_9_f64_x (pg, r2, r4, d->poly);
+  y = svmul_x (pg, y, r);
+
+  return svreinterpret_f64 (sveor_x (pg, svreinterpret_u64 (y), sign));
+}
diff --git a/sysdeps/aarch64/fpu/cospif_advsimd.c b/sysdeps/aarch64/fpu/cospif_advsimd.c

new file mode 100644 (file)

index 0000000..a81471f
--- /dev/null
+++ b/sysdeps/aarch64/fpu/cospif_advsimd.c
@@ -0,0 +1,87 @@
+/* Single-precision (Advanced SIMD) cospi function
+
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "v_math.h"
+#include "poly_advsimd_f32.h"
+
+static const struct data
+{
+  float32x4_t poly[6];
+  float32x4_t range_val;
+} data = {
+  /* Taylor series coefficients for sin(pi * x).  */
+  .poly = { V4 (0x1.921fb6p1f), V4 (-0x1.4abbcep2f), V4 (0x1.466bc6p1f),
+           V4 (-0x1.32d2ccp-1f), V4 (0x1.50783p-4f), V4 (-0x1.e30750p-8f) },
+  .range_val = V4 (0x1p31f),
+};
+
+static float32x4_t VPCS_ATTR NOINLINE
+special_case (float32x4_t x, float32x4_t y, uint32x4_t odd, uint32x4_t cmp)
+{
+  y = vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
+  return v_call_f32 (cospif, x, y, cmp);
+}
+
+/* Approximation for vector single-precision cospi(x).
+   Maximum Error 3.17 ULP:
+   _ZGVnN4v_cospif(0x1.d341a8p-5) got 0x1.f7cd56p-1
+                                 want 0x1.f7cd5p-1.  */
+float32x4_t VPCS_ATTR V_NAME_F1 (cospi) (float32x4_t x)
+{
+  const struct data *d = ptr_barrier (&data);
+
+#if WANT_SIMD_EXCEPT
+  float32x4_t r = vabsq_f32 (x);
+  uint32x4_t cmp = vcaleq_f32 (v_f32 (0x1p32f), x);
+
+  /* When WANT_SIMD_EXCEPT = 1, special lanes should be zeroed
+     to avoid them overflowing and throwing exceptions.  */
+  r = v_zerofy_f32 (r, cmp);
+  uint32x4_t odd = vshlq_n_u32 (vcvtnq_u32_f32 (r), 31);
+
+#else
+  float32x4_t r = x;
+  uint32x4_t cmp = vcageq_f32 (r, d->range_val);
+
+  uint32x4_t odd
+      = vshlq_n_u32 (vreinterpretq_u32_s32 (vcvtaq_s32_f32 (r)), 31);
+
+#endif
+
+  /* Range reduction: r = r - round(r).  */
+  r = vsubq_f32 (r, vrndaq_f32 (r));
+
+  /* cospi(x) = sinpi(0.5 - abs(r)) for reduced r in -1/2 .. 1/2.  */
+  r = vsubq_f32 (v_f32 (0.5f), vabsq_f32 (r));
+
+  /* Pairwise Horner approximation for y = sin(r * pi).  */
+  float32x4_t r2 = vmulq_f32 (r, r);
+  float32x4_t r4 = vmulq_f32 (r2, r2);
+  float32x4_t y = vmulq_f32 (v_pw_horner_5_f32 (r2, r4, d->poly), r);
+
+  /* Fall back to scalar code if any lane is flagged in cmp.  */
+  if (__glibc_unlikely (v_any_u32 (cmp)))
+    return special_case (x, y, odd, cmp);
+
+  /* Reintroduce the sign bit for inputs which round to odd.  */
+  return vreinterpretq_f32_u32 (veorq_u32 (vreinterpretq_u32_f32 (y), odd));
+}
+
+libmvec_hidden_def (V_NAME_F1 (cospi))
+HALF_WIDTH_ALIAS_F1 (cospi)
diff --git a/sysdeps/aarch64/fpu/cospif_sve.c b/sysdeps/aarch64/fpu/cospif_sve.c

new file mode 100644 (file)

index 0000000..e8980da
--- /dev/null
+++ b/sysdeps/aarch64/fpu/cospif_sve.c
@@ -0,0 +1,61 @@
+/* Single-precision (SVE) cospi function
+
+   Copyright (C) 2024 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, see
+   <https://www.gnu.org/licenses/>.  */
+
+#include "sv_math.h"
+#include "poly_sve_f32.h"
+
+static const struct data
+{
+  float poly[6];
+  float range_val;
+} data = {
+  /* Taylor series coefficients for sin(pi * x).  */
+  .poly = { 0x1.921fb6p1f, -0x1.4abbcep2f, 0x1.466bc6p1f, -0x1.32d2ccp-1f,
+           0x1.50783p-4f, -0x1.e30750p-8f },
+  .range_val = 0x1p31f,
+};
+
+/* A fast SVE implementation of cospif.
+   Maximum error 2.60 ULP:
+   _ZGVsMxv_cospif(+/-0x1.cae664p-4) got 0x1.e09c9ep-1
+                                   want 0x1.e09c98p-1.  */
+svfloat32_t SV_NAME_F1 (cospi) (svfloat32_t x, const svbool_t pg)
+{
+  const struct data *d = ptr_barrier (&data);
+
+  /* Using cospi(x) = sinpi(0.5 - x)
+     range reduction and offset into sinpi range -1/2 .. 1/2
+     r = 0.5 - |x - round(x)|.  */
+  svfloat32_t n = svrinta_x (pg, x);
+  svfloat32_t r = svsub_x (pg, x, n);
+  r = svsub_x (pg, sv_f32 (0.5f), svabs_x (pg, r));
+
+  /* Result should be negated if n is odd.
+     If |x| >= 2^31, the result will always be positive.  */
+  svbool_t cmp = svaclt (pg, x, d->range_val);
+  svuint32_t intn = svreinterpret_u32 (svcvt_s32_x (pg, n));
+  svuint32_t sign = svlsl_z (cmp, intn, 31);
+
+  /* y = sin(pi * r).  */
+  svfloat32_t r2 = svmul_x (pg, r, r);
+  svfloat32_t y = sv_horner_5_f32_x (pg, r2, d->poly);
+  y = svmul_x (pg, y, r);
+
+  return svreinterpret_f32 (sveor_x (pg, svreinterpret_u32 (y), sign));
+}
diff --git a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c

index b2332e6a4be9a09c5b87a7e489d89cd541485c78..0b0cc657b4832733a689e47d37e15d5c0de495d5 100644 (file)
--- a/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c
@@ -33,6 +33,7 @@ VPCS_VECTOR_WRAPPER_ff (atan2_advsimd, _ZGVnN2vv_atan2)
  VPCS_VECTOR_WRAPPER (cbrt_advsimd, _ZGVnN2v_cbrt)
  VPCS_VECTOR_WRAPPER (cos_advsimd, _ZGVnN2v_cos)
  VPCS_VECTOR_WRAPPER (cosh_advsimd, _ZGVnN2v_cosh)
+VPCS_VECTOR_WRAPPER (cospi_advsimd, _ZGVnN2v_cospi)
  VPCS_VECTOR_WRAPPER (erf_advsimd, _ZGVnN2v_erf)
  VPCS_VECTOR_WRAPPER (erfc_advsimd, _ZGVnN2v_erfc)
  VPCS_VECTOR_WRAPPER (exp_advsimd, _ZGVnN2v_exp)
diff --git a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c

index e833a840679f1b3351e94ef415c938ada95f7a6b..fb4f6c487b4d0c040e5ecd5d43cabae994b7f4a7 100644 (file)
--- a/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-double-sve-wrappers.c
@@ -52,6 +52,7 @@ SVE_VECTOR_WRAPPER_ff (atan2_sve, _ZGVsMxvv_atan2)
  SVE_VECTOR_WRAPPER (cbrt_sve, _ZGVsMxv_cbrt)
  SVE_VECTOR_WRAPPER (cos_sve, _ZGVsMxv_cos)
  SVE_VECTOR_WRAPPER (cosh_sve, _ZGVsMxv_cosh)
+SVE_VECTOR_WRAPPER (cospi_sve, _ZGVsMxv_cospi)
  SVE_VECTOR_WRAPPER (erf_sve, _ZGVsMxv_erf)
  SVE_VECTOR_WRAPPER (erfc_sve, _ZGVsMxv_erfc)
  SVE_VECTOR_WRAPPER (exp_sve, _ZGVsMxv_exp)
diff --git a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c

index bede18bba1d190e178e3082421fc760d90e7342a..51b05cbb3756968ee3b9296f70e6fdac247b4db9 100644 (file)
--- a/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c
@@ -33,6 +33,7 @@ VPCS_VECTOR_WRAPPER_ff (atan2f_advsimd, _ZGVnN4vv_atan2f)
  VPCS_VECTOR_WRAPPER (cbrtf_advsimd, _ZGVnN4v_cbrtf)
  VPCS_VECTOR_WRAPPER (cosf_advsimd, _ZGVnN4v_cosf)
  VPCS_VECTOR_WRAPPER (coshf_advsimd, _ZGVnN4v_coshf)
+VPCS_VECTOR_WRAPPER (cospif_advsimd, _ZGVnN4v_cospif)
  VPCS_VECTOR_WRAPPER (erff_advsimd, _ZGVnN4v_erff)
  VPCS_VECTOR_WRAPPER (erfcf_advsimd, _ZGVnN4v_erfcf)
  VPCS_VECTOR_WRAPPER (expf_advsimd, _ZGVnN4v_expf)
diff --git a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c

index d5497b2f1f0ddbf8f1ee9f40a5abef57fc03be81..a10de3f9ce6f3e1b850a34bb64af89d2873cfa75 100644 (file)
--- a/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
+++ b/sysdeps/aarch64/fpu/test-float-sve-wrappers.c
@@ -52,6 +52,7 @@ SVE_VECTOR_WRAPPER_ff (atan2f_sve, _ZGVsMxvv_atan2f)
  SVE_VECTOR_WRAPPER (cbrtf_sve, _ZGVsMxv_cbrtf)
  SVE_VECTOR_WRAPPER (cosf_sve, _ZGVsMxv_cosf)
  SVE_VECTOR_WRAPPER (coshf_sve, _ZGVsMxv_coshf)
+SVE_VECTOR_WRAPPER (cospif_sve, _ZGVsMxv_cospif)
  SVE_VECTOR_WRAPPER (erff_sve, _ZGVsMxv_erff)
  SVE_VECTOR_WRAPPER (erfcf_sve, _ZGVsMxv_erfcf)
  SVE_VECTOR_WRAPPER (expf_sve, _ZGVsMxv_expf)
diff --git a/sysdeps/aarch64/libm-test-ulps b/sysdeps/aarch64/libm-test-ulps

index c095c72fe2fe9cd3657423510c32f4b249c5fc39..12f1b775764556d220a2afb2673081f795ccf20d 100644 (file)
--- a/sysdeps/aarch64/libm-test-ulps
+++ b/sysdeps/aarch64/libm-test-ulps
@@ -803,11 +803,19 @@ double: 2
  float: 2
  ldouble: 2
  
+Function: "cospi_advsimd":
+double: 2
+float: 1
+
  Function: "cospi_downward":
  double: 1
  float: 2
  ldouble: 2
  
+Function: "cospi_sve":
+double: 2
+float: 1
+
  Function: "cospi_towardzero":
  double: 1
  float: 1
diff --git a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist

index dd69f818c1c07ee38f4675b8c4d20f2c1b429f80..c081f5fb28c4a103b73c0bb0df6a6c92f8041561 100644 (file)
--- a/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
+++ b/sysdeps/unix/sysv/linux/aarch64/libmvec.abilist
@@ -128,12 +128,17 @@ GLIBC_2.40 _ZGVsMxvv_hypot F
  GLIBC_2.40 _ZGVsMxvv_hypotf F
  GLIBC_2.40 _ZGVsMxvv_pow F
  GLIBC_2.40 _ZGVsMxvv_powf F
+GLIBC_2.41 _ZGVnN2v_cospi F
+GLIBC_2.41 _ZGVnN2v_cospif F
  GLIBC_2.41 _ZGVnN2v_logp1 F
  GLIBC_2.41 _ZGVnN2v_logp1f F
  GLIBC_2.41 _ZGVnN2v_sinpi F
  GLIBC_2.41 _ZGVnN2v_sinpif F
+GLIBC_2.41 _ZGVnN4v_cospif F
  GLIBC_2.41 _ZGVnN4v_logp1f F
  GLIBC_2.41 _ZGVnN4v_sinpif F
+GLIBC_2.41 _ZGVsMxv_cospi F
+GLIBC_2.41 _ZGVsMxv_cospif F
  GLIBC_2.41 _ZGVsMxv_logp1 F
  GLIBC_2.41 _ZGVsMxv_logp1f F
  GLIBC_2.41 _ZGVsMxv_sinpi F
author	Joe Ramsay <Joe.Ramsay@arm.com>
	Fri, 3 Jan 2025 20:12:10 +0000 (20:12 +0000)
committer	Wilco Dijkstra <wilco.dijkstra@arm.com>
	Fri, 3 Jan 2025 21:39:56 +0000 (21:39 +0000)
bits/libm-simd-decl-stubs.h		patch \| blob \| blame \| history
math/bits/mathcalls.h		patch \| blob \| blame \| history
sysdeps/aarch64/fpu/Makefile		patch \| blob \| blame \| history
sysdeps/aarch64/fpu/Versions		patch \| blob \| blame \| history
sysdeps/aarch64/fpu/advsimd_f32_protos.h		patch \| blob \| blame \| history
sysdeps/aarch64/fpu/bits/math-vector.h		patch \| blob \| blame \| history
sysdeps/aarch64/fpu/cospi_advsimd.c	[new file with mode: 0644]	patch \| blob
sysdeps/aarch64/fpu/cospi_sve.c	[new file with mode: 0644]	patch \| blob
sysdeps/aarch64/fpu/cospif_advsimd.c	[new file with mode: 0644]	patch \| blob
sysdeps/aarch64/fpu/cospif_sve.c	[new file with mode: 0644]	patch \| blob
sysdeps/aarch64/fpu/test-double-advsimd-wrappers.c		patch \| blob \| blame \| history
sysdeps/aarch64/fpu/test-double-sve-wrappers.c		patch \| blob \| blame \| history
sysdeps/aarch64/fpu/test-float-advsimd-wrappers.c		patch \| blob \| blame \| history
sysdeps/aarch64/fpu/test-float-sve-wrappers.c		patch \| blob \| blame \| history
sysdeps/aarch64/libm-test-ulps		patch \| blob \| blame \| history
sysdeps/unix/sysv/linux/aarch64/libmvec.abilist		patch \| blob \| blame \| history