[thirdparty/glibc.git] / sysdeps / aarch64 / fpu / v_math.h

/* Utilities for Advanced SIMD libmvec routines.
   Copyright (C) 2023 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#ifndef _V_MATH_H
#define _V_MATH_H

#include <arm_neon.h>
#include "vecmath_config.h"

/* Function attribute that applies the compiler's aarch64_vector_pcs
   attribute (the AArch64 vector procedure call standard) to a
   declaration.  */
#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))

/* Build the symbol name of a vector routine by token-pasting FUN onto a
   fixed "_ZGVnN..." prefix.  The F variants use a prefix containing "4"
   and append an "f" suffix to FUN; the D variants use a prefix containing
   "2" and add no suffix.  The 1 and 2 variants differ only in the number
   of "v" characters in the prefix (presumably one per vector argument,
   following the AArch64 vector function ABI name mangling -- confirm
   against the ABI document).  */
#define V_NAME_F1(fun) _ZGVnN4v_##fun##f
#define V_NAME_D1(fun) _ZGVnN2v_##fun
#define V_NAME_F2(fun) _ZGVnN4vv_##fun##f
#define V_NAME_D2(fun) _ZGVnN2vv_##fun

/* Shorthand helpers for declaring constants.  Each macro expands to a
   brace-enclosed initializer list that repeats X two, four or eight
   times.  X is substituted unparenthesized, once per element.  */
#define V2(X) { X, X }
#define V4(X) { X, X, X, X }
#define V8(X) { X, X, X, X, X, X, X, X }

/* Return nonzero if any bit of the four 16-bit lanes of X is set.  */
static inline int
v_any_u16h (uint16x4_t x)
{
  /* Inspect the whole 64-bit vector as a single scalar word.  */
  uint64x1_t as_word = vreinterpret_u64_u16 (x);
  uint64_t bits = vget_lane_u64 (as_word, 0);
  return bits != 0;
}

/* Broadcast the scalar X into all four lanes of a float32x4_t.  */
static inline float32x4_t
v_f32 (float x)
{
  return vdupq_n_f32 (x);
}
/* Broadcast the scalar X into all four lanes of a uint32x4_t.  */
static inline uint32x4_t
v_u32 (uint32_t x)
{
  return vdupq_n_u32 (x);
}
/* Broadcast the scalar X into all four lanes of an int32x4_t.  */
static inline int32x4_t
v_s32 (int32_t x)
{
  return vdupq_n_s32 (x);
}

/* Return nonzero if any lane of the vector compare result X is
   non-zero.  */
static inline int
v_any_u32 (uint32x4_t x)
{
  /* Every lane of X is assumed to be either 0 or -1u, so the sum of the
     two 64-bit halves is zero only when all four lanes are zero.  */
  uint64x2_t halves = vreinterpretq_u64_u32 (x);
  uint64_t sum = vpaddd_u64 (halves);
  return sum != 0;
}
/* Return nonzero if any bit of the two 32-bit lanes of X is set.  */
static inline int
v_any_u32h (uint32x2_t x)
{
  /* Inspect the whole 64-bit vector as a single scalar word.  */
  uint64x1_t as_word = vreinterpret_u64_u32 (x);
  uint64_t bits = vget_lane_u64 (as_word, 0);
  return bits != 0;
}
/* Gather four floats from TAB: lane i of the result is TAB[IDX[i]].
   No bounds checking is performed on the indices.  */
static inline float32x4_t
v_lookup_f32 (const float *tab, uint32x4_t idx)
{
  uint32_t i0 = vgetq_lane_u32 (idx, 0);
  uint32_t i1 = vgetq_lane_u32 (idx, 1);
  uint32_t i2 = vgetq_lane_u32 (idx, 2);
  uint32_t i3 = vgetq_lane_u32 (idx, 3);
  float32x4_t gathered = { tab[i0], tab[i1], tab[i2], tab[i3] };
  return gathered;
}
/* Gather four 32-bit words from TAB: lane i of the result is
   TAB[IDX[i]].  No bounds checking is performed on the indices.  */
static inline uint32x4_t
v_lookup_u32 (const uint32_t *tab, uint32x4_t idx)
{
  uint32_t i0 = vgetq_lane_u32 (idx, 0);
  uint32_t i1 = vgetq_lane_u32 (idx, 1);
  uint32_t i2 = vgetq_lane_u32 (idx, 2);
  uint32_t i3 = vgetq_lane_u32 (idx, 3);
  uint32x4_t gathered = { tab[i0], tab[i1], tab[i2], tab[i3] };
  return gathered;
}
/* Per-lane scalar fallback: for each lane i, the result is F (X[i]) when
   P[i] is non-zero and Y[i] otherwise.  F is called only for lanes whose
   predicate is non-zero.  */
static inline float32x4_t
v_call_f32 (float (*f) (float), float32x4_t x, float32x4_t y, uint32x4_t p)
{
  /* Start from the fallback values and overwrite the selected lanes.  */
  float32x4_t result = y;
  for (int lane = 0; lane < 4; lane++)
    {
      if (p[lane])
	result[lane] = f (x[lane]);
    }
  return result;
}
/* Per-lane scalar fallback for two-argument functions: for each lane i,
   the result is F (X1[i], X2[i]) when P[i] is non-zero and Y[i]
   otherwise.  F is called only for lanes whose predicate is non-zero.  */
static inline float32x4_t
v_call2_f32 (float (*f) (float, float), float32x4_t x1, float32x4_t x2,
	     float32x4_t y, uint32x4_t p)
{
  /* Start from the fallback values and overwrite the selected lanes.  */
  float32x4_t result = y;
  for (int lane = 0; lane < 4; lane++)
    {
      if (p[lane])
	result[lane] = f (x1[lane], x2[lane]);
    }
  return result;
}

/* Broadcast the scalar X into both lanes of a float64x2_t.  */
static inline float64x2_t
v_f64 (double x)
{
  return vdupq_n_f64 (x);
}
/* Broadcast the scalar X into both lanes of a uint64x2_t.  */
static inline uint64x2_t
v_u64 (uint64_t x)
{
  return vdupq_n_u64 (x);
}
/* Broadcast the scalar X into both lanes of an int64x2_t.  */
static inline int64x2_t
v_s64 (int64_t x)
{
  return vdupq_n_s64 (x);
}

/* Return nonzero if any lane of the vector compare result X is
   non-zero.  */
static inline int
v_any_u64 (uint64x2_t x)
{
  /* Each lane of X is assumed to be either 0 or all-ones, so the sum of
     the two lanes is zero only when both lanes are zero.  */
  uint64_t sum = vpaddd_u64 (x);
  return sum != 0;
}
/* Return nonzero if every lane of the vector compare result X is
   all-ones.  */
static inline int
v_all_u64 (uint64x2_t x)
{
  /* Each lane of X is assumed to be either 0 or all-ones, i.e. 0 or -1
     when read as signed, so the lanes sum to -2 only when both are
     set.  */
  int64x2_t as_signed = vreinterpretq_s64_u64 (x);
  int64_t total = vpaddd_s64 (as_signed);
  return total == -2;
}
/* Gather two doubles from TAB: lane i of the result is TAB[IDX[i]].
   No bounds checking is performed on the indices.  */
static inline float64x2_t
v_lookup_f64 (const double *tab, uint64x2_t idx)
{
  uint64_t i0 = vgetq_lane_u64 (idx, 0);
  uint64_t i1 = vgetq_lane_u64 (idx, 1);
  float64x2_t gathered = { tab[i0], tab[i1] };
  return gathered;
}
/* Gather two 64-bit words from TAB: lane i of the result is
   TAB[IDX[i]].  No bounds checking is performed on the indices.  */
static inline uint64x2_t
v_lookup_u64 (const uint64_t *tab, uint64x2_t idx)
{
  uint64_t i0 = vgetq_lane_u64 (idx, 0);
  uint64_t i1 = vgetq_lane_u64 (idx, 1);
  uint64x2_t gathered = { tab[i0], tab[i1] };
  return gathered;
}
/* Per-lane scalar fallback: for each lane i, the result is F (X[i]) when
   P[i] is non-zero and Y[i] otherwise.  F is called only for lanes whose
   predicate is non-zero.  */
static inline float64x2_t
v_call_f64 (double (*f) (double), float64x2_t x, float64x2_t y, uint64x2_t p)
{
  /* Start from the fallback values and overwrite the selected lanes.  */
  float64x2_t result = y;
  for (int lane = 0; lane < 2; lane++)
    {
      if (p[lane])
	result[lane] = f (x[lane]);
    }
  return result;
}
/* Per-lane scalar fallback for two-argument functions: for each lane i,
   the result is F (X1[i], X2[i]) when P[i] is non-zero and Y[i]
   otherwise.  F is called only for lanes whose predicate is non-zero.  */
static inline float64x2_t
v_call2_f64 (double (*f) (double, double), float64x2_t x1, float64x2_t x2,
	     float64x2_t y, uint64x2_t p)
{
  /* Start from the fallback values and overwrite the selected lanes.  */
  float64x2_t result = y;
  for (int lane = 0; lane < 2; lane++)
    {
      if (p[lane])
	result[lane] = f (x1[lane], x2[lane]);
    }
  return result;
}

#endif
Commit	Line	Data
aed39a3a JR	1	/* Utilities for Advanced SIMD libmvec routines.
	2	Copyright (C) 2023 Free Software Foundation, Inc.
	3	This file is part of the GNU C Library.
	4
	5	The GNU C Library is free software; you can redistribute it and/or
	6	modify it under the terms of the GNU Lesser General Public
	7	License as published by the Free Software Foundation; either
	8	version 2.1 of the License, or (at your option) any later version.
	9
	10	The GNU C Library is distributed in the hope that it will be useful,
	11	but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	13	Lesser General Public License for more details.
	14
	15	You should have received a copy of the GNU Lesser General Public
	16	License along with the GNU C Library; if not, see
	17	<https://www.gnu.org/licenses/>. */
	18
	19	#ifndef _V_MATH_H
	20	#define _V_MATH_H
	21
	22	#include <arm_neon.h>
	23	#include "vecmath_config.h"
	24
	25	#define VPCS_ATTR __attribute__ ((aarch64_vector_pcs))
	26
	27	#define V_NAME_F1(fun) _ZGVnN4v_##fun##f
	28	#define V_NAME_D1(fun) _ZGVnN2v_##fun
	29	#define V_NAME_F2(fun) _ZGVnN4vv_##fun##f
	30	#define V_NAME_D2(fun) _ZGVnN2vv_##fun
	31
	32	/* Shorthand helpers for declaring constants. */
5a4b6f8e JR	33	#define V2(X) { X, X }
	34	#define V4(X) { X, X, X, X }
	35	#define V8(X) { X, X, X, X, X, X, X, X }
aed39a3a	36
5a4b6f8e JR	37	static inline int
	38	v_any_u16h (uint16x4_t x)
	39	{
	40	return vget_lane_u64 (vreinterpret_u64_u16 (x), 0) != 0;
	41	}
aed39a3a JR	42
	43	static inline float32x4_t
	44	v_f32 (float x)
	45	{
	46	return (float32x4_t) V4 (x);
	47	}
	48	static inline uint32x4_t
	49	v_u32 (uint32_t x)
	50	{
	51	return (uint32x4_t) V4 (x);
	52	}
	53	static inline int32x4_t
	54	v_s32 (int32_t x)
	55	{
	56	return (int32x4_t) V4 (x);
	57	}
	58
	59	/* true if any elements of a vector compare result is non-zero. */
	60	static inline int
	61	v_any_u32 (uint32x4_t x)
	62	{
	63	/* assume elements in x are either 0 or -1u. */
	64	return vpaddd_u64 (vreinterpretq_u64_u32 (x)) != 0;
	65	}
5a4b6f8e JR	66	static inline int
	67	v_any_u32h (uint32x2_t x)
	68	{
	69	return vget_lane_u64 (vreinterpret_u64_u32 (x), 0) != 0;
	70	}
aed39a3a JR	71	static inline float32x4_t
	72	v_lookup_f32 (const float *tab, uint32x4_t idx)
	73	{
	74	return (float32x4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] };
	75	}
	76	static inline uint32x4_t
	77	v_lookup_u32 (const uint32_t *tab, uint32x4_t idx)
	78	{
	79	return (uint32x4_t){ tab[idx[0]], tab[idx[1]], tab[idx[2]], tab[idx[3]] };
	80	}
	81	static inline float32x4_t
	82	v_call_f32 (float (*f) (float), float32x4_t x, float32x4_t y, uint32x4_t p)
	83	{
	84	return (float32x4_t){ p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1],
	85	p[2] ? f (x[2]) : y[2], p[3] ? f (x[3]) : y[3] };
	86	}
	87	static inline float32x4_t
	88	v_call2_f32 (float (*f) (float, float), float32x4_t x1, float32x4_t x2,
	89	float32x4_t y, uint32x4_t p)
	90	{
	91	return (float32x4_t){ p[0] ? f (x1[0], x2[0]) : y[0],
	92	p[1] ? f (x1[1], x2[1]) : y[1],
	93	p[2] ? f (x1[2], x2[2]) : y[2],
	94	p[3] ? f (x1[3], x2[3]) : y[3] };
	95	}
	96
	97	static inline float64x2_t
	98	v_f64 (double x)
	99	{
	100	return (float64x2_t) V2 (x);
	101	}
	102	static inline uint64x2_t
	103	v_u64 (uint64_t x)
	104	{
	105	return (uint64x2_t) V2 (x);
	106	}
	107	static inline int64x2_t
	108	v_s64 (int64_t x)
	109	{
	110	return (int64x2_t) V2 (x);
	111	}
	112
	113	/* true if any elements of a vector compare result is non-zero. */
	114	static inline int
	115	v_any_u64 (uint64x2_t x)
	116	{
	117	/* assume elements in x are either 0 or -1u. */
	118	return vpaddd_u64 (x) != 0;
	119	}
	120	/* true if all elements of a vector compare result is 1. */
	121	static inline int
	122	v_all_u64 (uint64x2_t x)
	123	{
	124	/* assume elements in x are either 0 or -1u. */
	125	return vpaddd_s64 (vreinterpretq_s64_u64 (x)) == -2;
	126	}
	127	static inline float64x2_t
	128	v_lookup_f64 (const double *tab, uint64x2_t idx)
	129	{
	130	return (float64x2_t){ tab[idx[0]], tab[idx[1]] };
	131	}
	132	static inline uint64x2_t
	133	v_lookup_u64 (const uint64_t *tab, uint64x2_t idx)
	134	{
135	return (uint64x2_t){ tab[idx[0]], tab[idx[1]] };
136	}
137	static inline float64x2_t
138	v_call_f64 (double (*f) (double), float64x2_t x, float64x2_t y, uint64x2_t p)
139	{
140	return (float64x2_t){ p[0] ? f (x[0]) : y[0], p[1] ? f (x[1]) : y[1] };
141	}
142	static inline float64x2_t
143	v_call2_f64 (double (*f) (double, double), float64x2_t x1, float64x2_t x2,
144	float64x2_t y, uint64x2_t p)
145	{
146	return (float64x2_t){ p[0] ? f (x1[0], x2[0]) : y[0],
147	p[1] ? f (x1[1], x2[1]) : y[1] };
148	}
149
150	#endif