Avoids emitting many saves/restores of vector registers and reduces the
amount of code generated around the scalar fallback.
/* Scalar fallback for a two-lane double vector.  For each lane i whose
   predicate P[i] is nonzero, Y[i] is replaced by F (X[i]); lanes whose
   predicate is zero keep the value passed in Y.  Returns the updated Y.  */
static inline float64x2_t
v_call_f64 (double (*f) (double), float64x2_t x, float64x2_t y, uint64x2_t p)
{
  /* Pull the lane-1 inputs out as scalars before the first call to F, so
     that only these scalars (not the whole X and P vectors) are still
     needed afterwards.  The predicate lane stays in its own integer type;
     only its zero/nonzero state is tested.  */
  uint64_t p1 = p[1];
  double x1 = x[1];
  if (__glibc_likely (p[0]))
    y[0] = f (x[0]);
  if (__glibc_likely (p1))
    y[1] = f (x1);
  return y;
}
static inline float64x2_t
v_call2_f64 (double (*f) (double, double), float64x2_t x1, float64x2_t x2,