#define AVX512FP16
#define AVX512BF16
#include "avx512f-helper.h"
-#include "avx512f-mask-type.h"
#include <stdint.h>
#define NOINLINE __attribute__((noinline,noclone))
#define AVX10_512BIT
#endif
#include "avx10-helper.h"
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
int i;
UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ unsigned short res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
- MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
abort ();
- MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
abort ();
}
#define AVX10_512BIT
#endif
#include "avx10-helper.h"
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
UNION_TYPE (AVX512F_LEN, bf16_uw) src1, src2;
MASK_TYPE mask = MASK_VALUE;
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
float x = 0.5;
float y = 0.25;
#include <math.h>
#include <limits.h>
-#define SRC_SIZE ((AVX512F_LEN) / 16)
-#define DST_SIZE ((AVX512F_LEN) / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
static void
CALC (__bf16 *s, short *r)
int i;
unsigned char tmp;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
if (s[i] > SCHAR_MAX)
tmp = SCHAR_MAX;
UNION_TYPE (AVX512F_LEN, bf16_bf) s;
UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- short res_ref[DST_SIZE] = { 0 };
+ short res_ref[SIZE] = { 0 };
int i, sign = 1;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
s.a[i] = 1.23 * (i + 2) * sign;
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
res1.x = INTRINSIC (_ipcvtnebf16_epi16) (s.x);
if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
abort ();
- MASK_MERGE (i_w) (res_ref, mask, SRC_SIZE);
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
abort ();
- MASK_ZERO (i_w) (res_ref, mask, SRC_SIZE);
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
abort ();
}
#include <math.h>
#include <limits.h>
-#define SRC_SIZE ((AVX512F_LEN) / 16)
-#define DST_SIZE ((AVX512F_LEN) / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
static void
CALC (__bf16 *s, unsigned short *r)
int i;
unsigned char tmp;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
if (s[i] > UCHAR_MAX)
tmp = UCHAR_MAX;
UNION_TYPE (AVX512F_LEN, bf16_bf) s;
UNION_TYPE (AVX512F_LEN, i_uw) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[DST_SIZE] = { 0 };
+ unsigned short res_ref[SIZE] = { 0 };
int i, sign = 1;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
s.a[i] = 1.23 * (i + 2) * sign;
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
res1.x = INTRINSIC (_ipcvtnebf16_epu16) (s.x);
if (UNION_CHECK (AVX512F_LEN, i_uw) (res1, res_ref))
abort ();
- MASK_MERGE (i_uw) (res_ref, mask, SRC_SIZE);
+ MASK_MERGE (i_uw) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_uw) (res2, res_ref))
abort ();
- MASK_ZERO (i_uw) (res_ref, mask, SRC_SIZE);
+ MASK_ZERO (i_uw) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_uw) (res3, res_ref))
abort ();
}
#include "avx10-helper.h"
#include <limits.h>
-#define SRC_SIZE ((AVX512F_LEN) / 16)
-#define DST_SIZE ((AVX512F_LEN) / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
static void
CALC (_Float16 *s, short *r)
int i;
unsigned char tmp;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
if (s[i] > SCHAR_MAX)
tmp = SCHAR_MAX;
UNION_TYPE (AVX512F_LEN, h) s;
UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- short res_ref[DST_SIZE] = { 0 };
+ short res_ref[SIZE] = { 0 };
int i, sign = 1;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
s.a[i] = 1.23 * (i + 2) * sign;
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
#if AVX512F_LEN == 128
if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
abort ();
- MASK_MERGE (i_w) (res_ref, mask, SRC_SIZE);
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
abort ();
- MASK_ZERO (i_w) (res_ref, mask, SRC_SIZE);
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
abort ();
}
#include "avx10-helper.h"
#include <limits.h>
-#define SRC_SIZE ((AVX512F_LEN) / 16)
-#define DST_SIZE ((AVX512F_LEN) / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
static void
CALC (_Float16 *s, short *r)
int i;
unsigned char tmp;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
if (s[i] > UCHAR_MAX)
tmp = UCHAR_MAX;
UNION_TYPE (AVX512F_LEN, h) s;
UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- short res_ref[DST_SIZE] = { 0 };
+ short res_ref[SIZE] = { 0 };
int i, sign = 1;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
s.a[i] = 1.23 * (i + 2) * sign;
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
#if AVX512F_LEN == 128
if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
abort ();
- MASK_MERGE (i_w) (res_ref, mask, SRC_SIZE);
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
abort ();
- MASK_ZERO (i_w) (res_ref, mask, SRC_SIZE);
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
abort ();
}
#include <limits.h>
#include <math.h>
-#define SRC_SIZE ((AVX512F_LEN) / 32)
-#define DST_SIZE ((AVX512F_LEN) / 32)
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
static void
CALC (float *s, int *r)
int i;
unsigned char tmp;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
if (s[i] > SCHAR_MAX)
tmp = SCHAR_MAX;
UNION_TYPE (AVX512F_LEN, ) s;
UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- int res_ref[DST_SIZE] = { 0 };
+ int res_ref[SIZE] = { 0 };
int i, sign = 1;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
s.a[i] = 1.23 * (i + 2) * sign;
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
#if AVX512F_LEN == 128
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
- MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE);
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
abort ();
- MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE);
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
abort ();
}
#include <limits.h>
#include <math.h>
-#define SRC_SIZE ((AVX512F_LEN) / 32)
-#define DST_SIZE ((AVX512F_LEN) / 32)
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
static void
CALC (float *s, int *r)
{
int i;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
if (s[i] > UCHAR_MAX)
r[i] = UCHAR_MAX;
UNION_TYPE (AVX512F_LEN, ) s;
UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- int res_ref[DST_SIZE] = { 0 };
+ int res_ref[SIZE] = { 0 };
int i, sign = 1;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
s.a[i] = 1.23 * (i + 2) * sign;
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
#if AVX512F_LEN == 128
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
- MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE);
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
abort ();
- MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE);
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
abort ();
}
#include <math.h>
#include <limits.h>
-#define SRC_SIZE ((AVX512F_LEN) / 16)
-#define DST_SIZE ((AVX512F_LEN) / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
static void
CALC (__bf16 *s, short *r)
int i;
unsigned char tmp;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
if (s[i] > SCHAR_MAX)
tmp = SCHAR_MAX;
UNION_TYPE (AVX512F_LEN, bf16_bf) s;
UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- short res_ref[DST_SIZE] = { 0 };
+ short res_ref[SIZE] = { 0 };
int i, sign = 1;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
s.a[i] = 1.23 * (i + 2) * sign;
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
res1.x = INTRINSIC (_ipcvttnebf16_epi16) (s.x);
if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
abort ();
- MASK_MERGE (i_w) (res_ref, mask, SRC_SIZE);
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
abort ();
- MASK_ZERO (i_w) (res_ref, mask, SRC_SIZE);
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
abort ();
}
#include <math.h>
#include <limits.h>
-#define SRC_SIZE ((AVX512F_LEN) / 16)
-#define DST_SIZE ((AVX512F_LEN) / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
static void
CALC (__bf16 *s, short *r)
int i;
unsigned char tmp;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
if (s[i] > UCHAR_MAX)
tmp = UCHAR_MAX;
UNION_TYPE (AVX512F_LEN, bf16_bf) s;
UNION_TYPE (AVX512F_LEN, i_uw) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[DST_SIZE] = { 0 };
+ unsigned short res_ref[SIZE] = { 0 };
int i, sign = 1;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
s.a[i] = 1.23 * (i + 2) * sign;
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
res1.x = INTRINSIC (_ipcvttnebf16_epu16) (s.x);
if (UNION_CHECK (AVX512F_LEN, i_uw) (res1, res_ref))
abort ();
- MASK_MERGE (i_uw) (res_ref, mask, SRC_SIZE);
+ MASK_MERGE (i_uw) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_uw) (res2, res_ref))
abort ();
- MASK_ZERO (i_uw) (res_ref, mask, SRC_SIZE);
+ MASK_ZERO (i_uw) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_uw) (res3, res_ref))
abort ();
}
#include "avx10-helper.h"
#include <limits.h>
-#define SRC_SIZE ((AVX512F_LEN) / 64)
-#define DST_SIZE ((AVX512F_LEN_HALF) / 32)
+#define SRC_SIZE (AVX512F_LEN / 64)
+#define SIZE (AVX512F_LEN_HALF / 32)
+#include "avx512f-mask-type.h"
static void
CALC (double *s, int *r)
UNION_TYPE (AVX512F_LEN, d) s;
UNION_TYPE (AVX512F_LEN_HALF, i_d) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- int res_ref[DST_SIZE] = { 0 };
+ int res_ref[SIZE] = { 0 };
int i, sign = 1;
for (i = 0; i < SRC_SIZE; i++)
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
#if AVX512F_LEN == 128
#include "avx10-helper.h"
#include <limits.h>
-#define SRC_SIZE ((AVX512F_LEN) / 64)
-#define DST_SIZE ((AVX512F_LEN) / 64)
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
static void
CALC (double *s, long long *r)
{
int i;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
if (s[i] > LLONG_MAX)
r[i] = LLONG_MAX;
UNION_TYPE (AVX512F_LEN, d) s;
UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- long long res_ref[DST_SIZE] = { 0 };
+ long long res_ref[SIZE] = { 0 };
int i, sign = 1;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
s.a[i] = 1.23 * (i + 2) * sign;
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
#if AVX512F_LEN == 128
if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
abort ();
- MASK_MERGE (i_q) (res_ref, mask, SRC_SIZE);
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
abort ();
- MASK_ZERO (i_q) (res_ref, mask, SRC_SIZE);
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
abort ();
}
#include "avx10-helper.h"
#include <limits.h>
-#define SRC_SIZE ((AVX512F_LEN) / 64)
-#define DST_SIZE ((AVX512F_LEN_HALF) / 32)
+#define SRC_SIZE (AVX512F_LEN / 64)
+#define SIZE (AVX512F_LEN_HALF / 32)
+#include "avx512f-mask-type.h"
static void
CALC (double *s, unsigned int *r)
UNION_TYPE (AVX512F_LEN, d) s;
UNION_TYPE (AVX512F_LEN_HALF, i_ud) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- unsigned int res_ref[DST_SIZE] = { 0 };
+ unsigned int res_ref[SIZE] = { 0 };
int i, sign = 1;
for (i = 0; i < SRC_SIZE; i++)
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
#if AVX512F_LEN == 128
#include "avx10-helper.h"
#include <limits.h>
-#define SRC_SIZE ((AVX512F_LEN) / 64)
-#define DST_SIZE ((AVX512F_LEN) / 64)
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
static void
CALC (double *s, unsigned long long *r)
{
int i;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
if (s[i] > ULONG_MAX)
r[i] = ULONG_MAX;
UNION_TYPE (AVX512F_LEN, d) s;
UNION_TYPE (AVX512F_LEN, i_uq) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- unsigned long long res_ref[DST_SIZE] = { 0 };
+ unsigned long long res_ref[SIZE] = { 0 };
int i, sign = 1;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
s.a[i] = 1.23 * (i + 2) * sign;
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
#if AVX512F_LEN == 128
if (UNION_CHECK (AVX512F_LEN, i_uq) (res1, res_ref))
abort ();
- MASK_MERGE (i_uq) (res_ref, mask, SRC_SIZE);
+ MASK_MERGE (i_uq) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_uq) (res2, res_ref))
abort ();
- MASK_ZERO (i_uq) (res_ref, mask, SRC_SIZE);
+ MASK_ZERO (i_uq) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_uq) (res3, res_ref))
abort ();
}
#include "avx10-helper.h"
#include <limits.h>
-#define SRC_SIZE ((AVX512F_LEN) / 16)
-#define DST_SIZE ((AVX512F_LEN) / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
static void
CALC (_Float16 *s, short *r)
int i;
char tmp;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
if (s[i] > SCHAR_MAX)
tmp = SCHAR_MAX;
UNION_TYPE (AVX512F_LEN, h) s;
UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- short res_ref[DST_SIZE] = { 0 };
+ short res_ref[SIZE] = { 0 };
int i, sign = 1;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
s.a[i] = 1.23 * (i + 2) * sign;
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
#if AVX512F_LEN == 128
if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
abort ();
- MASK_MERGE (i_w) (res_ref, mask, SRC_SIZE);
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
abort ();
- MASK_ZERO (i_w) (res_ref, mask, SRC_SIZE);
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
abort ();
}
#include "avx10-helper.h"
#include <limits.h>
-#define SRC_SIZE ((AVX512F_LEN) / 16)
-#define DST_SIZE ((AVX512F_LEN) / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
static void
CALC (_Float16 *s, short *r)
int i;
unsigned char tmp;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
if (s[i] > UCHAR_MAX)
tmp = UCHAR_MAX;
UNION_TYPE (AVX512F_LEN, h) s;
UNION_TYPE (AVX512F_LEN, i_w) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- short res_ref[DST_SIZE] = { 0 };
+ short res_ref[SIZE] = { 0 };
int i, sign = 1;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
s.a[i] = 1.23 * (i + 2) * sign;
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
#if AVX512F_LEN == 128
if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
abort ();
- MASK_MERGE (i_w) (res_ref, mask, SRC_SIZE);
+ MASK_MERGE (i_w) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref))
abort ();
- MASK_ZERO (i_w) (res_ref, mask, SRC_SIZE);
+ MASK_ZERO (i_w) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref))
abort ();
}
#include "avx10-helper.h"
#include <limits.h>
-#define SRC_SIZE ((AVX512F_LEN) / 32)
-#define DST_SIZE ((AVX512F_LEN) / 32)
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
static void
CALC (float *s, int *r)
{
int i;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
if (s[i] > INT_MAX)
r[i] = INT_MAX;
UNION_TYPE (AVX512F_LEN, ) s;
UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- int res_ref[DST_SIZE] = { 0 };
+ int res_ref[SIZE] = { 0 };
int i, sign = 1;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
s.a[i] = 1.23 * (i + 2) * sign;
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
#if AVX512F_LEN == 128
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
- MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE);
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
abort ();
- MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE);
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
abort ();
}
#include <limits.h>
#include <math.h>
-#define SRC_SIZE ((AVX512F_LEN) / 32)
-#define DST_SIZE ((AVX512F_LEN) / 32)
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
static void
CALC (float *s, int *r)
int i;
unsigned char tmp;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
if (s[i] > SCHAR_MAX)
tmp = SCHAR_MAX;
UNION_TYPE (AVX512F_LEN, ) s;
UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- int res_ref[DST_SIZE] = { 0 };
+ int res_ref[SIZE] = { 0 };
int i, sign = 1;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
s.a[i] = 1.23 * (i + 2) * sign;
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
#if AVX512F_LEN == 128
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
- MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE);
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
abort ();
- MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE);
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
abort ();
}
#include <limits.h>
#include <math.h>
-#define SRC_SIZE ((AVX512F_LEN) / 32)
-#define DST_SIZE ((AVX512F_LEN) / 32)
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
static void
CALC (float *s, int *r)
{
int i;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
if (s[i] > UCHAR_MAX)
r[i] = UCHAR_MAX;
UNION_TYPE (AVX512F_LEN, ) s;
UNION_TYPE (AVX512F_LEN, i_d) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- int res_ref[DST_SIZE] = { 0 };
+ int res_ref[SIZE] = { 0 };
int i, sign = 1;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
s.a[i] = 1.23 * (i + 2) * sign;
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
#if AVX512F_LEN == 128
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
- MASK_MERGE (i_d) (res_ref, mask, SRC_SIZE);
+ MASK_MERGE (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref))
abort ();
- MASK_ZERO (i_d) (res_ref, mask, SRC_SIZE);
+ MASK_ZERO (i_d) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref))
abort ();
}
#include "avx10-helper.h"
#include <limits.h>
-#define SRC_SIZE ((AVX512F_LEN_HALF) / 32)
-#define DST_SIZE ((AVX512F_LEN) / 64)
+#define SRC_SIZE (AVX512F_LEN_HALF / 32)
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
static void
CALC (float *s, long long *r)
{
int i;
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
if (s[i] > LLONG_MAX)
r[i] = LLONG_MAX;
UNION_TYPE (AVX512F_LEN_HALF, ) s;
UNION_TYPE (AVX512F_LEN, i_q) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- long long res_ref[DST_SIZE] = { 0 };
+ long long res_ref[SIZE] = { 0 };
int i, sign = 1;
for (i = 0; i < SRC_SIZE; i++)
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
#if AVX512F_LEN == 128
if (UNION_CHECK (AVX512F_LEN, i_q) (res1, res_ref))
abort ();
- MASK_MERGE (i_q) (res_ref, mask, DST_SIZE);
+ MASK_MERGE (i_q) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_q) (res2, res_ref))
abort ();
- MASK_ZERO (i_q) (res_ref, mask, DST_SIZE);
+ MASK_ZERO (i_q) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_q) (res3, res_ref))
abort ();
}
#include "avx10-helper.h"
#include <limits.h>
-#define SRC_SIZE ((AVX512F_LEN) / 32)
-#define DST_SIZE ((AVX512F_LEN) / 32)
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
static void
CALC (float *s, unsigned int *r)
{
int i;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
if (s[i] > UINT_MAX)
r[i] = UINT_MAX;
UNION_TYPE (AVX512F_LEN, ) s;
UNION_TYPE (AVX512F_LEN, i_ud) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- unsigned int res_ref[DST_SIZE] = { 0 };
+ unsigned int res_ref[SIZE] = { 0 };
int i, sign = 1;
- for (i = 0; i < SRC_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
{
s.a[i] = 1.23 * (i + 2) * sign;
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
#if AVX512F_LEN == 128
if (UNION_CHECK (AVX512F_LEN, i_ud) (res1, res_ref))
abort ();
- MASK_MERGE (i_ud) (res_ref, mask, SRC_SIZE);
+ MASK_MERGE (i_ud) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_ud) (res2, res_ref))
abort ();
- MASK_ZERO (i_ud) (res_ref, mask, SRC_SIZE);
+ MASK_ZERO (i_ud) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_ud) (res3, res_ref))
abort ();
}
#include "avx10-helper.h"
#include <limits.h>
-#define SRC_SIZE ((AVX512F_LEN_HALF) / 32)
-#define DST_SIZE ((AVX512F_LEN) / 64)
+#define SRC_SIZE (AVX512F_LEN_HALF / 32)
+#define SIZE (AVX512F_LEN / 64)
+#include "avx512f-mask-type.h"
static void
CALC (float *s, unsigned long long *r)
UNION_TYPE (AVX512F_LEN_HALF, ) s;
UNION_TYPE (AVX512F_LEN, i_uq) res1, res2, res3;
MASK_TYPE mask = MASK_VALUE;
- unsigned long long res_ref[DST_SIZE] = { 0 };
+ unsigned long long res_ref[SIZE] = { 0 };
int i, sign = 1;
for (i = 0; i < SRC_SIZE; i++)
sign = -sign;
}
- for (i = 0; i < DST_SIZE; i++)
+ for (i = 0; i < SIZE; i++)
res2.a[i] = DEFAULT_VALUE;
#if AVX512F_LEN == 128
#define AVX10_512BIT
#endif
#include "avx10-helper.h"
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
int i;
UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ unsigned short res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
- MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
abort ();
- MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
abort ();
}
#include "avx10-helper.h"
-#define SRC_SIZE AVX512F_LEN / 16
-#define SIZE AVX512F_LEN / 32
+#define SRC_SIZE (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
static void
CALC (float *dest, _Float16 *src1, _Float16 *src2)
#define AVX10_512BIT
#endif
#include "avx10-helper.h"
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
int i;
UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, src1, src2;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ unsigned short res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
float x = 0.5;
float y = 2;
res_ref2[i] = convert_fp32_to_bf16 (m2);
}
- MASK_MERGE (bf16_uw) (res1.a, mask, SIZE_RES);
- MASK_MERGE (bf16_uw) (res2.a, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res1.a, mask, SIZE);
+ MASK_MERGE (bf16_uw) (res2.a, mask, SIZE);
res1.x = INTRINSIC (_mask_fmaddne_pbh) (res1.x, mask, src1.x, src2.x);
res2.x = INTRINSIC (_mask3_fmaddne_pbh) (src1.x, src2.x, res2.x, mask);
- MASK_MERGE (bf16_uw) (res_ref, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
- MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
abort ();
}
#endif
#include "avx10-helper.h"
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
int i;
UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, src1, src2;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ unsigned short res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
float x = 0.5;
float y = 2;
res_ref2[i] = convert_fp32_to_bf16 (m2);
}
- MASK_MERGE (bf16_uw) (res1.a, mask, SIZE_RES);
- MASK_MERGE (bf16_uw) (res2.a, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res1.a, mask, SIZE);
+ MASK_MERGE (bf16_uw) (res2.a, mask, SIZE);
res1.x = INTRINSIC (_mask_fmsubne_pbh) (res1.x, mask, src1.x, src2.x);
res2.x = INTRINSIC (_mask3_fmsubne_pbh) (src1.x, src2.x, res2.x, mask);
- MASK_MERGE (bf16_uw) (res_ref, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
- MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
abort ();
}
#endif
#include "avx10-helper.h"
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
int i;
UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, src1, src2;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ unsigned short res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
float x = 0.5;
float y = 2;
res_ref2[i] = convert_fp32_to_bf16 (m2);
}
- MASK_MERGE (bf16_uw) (res1.a, mask, SIZE_RES);
- MASK_MERGE (bf16_uw) (res2.a, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res1.a, mask, SIZE);
+ MASK_MERGE (bf16_uw) (res2.a, mask, SIZE);
res1.x = INTRINSIC (_mask_fnmaddne_pbh) (res1.x, mask, src1.x, src2.x);
res2.x = INTRINSIC (_mask3_fnmaddne_pbh) (src1.x, src2.x, res2.x, mask);
- MASK_MERGE (bf16_uw) (res_ref, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
- MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
abort ();
}
#endif
#include "avx10-helper.h"
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
int i;
UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, src1, src2;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ unsigned short res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
float x = 0.5;
float y = 2;
res_ref2[i] = convert_fp32_to_bf16 (m2);
}
- MASK_MERGE (bf16_uw) (res1.a, mask, SIZE_RES);
- MASK_MERGE (bf16_uw) (res2.a, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res1.a, mask, SIZE);
+ MASK_MERGE (bf16_uw) (res2.a, mask, SIZE);
res1.x = INTRINSIC (_mask_fnmsubne_pbh) (res1.x, mask, src1.x, src2.x);
res2.x = INTRINSIC (_mask3_fnmsubne_pbh) (src1.x, src2.x, res2.x, mask);
- MASK_MERGE (bf16_uw) (res_ref, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
- MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
abort ();
}
#define AVX10_512BIT
#endif
#include "avx10-helper.h"
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
UNION_TYPE (AVX512F_LEN, ) src2;
MASK_TYPE mask = MASK_VALUE;
- for (i = 0; i < SIZE_RES / 2; i++)
+ for (i = 0; i < SIZE / 2; i++)
{
src1.a[i] = 0;
src2.a[i] = (uint32_t) (src1.a[i]) << 16;
}
- for (i = SIZE_RES / 2; i < SIZE_RES; i++)
+ for (i = SIZE / 2; i < SIZE; i++)
src1.a[i] = 0;
src1.a[0] = 0x7FC0;
#define AVX10_512BIT
#endif
#include "avx10-helper.h"
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
int i;
UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ unsigned short res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
- MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
abort ();
- MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
abort ();
}
#define AVX10_512BIT
#endif
#include "avx10-helper.h"
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
int i;
UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ unsigned short res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 5.0;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
- MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
abort ();
- MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
abort ();
}
#define AVX10_512BIT
#endif
#include "avx10-helper.h"
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
int i;
UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ unsigned short res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
- MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
abort ();
- MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
abort ();
}
#define AVX10_2
#define AVX10_512BIT
#endif
-#define SIZE (AVX512F_LEN / 16)
#include "avx10-helper.h"
+#define SIZE (AVX512F_LEN / 16)
#include <stdbool.h>
#include "avx10-minmax-helper.h"
+#include "avx512f-mask-type.h"
void static
CALC (__bf16 *r, __bf16 *s1, __bf16 *s2, int R)
#define AVX10_2
#define AVX10_512BIT
#endif
-#define SIZE (AVX512F_LEN / 64)
#include "avx10-helper.h"
+#define SIZE (AVX512F_LEN / 64)
#include <stdbool.h>
#include "avx10-minmax-helper.h"
+#include "avx512f-mask-type.h"
void static
CALC (double *r, double *s1, double *s2, int R)
#define AVX10_2
#define AVX10_512BIT
#endif
-#define SIZE (AVX512F_LEN / 16)
#include "avx10-helper.h"
+#define SIZE (AVX512F_LEN / 16)
#include <stdbool.h>
#include "avx10-minmax-helper.h"
+#include "avx512f-mask-type.h"
void static
CALC (_Float16 *r, _Float16 *s1, _Float16 *s2, int R)
#define AVX10_2
#define AVX10_512BIT
#endif
-#define SIZE (AVX512F_LEN / 32)
#include "avx10-helper.h"
+#define SIZE (AVX512F_LEN / 32)
#include <stdbool.h>
#include "avx10-minmax-helper.h"
+#include "avx512f-mask-type.h"
void static
CALC (float *r, float *s1, float *s2, int R)
#define AVX10_512BIT
#endif
#include "avx10-helper.h"
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
int i;
UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ unsigned short res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
- MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
abort ();
- MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
abort ();
}
#endif
#include "avx10-helper.h"
-#define SIZE (AVX512F_LEN / 8)
-#define SIZE_RES (AVX512F_LEN / 16)
-
+#define SRC_SIZE (AVX512F_LEN / 8)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
static void
CALC (short* dst, char* src1, char* src2, int cont)
UNION_TYPE (AVX512F_LEN, i_b) src1;
UNION_TYPE (AVX512F_LEN, i_b) src2;
MASK_TYPE mask = MASK_VALUE;
- short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ short res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE; i++)
+ for (i = 0; i < SRC_SIZE; i++)
{
src1.a[i] = 10 + 2 * i;
src2.a[i] = 3 * i;
}
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0x7FFF;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, i_w) (res1, res_ref))
abort ();
- MASK_MERGE (i_w) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (i_w) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_w) (res2, res_ref2))
abort ();
- MASK_ZERO (i_w) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (i_w) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_w) (res3, res_ref2))
abort ();
}
#define AVX10_512BIT
#endif
#include "avx10-helper.h"
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
int i;
UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ unsigned short res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
- MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
abort ();
- MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
abort ();
}
#include "avx10-helper.h"
-#define SIZE (AVX512F_LEN / 8)
-#define SIZE_RES (AVX512F_LEN / 32)
+#define SRC_SIZE (AVX512F_LEN / 8)
+#define SIZE (AVX512F_LEN / 32)
static void
CALC (int *r, int *dst, char *s1, char *s2)
{
- short tempres[SIZE];
- for (int i = 0; i < SIZE; i++)
+ short tempres[SRC_SIZE];
+ for (int i = 0; i < SRC_SIZE; i++)
tempres[i] = (short) s1[i] * (short) s2[i];
- for (int i = 0; i < SIZE_RES; i++)
+ for (int i = 0; i < SIZE; i++)
{
long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
+ tempres[i * 4 + 2] + tempres[i * 4 + 3];
UNION_TYPE (AVX512F_LEN, i_b) src1;
UNION_TYPE (AVX512F_LEN, i_b) src2;
MASK_TYPE mask = MASK_VALUE;
- int res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ int res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE; i++)
+ for (i = 0; i < SRC_SIZE; i++)
{
int sign = i % 2 ? 1 : -1;
src1.a[i] = sign * (10 + 3 * i * i);
src2.a[i] = sign * 10 * i * i;
}
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0x7FFFFFFF;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
- MASK_MERGE (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref2))
abort ();
- MASK_ZERO (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2))
abort ();
}
#include "avx10-helper.h"
-#define SIZE (AVX512F_LEN / 8)
-#define SIZE_RES (AVX512F_LEN / 32)
+#define SRC_SIZE (AVX512F_LEN / 8)
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
static void
CALC (int *r, int *dst, char *s1, char *s2)
{
- short tempres[SIZE];
- for (int i = 0; i < SIZE; i++)
+ short tempres[SRC_SIZE];
+ for (int i = 0; i < SRC_SIZE; i++)
tempres[i] = (short) s1[i] * (short) s2[i];
- for (int i = 0; i < SIZE_RES; i++)
+ for (int i = 0; i < SIZE; i++)
{
long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
+ tempres[i * 4 + 2] + tempres[i * 4 + 3];
UNION_TYPE (AVX512F_LEN, i_b) src1;
UNION_TYPE (AVX512F_LEN, i_b) src2;
MASK_TYPE mask = MASK_VALUE;
- int res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ int res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE; i++)
+ for (i = 0; i < SRC_SIZE; i++)
{
int sign = i % 2 ? 1 : -1;
src1.a[i] = sign * (10 + 3 * i * i);
src2.a[i] = sign * 10 * i * i;
}
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0x7FFFFFFF;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
- MASK_MERGE (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref2))
abort ();
- MASK_ZERO (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2))
abort ();
}
#include "avx10-helper.h"
-#define SIZE (AVX512F_LEN / 8)
-#define SIZE_RES (AVX512F_LEN / 32)
+#define SRC_SIZE (AVX512F_LEN / 8)
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
static void
CALC (int *r, int *dst, char *s1, unsigned char *s2)
{
- short tempres[SIZE];
- for (int i = 0; i < SIZE; i++)
+ short tempres[SRC_SIZE];
+ for (int i = 0; i < SRC_SIZE; i++)
tempres[i] = (short) s1[i] * (unsigned short) s2[i];
- for (int i = 0; i < SIZE_RES; i++)
+ for (int i = 0; i < SIZE; i++)
{
long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
+ tempres[i * 4 + 2] + tempres[i * 4 + 3];
UNION_TYPE (AVX512F_LEN, i_b) src1;
UNION_TYPE (AVX512F_LEN, i_ub) src2;
MASK_TYPE mask = MASK_VALUE;
- int res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ int res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE; i++)
+ for (i = 0; i < SRC_SIZE; i++)
{
int sign = i % 2 ? 1 : -1;
src1.a[i] = sign*10*i*i;
src2.a[i] = 10 + 3*i*i + sign;
}
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0x7FFFFFFF;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
- MASK_MERGE (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref2))
abort ();
- MASK_ZERO (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2))
abort ();
}
#include "avx10-helper.h"
-#define SIZE (AVX512F_LEN / 8)
-#define SIZE_RES (AVX512F_LEN / 32)
+#define SRC_SIZE (AVX512F_LEN / 8)
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
static void
CALC (int *r, int *dst, char *s1, unsigned char *s2)
{
- short tempres[SIZE];
- for (int i = 0; i < SIZE; i++)
+ short tempres[SRC_SIZE];
+ for (int i = 0; i < SRC_SIZE; i++)
tempres[i] = (short) s1[i] * (unsigned short) s2[i];
- for (int i = 0; i < SIZE_RES; i++)
+ for (int i = 0; i < SIZE; i++)
{
long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
+ tempres[i * 4 + 2] + tempres[i * 4 + 3];
UNION_TYPE (AVX512F_LEN, i_b) src1;
UNION_TYPE (AVX512F_LEN, i_ub) src2;
MASK_TYPE mask = MASK_VALUE;
- int res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ int res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE; i++)
+ for (i = 0; i < SRC_SIZE; i++)
{
int sign = i % 2 ? 1 : -1;
src1.a[i] = sign * 10 * i * i;
src2.a[i] = 10 + 3 * i * i + sign;
}
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0x7FFFFFFF;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
- MASK_MERGE (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref2))
abort ();
- MASK_ZERO (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2))
abort ();
}
#include "avx10-helper.h"
-#define SIZE (AVX512F_LEN / 8)
-#define SIZE_RES (AVX512F_LEN / 32)
+#define SRC_SIZE (AVX512F_LEN / 8)
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
static void
CALC (int *r, int *dst, unsigned char *s1, unsigned char *s2)
{
- unsigned short tempres[SIZE];
- for (int i = 0; i < SIZE; i++)
+ unsigned short tempres[SRC_SIZE];
+ for (int i = 0; i < SRC_SIZE; i++)
tempres[i] = (unsigned short) s1[i] * (unsigned short) s2[i];
- for (int i = 0; i < SIZE_RES; i++)
+ for (int i = 0; i < SIZE; i++)
{
long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
+ tempres[i * 4 + 2] + tempres[i * 4 + 3];
UNION_TYPE (AVX512F_LEN, i_ub) src1;
UNION_TYPE (AVX512F_LEN, i_ub) src2;
MASK_TYPE mask = MASK_VALUE;
- int res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ int res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE; i++)
+ for (i = 0; i < SRC_SIZE; i++)
{
src1.a[i] = 10 + 3 * i * i;
src2.a[i] = 10 * i * i;
}
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0x7FFFFFFF;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
- MASK_MERGE (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref2))
abort ();
- MASK_ZERO (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2))
abort ();
}
#include "avx10-helper.h"
-#define SIZE (AVX512F_LEN / 8)
-#define SIZE_RES (AVX512F_LEN / 32)
+#define SRC_SIZE (AVX512F_LEN / 8)
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
static void
CALC (int *r, int *dst, unsigned char *s1, unsigned char *s2)
{
- unsigned short tempres[SIZE];
- for (int i = 0; i < SIZE; i++)
+ unsigned short tempres[SRC_SIZE];
+ for (int i = 0; i < SRC_SIZE; i++)
tempres[i] = (unsigned short) s1[i] * (unsigned short) s2[i];
- for (int i = 0; i < SIZE_RES; i++)
+ for (int i = 0; i < SIZE; i++)
{
long long test = (long long) dst[i] + tempres[i * 4] + tempres[i * 4 + 1]
+ tempres[i * 4 + 2] + tempres[i * 4 + 3];
UNION_TYPE (AVX512F_LEN, i_ub) src1;
UNION_TYPE (AVX512F_LEN, i_ub) src2;
MASK_TYPE mask = MASK_VALUE;
- int res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ int res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE; i++)
+ for (i = 0; i < SRC_SIZE; i++)
{
src1.a[i] = 10 + 3 * i * i;
src2.a[i] = 10 * i * i;
}
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0x7FFFFFFF;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
- MASK_MERGE (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref2))
abort ();
- MASK_ZERO (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2))
abort ();
}
#include "avx10-helper.h"
-#define SIZE (AVX512F_LEN / 16)
-#define SIZE_RES (AVX512F_LEN / 32)
-
+#define SRC_SIZE (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
static void
CALC (int *r, int *dst, short *s1, unsigned short *s2)
{
- int tempres[SIZE];
- for (int i = 0; i < SIZE; i++)
+ int tempres[SRC_SIZE];
+ for (int i = 0; i < SRC_SIZE; i++)
tempres[i] = (int) s1[i] * (unsigned int) s2[i];
- for (int i = 0; i < SIZE_RES; i++)
+ for (int i = 0; i < SIZE; i++)
{
long long test = (long long) dst[i] + tempres[i * 2] + tempres[i * 2 + 1];
r[i] = test;
UNION_TYPE (AVX512F_LEN, i_w) src1;
UNION_TYPE (AVX512F_LEN, i_uw) src2;
MASK_TYPE mask = MASK_VALUE;
- int res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ int res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE; i++)
+ for (i = 0; i < SRC_SIZE; i++)
{
int sign = i % 2 ? 1 : -1;
src1.a[i] = sign * (10 + 3 * i * i);
src2.a[i] = sign * 10 * i * i;
}
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0x7FFFFFFF;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
- MASK_MERGE (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref2))
abort ();
- MASK_ZERO (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2))
abort ();
}
#include "avx10-helper.h"
-#define SIZE (AVX512F_LEN / 16)
-#define SIZE_RES (AVX512F_LEN / 32)
-
+#define SRC_SIZE (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
static void
CALC (int *r, int *dst, short *s1, unsigned short *s2)
{
- int tempres[SIZE];
- for (int i = 0; i < SIZE; i++)
+ int tempres[SRC_SIZE];
+ for (int i = 0; i < SRC_SIZE; i++)
tempres[i] = (int) s1[i] * (unsigned int) s2[i];
- for (int i = 0; i < SIZE_RES; i++)
+ for (int i = 0; i < SIZE; i++)
{
long long test = (long long) dst[i] + tempres[i * 2] + tempres[i * 2 + 1];
long long max_int = 0x7FFFFFFF;
UNION_TYPE (AVX512F_LEN, i_w) src1;
UNION_TYPE (AVX512F_LEN, i_uw) src2;
MASK_TYPE mask = MASK_VALUE;
- int res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ int res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE; i++)
+ for (i = 0; i < SRC_SIZE; i++)
{
int sign = i % 2 ? 1 : -1;
src1.a[i] = sign * (10 + 3 * i * i);
src2.a[i] = sign * 10 * i * i;
}
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0x7FFFFFFF;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
- MASK_MERGE (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref2))
abort ();
- MASK_ZERO (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2))
abort ();
}
#include "avx10-helper.h"
-#define SIZE (AVX512F_LEN / 16)
-#define SIZE_RES (AVX512F_LEN / 32)
-
+#define SRC_SIZE (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
static void
CALC (int *r, int *dst, unsigned short *s1, short *s2)
{
- int tempres[SIZE];
- for (int i = 0; i < SIZE; i++)
+ int tempres[SRC_SIZE];
+ for (int i = 0; i < SRC_SIZE; i++)
tempres[i] = (unsigned int) s1[i] * (int) s2[i];
- for (int i = 0; i < SIZE_RES; i++)
+ for (int i = 0; i < SIZE; i++)
{
long long test = (long long) dst[i] + tempres[i * 2] + tempres[i * 2 + 1];
r[i] = test;
UNION_TYPE (AVX512F_LEN, i_uw) src1;
UNION_TYPE (AVX512F_LEN, i_w) src2;
MASK_TYPE mask = MASK_VALUE;
- int res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ int res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE; i++)
+ for (i = 0; i < SRC_SIZE; i++)
{
int sign = i % 2 ? 1 : -1;
src1.a[i] = sign * 10 * i * i;
src2.a[i] = 10 + 3 * i * i + sign;
}
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0x7FFFFFFF;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
- MASK_MERGE (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref2))
abort ();
- MASK_ZERO (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2))
abort ();
}
#include "avx10-helper.h"
-#define SIZE (AVX512F_LEN / 16)
-#define SIZE_RES (AVX512F_LEN / 32)
-
+#define SRC_SIZE (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
static void
CALC (int *r, int *dst, unsigned short *s1, short *s2)
{
- int tempres[SIZE];
- for (int i = 0; i < SIZE; i++)
+ int tempres[SRC_SIZE];
+ for (int i = 0; i < SRC_SIZE; i++)
tempres[i] = (unsigned int) s1[i] * (int) s2[i];
- for (int i = 0; i < SIZE_RES; i++)
+ for (int i = 0; i < SIZE; i++)
{
long long test = (long long) dst[i] + tempres[i * 2] + tempres[i * 2 + 1];
long long max_int = 0x7FFFFFFF;
UNION_TYPE (AVX512F_LEN, i_uw) src1;
UNION_TYPE (AVX512F_LEN, i_w) src2;
MASK_TYPE mask = MASK_VALUE;
- int res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ int res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE; i++)
+ for (i = 0; i < SRC_SIZE; i++)
{
int sign = i % 2 ? 1 : -1;
src1.a[i] = sign * 10 * i * i;
src2.a[i] = 10 + 3 * i * i + sign;
}
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0x7FFFFFFF;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
- MASK_MERGE (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref2))
abort ();
- MASK_ZERO (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2))
abort ();
}
#include "avx10-helper.h"
-#define SIZE (AVX512F_LEN / 16)
-#define SIZE_RES (AVX512F_LEN / 32)
-
+#define SRC_SIZE (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
static void
CALC (int *r, int *dst, unsigned short *s1, unsigned short *s2)
{
- unsigned int tempres[SIZE];
- for (int i = 0; i < SIZE; i++)
+ unsigned int tempres[SRC_SIZE];
+ for (int i = 0; i < SRC_SIZE; i++)
tempres[i] = (unsigned int) s1[i] * (unsigned int) s2[i];
- for (int i = 0; i < SIZE_RES; i++)
+ for (int i = 0; i < SIZE; i++)
{
long long test = (long long) dst[i] + tempres[i * 2] + tempres[i * 2 + 1];
r[i] = test;
UNION_TYPE (AVX512F_LEN, i_uw) src1;
UNION_TYPE (AVX512F_LEN, i_uw) src2;
MASK_TYPE mask = MASK_VALUE;
- int res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ int res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE; i++)
+ for (i = 0; i < SRC_SIZE; i++)
{
src1.a[i] = 10 + 3 * i * i;
src2.a[i] = 10 * i * i;
}
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0x7FFFFFFF;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
- MASK_MERGE (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref2))
abort ();
- MASK_ZERO (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2))
abort ();
}
#include "avx10-helper.h"
-#define SIZE (AVX512F_LEN / 16)
-#define SIZE_RES (AVX512F_LEN / 32)
-
+#define SRC_SIZE (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 32)
+#include "avx512f-mask-type.h"
static void
CALC (int *r, int *dst, unsigned short *s1, unsigned short *s2)
{
- unsigned int tempres[SIZE];
- for (int i = 0; i < SIZE; i++)
+ unsigned int tempres[SRC_SIZE];
+ for (int i = 0; i < SRC_SIZE; i++)
tempres[i] = (unsigned int) s1[i] * (unsigned int) s2[i];
- for (int i = 0; i < SIZE_RES; i++)
+ for (int i = 0; i < SIZE; i++)
{
long long test = (long long) dst[i] + tempres[i * 2] + tempres[i * 2 + 1];
long long max_uint = 0xFFFFFFFF;
UNION_TYPE (AVX512F_LEN, i_uw) src1;
UNION_TYPE (AVX512F_LEN, i_uw) src2;
MASK_TYPE mask = MASK_VALUE;
- int res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ int res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE; i++)
+ for (i = 0; i < SRC_SIZE; i++)
{
src1.a[i] = 10 + 3 * i * i;
src2.a[i] = 10 * i * i;
}
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0x7FFFFFFF;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, i_d) (res1, res_ref))
abort ();
- MASK_MERGE (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res2, res_ref2))
abort ();
- MASK_ZERO (i_d) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (i_d) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, i_d) (res3, res_ref2))
abort ();
}
#define AVX10_512BIT
#endif
#include "avx10-helper.h"
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
int i;
UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ unsigned short res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
- MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
abort ();
- MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
abort ();
}
#define AVX10_512BIT
#endif
#include "avx10-helper.h"
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
int i;
UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ unsigned short res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 5.0;
res2.a[i] = DEFAULT_VALUE;
res3.a[i] = DEFAULT_VALUE;
- float s = (float) (SIZE_RES - 1) / (float) i;
+ float s = (float) (SIZE - 1) / (float) i;
src1.a[i] = convert_fp32_to_bf16 (s);
float x = convert_bf16_to_fp32 (src1.a[i]);
__m128 px = _mm_load_ss (&x);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
- MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
abort ();
- MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
abort ();
}
#define AVX10_512BIT
#endif
#include "avx10-helper.h"
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
int i;
UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ unsigned short res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 5.0;
res2.a[i] = DEFAULT_VALUE;
res3.a[i] = DEFAULT_VALUE;
- float s = (float) (SIZE_RES - 1) / (float) i;
+ float s = (float) (SIZE - 1) / (float) i;
src1.a[i] = convert_fp32_to_bf16 (s);
float x = convert_bf16_to_fp32 (src1.a[i]);
res_ref[i] = res_ref2[i] = convert_fp32_to_bf16_ne (rndscale (x));
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
- MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
abort ();
- MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
abort ();
}
#endif
#include "avx10-helper.h"
#include <math.h>
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
int i;
UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ unsigned short res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
- MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
abort ();
- MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
abort ();
}
#define AVX10_512BIT
#endif
#include "avx10-helper.h"
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
int i;
UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ unsigned short res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0;
res2.a[i] = DEFAULT_VALUE;
res3.a[i] = DEFAULT_VALUE;
float x = (float) (2 * (i % 7) + 7);
- float y = 1.0 + (float) (4 * i) / (float) SIZE_RES;
+ float y = 1.0 + (float) (4 * i) / (float) SIZE;
float xx, yy, res;
src2.a[i] = convert_fp32_to_bf16 (y);
src1.a[i] = convert_fp32_to_bf16 (x);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
- MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
abort ();
- MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
abort ();
}
#endif
#include "avx10-helper.h"
#include <math.h>
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
int i;
UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ unsigned short res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
- MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
abort ();
- MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
abort ();
}
#define AVX10_512BIT
#endif
#include "avx10-helper.h"
-#define SIZE_RES (AVX512F_LEN / 16)
+#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
void
TEST (void)
int i;
UNION_TYPE (AVX512F_LEN, bf16_uw) res1, res2, res3, src1, src2;
MASK_TYPE mask = MASK_VALUE;
- unsigned short res_ref[SIZE_RES], res_ref2[SIZE_RES];
+ unsigned short res_ref[SIZE], res_ref2[SIZE];
- for (i = 0; i < SIZE_RES; i++)
+ for (i = 0; i < SIZE; i++)
{
res1.a[i] = 0;
res2.a[i] = DEFAULT_VALUE;
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res1, res_ref))
abort ();
- MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_MERGE (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res2, res_ref2))
abort ();
- MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE_RES);
+ MASK_ZERO (bf16_uw) (res_ref2, mask, SIZE);
if (UNION_CHECK (AVX512F_LEN, bf16_uw) (res3, res_ref2))
abort ();
}
#include <math.h>
#include <limits.h>
#include <float.h>
-#include "avx512f-mask-type.h"
#define SIZE (AVX512F_LEN / 16)
+#include "avx512f-mask-type.h"
#ifndef __FPCLASSPH__
#define __FPCLASSPH__