/* -------- 1,1x,11000 (apparently unassigned) (7) -------- */
/* -------- 1,1x,11001 FRINTI 2d_2d, 4s_4s, 2s_2s (8) -------- */
/* rm plan:
- FRINTN: tieeven -- !! FIXME KLUDGED !!
+ FRINTN: tieeven
FRINTM: -inf
FRINTP: +inf
FRINTZ: zero
- FRINTA: tieaway -- !! FIXME KLUDGED !!
+ FRINTA: tieaway
FRINTX: per FPCR + "exact = TRUE"
FRINTI: per FPCR
*/
Bool isD = (size & 1) == 1;
if (bitQ == 0 && isD) return False; // implied 1d case
- IRTemp irrmRM = mk_get_IR_rounding_mode();
-
- UChar ch = '?';
- IRTemp irrm = newTemp(Ity_I32);
+ UChar ch = '?';
+ IROp op = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
+ Bool isBinop = True;
+ IRExpr* irrmE = NULL;
switch (ix) {
- case 1: ch = 'n'; assign(irrm, mkU32(Irrm_NEAREST)); break;
- case 2: ch = 'm'; assign(irrm, mkU32(Irrm_NegINF)); break;
- case 3: ch = 'p'; assign(irrm, mkU32(Irrm_PosINF)); break;
- case 4: ch = 'z'; assign(irrm, mkU32(Irrm_ZERO)); break;
+ case 1: ch = 'n'; isBinop = False; op = isD ? Iop_RoundF64toIntE : Iop_RoundF32toIntE; break;
+ case 2: ch = 'm'; irrmE = mkU32(Irrm_NegINF); break;
+ case 3: ch = 'p'; irrmE = mkU32(Irrm_PosINF); break;
+ case 4: ch = 'z'; irrmE = mkU32(Irrm_ZERO); break;
// The following is a kludge. Should be: Irrm_NEAREST_TIE_AWAY_0
- case 5: ch = 'a'; assign(irrm, mkU32(Irrm_NEAREST)); break;
+ case 5: ch = 'a'; isBinop = False; op = isD ? Iop_RoundF64toIntA0 : Iop_RoundF32toIntA0; break;
// I am unsure about the following, due to the "integral exact"
// description in the manual. What does it mean? (frintx, that is)
- case 6: ch = 'x'; assign(irrm, mkexpr(irrmRM)); break;
- case 8: ch = 'i'; assign(irrm, mkexpr(irrmRM)); break;
+ case 6: ch = 'x'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
+ case 8: ch = 'i'; irrmE = mkexpr(mk_get_IR_rounding_mode()); break;
default: vassert(0);
}
- IROp opRND = isD ? Iop_RoundF64toInt : Iop_RoundF32toInt;
if (isD) {
for (UInt i = 0; i < 2; i++) {
- putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
- getQRegLane(nn, i, Ity_F64)));
+ if (isBinop) {
+ IRTemp irrm = newTemp(Ity_I32);
+ assign(irrm, irrmE);
+ putQRegLane(dd, i, binop(op, mkexpr(irrm),
+ getQRegLane(nn, i, Ity_F64)));
+ } else {
+ putQRegLane(dd, i, unop(op, getQRegLane(nn, i, Ity_F64)));
+ }
}
} else {
UInt n = bitQ==1 ? 4 : 2;
for (UInt i = 0; i < n; i++) {
- putQRegLane(dd, i, binop(opRND, mkexpr(irrm),
- getQRegLane(nn, i, Ity_F32)));
+ if (isBinop) {
+ IRTemp irrm = newTemp(Ity_I32);
+ assign(irrm, irrmE);
+ putQRegLane(dd, i, binop(op, mkexpr(irrm),
+ getQRegLane(nn, i, Ity_F32)));
+ } else {
+ putQRegLane(dd, i, unop(op, getQRegLane(nn, i, Ity_F32)));
+ }
}
if (bitQ == 0)
putQRegLane(dd, 1, mkU64(0)); // zero out lanes 2 and 3
}
}
+/* Execute the 128-bit vector form of FRINTA (round to nearest, ties away
+   from zero) on the 16 bytes at INPUT and assert that every result lane
+   equals the matching element of EXPECTED.
+
+   When T is double the .2d arrangement is used and two lanes are checked;
+   otherwise the .4s arrangement is used and four lanes are checked (the
+   callers in this file pass float in that case). */
+template<typename T>
+void test_frinta_fullvec(T* input, T* expected)
+{
+   // Exactly one Q register's worth of lanes: 2 doubles or 4 floats.
+   T result[2*sizeof(double)/sizeof(T)];
+   T* rp = result;
+   // Both pointers are only read by the asm and are used as "[%n]" base
+   // addresses, so they are plain register inputs.  The store into result
+   // is made visible to the compiler by the "memory" clobber.
+   if constexpr (std::is_same_v<double, T> == true)
+   {
+      __asm__ __volatile__(
+         "ldr q23, [%1];\n"
+         "frinta v22.2d, v23.2d;\n"
+         "str q22, [%0];\n"
+         : /* no outputs */
+         : "r" (rp), "r" (input)
+         : "memory", "v22", "v23");
+      assert(result[0] == expected[0]);
+      assert(result[1] == expected[1]);
+   }
+   else
+   {
+      __asm__ __volatile__(
+         "ldr q23, [%1];\n"
+         "frinta v22.4s, v23.4s;\n"
+         "str q22, [%0];\n"
+         : /* no outputs */
+         : "r" (rp), "r" (input)
+         : "memory", "v22", "v23");
+      assert(result[0] == expected[0]);
+      assert(result[1] == expected[1]);
+      assert(result[2] == expected[2]);
+      assert(result[3] == expected[3]);
+   }
+}
+
+/* Execute the 64-bit vector form of FRINTA (.2s arrangement; round to
+   nearest, ties away from zero) on the first two floats at INPUT and
+   assert that both result lanes equal the first two elements of EXPECTED.
+   Any further elements of either array are ignored. */
+void test_frinta_halfvec(float* input, float* expected)
+{
+   float result[2];
+   float* rp = result;
+   // Both pointers are only read by the asm and are used as "[%n]" base
+   // addresses, so they are plain register inputs.  The store into result
+   // is made visible to the compiler by the "memory" clobber.
+   __asm__ __volatile__(
+      "ldr d23, [%1];\n"
+      "frinta v22.2s, v23.2s;\n"
+      "str d22, [%0];\n"
+      : /* no outputs */
+      : "r" (rp), "r" (input)
+      : "memory", "v22", "v23");
+   assert(result[0] == expected[0]);
+   assert(result[1] == expected[1]);
+}
+
+
template<typename T>
void test_frintn(T input, T expected)
{
}
}
+/* Execute the 128-bit vector form of FRINTN (round to nearest, ties to
+   even) on the 16 bytes at INPUT and assert that every result lane equals
+   the matching element of EXPECTED.
+
+   When T is double the .2d arrangement is used and two lanes are checked;
+   otherwise the .4s arrangement is used and four lanes are checked (the
+   callers in this file pass float in that case). */
+template<typename T>
+void test_frintn_fullvec(T* input, T* expected)
+{
+   // Exactly one Q register's worth of lanes: 2 doubles or 4 floats.
+   T result[2*sizeof(double)/sizeof(T)];
+   T* rp = result;
+   // Both pointers are only read by the asm and are used as "[%n]" base
+   // addresses, so they are plain register inputs.  The store into result
+   // is made visible to the compiler by the "memory" clobber.
+   if constexpr (std::is_same_v<double, T> == true)
+   {
+      __asm__ __volatile__(
+         "ldr q23, [%1];\n"
+         "frintn v22.2d, v23.2d;\n"
+         "str q22, [%0];\n"
+         : /* no outputs */
+         : "r" (rp), "r" (input)
+         : "memory", "v22", "v23");
+      assert(result[0] == expected[0]);
+      assert(result[1] == expected[1]);
+   }
+   else
+   {
+      __asm__ __volatile__(
+         "ldr q23, [%1];\n"
+         "frintn v22.4s, v23.4s;\n"
+         "str q22, [%0];\n"
+         : /* no outputs */
+         : "r" (rp), "r" (input)
+         : "memory", "v22", "v23");
+      assert(result[0] == expected[0]);
+      assert(result[1] == expected[1]);
+      assert(result[2] == expected[2]);
+      assert(result[3] == expected[3]);
+   }
+}
+
+/* Execute the 64-bit vector form of FRINTN (.2s arrangement; round to
+   nearest, ties to even) on the first two floats at INPUT and assert that
+   both result lanes equal the first two elements of EXPECTED.  Any further
+   elements of either array are ignored. */
+void test_frintn_halfvec(float* input, float* expected)
+{
+   float result[2];
+   float* rp = result;
+   // Both pointers are only read by the asm and are used as "[%n]" base
+   // addresses, so they are plain register inputs.  The store into result
+   // is made visible to the compiler by the "memory" clobber.
+   __asm__ __volatile__(
+      "ldr d23, [%1];\n"
+      "frintn v22.2s, v23.2s;\n"
+      "str d22, [%0];\n"
+      : /* no outputs */
+      : "r" (rp), "r" (input)
+      : "memory", "v22", "v23");
+   assert(result[0] == expected[0]);
+   assert(result[1] == expected[1]);
+}
+
int main()
{
// round "away from zero"
test_frinta(-1.5F, -2.0F);
test_frinta(-2.5F, -3.0F);
+ // Vector forms of frinta.  Each expected array has exactly as many
+ // elements as its input; the same arrays are reused by the frintn
+ // checks further down.
+ double in1[] = {1.5, 1.5};
+ double out1[] = {2.0, 2.0};
+ test_frinta_fullvec(in1, out1);
+ double in2[] = {2.5, 2.5};
+ double out2[] = {3.0, 3.0};
+ test_frinta_fullvec(in2, out2);
+ double in3[] = {-1.5, -1.5};
+ double out3[] = {-2.0, -2.0};
+ test_frinta_fullvec(in3, out3);
+ double in4[] = {-2.5, -2.5};
+ double out4[] = {-3.0, -3.0};
+ test_frinta_fullvec(in4, out4);
+
+ // The float arrays hold four lanes for the fullvec (.4s) checks; the
+ // halfvec (.2s) checks only look at the first two elements.
+ float in1f[] = {1.5F, 1.5F, 1.5F, 1.5F};
+ float out1f[] = {2.0F, 2.0F, 2.0F, 2.0F};
+ test_frinta_fullvec(in1f, out1f);
+ test_frinta_halfvec(in1f, out1f);
+ float in2f[] = {2.5F, 2.5F, 2.5F, 2.5F};
+ float out2f[] = {3.0F, 3.0F, 3.0F, 3.0F};
+ test_frinta_fullvec(in2f, out2f);
+ test_frinta_halfvec(in2f, out2f);
+ float in3f[] = {-1.5F, -1.5F, -1.5F, -1.5F};
+ float out3f[] = {-2.0F, -2.0F, -2.0F, -2.0F};
+ test_frinta_fullvec(in3f, out3f);
+ test_frinta_halfvec(in3f, out3f);
+ float in4f[] = {-2.5F, -2.5F, -2.5F, -2.5F};
+ float out4f[] = {-3.0F, -3.0F, -3.0F, -3.0F};
+ test_frinta_fullvec(in4f, out4f);
+ test_frinta_halfvec(in4f, out4f);
+
// round "to even"
test_frintn(1.5, 2.0);
test_frintn(2.5, 2.0);
test_frintn(2.5F, 2.0F);
test_frintn(-1.5F, -2.0F);
test_frintn(-2.5F, -2.0F);
+
+ // Ties go to even here, so 1.5 and 2.5 are both checked against 2.0
+ // (out1 / out1f) and -1.5 and -2.5 against -2.0 (out3 / out3f).
+ test_frintn_fullvec(in1, out1);
+ test_frintn_fullvec(in2, out1);
+ test_frintn_fullvec(in3, out3);
+ test_frintn_fullvec(in4, out3);
+
+ test_frintn_fullvec(in1f, out1f);
+ test_frintn_halfvec(in1f, out1f);
+ test_frintn_fullvec(in2f, out1f);
+ test_frintn_halfvec(in2f, out1f);
+ test_frintn_fullvec(in3f, out3f);
+ test_frintn_halfvec(in3f, out3f);
+ test_frintn_fullvec(in4f, out3f);
+ test_frintn_halfvec(in4f, out3f);
}