0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF };
/* Mark for insns which produce approximated vector short results. */
+__attribute__((unused))
static V128 ApproxPS = { 0x00,0x00,0x80,0xFF, 0x00,0x00,0x80,0xFF,
0x00,0x00,0x80,0xFF, 0x00,0x00,0x80,0xFF };
/* Mark for insns which produce approximated scalar short results. */
+__attribute__((unused))
static V128 ApproxSS = { 0x00,0x00,0x80,0xFF, 0xFF,0xFF,0xFF,0xFF,
0xFF,0xFF,0xFF,0xFF, 0xFF,0xFF,0xFF,0xFF };
double mkPosNan ( void ) { return 0.0 / 0.0; }
double mkNegNan ( void ) { return -mkPosNan(); }
+__attribute__((noinline))
+UInt get_mxcsr ( void )
+{
+ ULong w64;
+ __asm__ __volatile__(
+ "subq $8, %%rsp" "\n\t"
+ "stmxcsr (%%rsp)" "\n\t"
+ "movq (%%rsp), %0" "\n"
+ "addq $8, %%rsp"
+ : /*OUT*/"=r"(w64) : /*IN*/ : "memory","cc"
+ );
+ if (0) printf("get %08x\n", (UInt)w64);
+ return (UInt)w64;
+}
+
+__attribute__((noinline))
+void set_mxcsr ( UInt w32 )
+{
+ if (0) printf("set %08x\n", w32);
+ ULong w64 = (ULong)w32;
+ __asm__ __volatile__(
+ "subq $8, %%rsp" "\n\t"
+ "movq %0, (%%rsp)" "\n\t"
+ "ldmxcsr (%%rsp)" "\n\t"
+ "addq $8, %%rsp"
+ : /*OUT*/ : /*IN*/"r"(w64) : "memory",/*"mxcsr",*/"cc"
+ );
+}
+
+UInt get_sse_roundingmode ( void )
+{
+ UInt w = get_mxcsr();
+ return (w >> 13) & 3;
+}
+
+void set_sse_roundingmode ( UInt m )
+{
+ UInt w;
+ assert(0 == (m & ~3));
+ w = get_mxcsr();
+ w &= ~(3 << 13);
+ w |= (m << 13);
+ set_mxcsr(w);
+}
+
#define DO_imm_r_r(_opname, _imm, _src, _dst) \
{ \
}
}
+void do_ROUNDSD_1XX ( Bool mem, V128* src, /*OUT*/V128* dst )
+{
+ if (mem) {
+ __asm__ __volatile__(
+ "movupd (%1), %%xmm11" "\n\t"
+ "roundsd $4, (%0), %%xmm11" "\n\t"
+ "movupd %%xmm11, (%1)" "\n"
+ : /*OUT*/
+ : /*IN*/ "r"(src), "r"(dst)
+ : /*TRASH*/ "xmm11"
+ );
+ } else {
+ __asm__ __volatile__(
+ "movupd (%1), %%xmm11" "\n\t"
+ "movupd (%0), %%xmm2" "\n\t"
+ "roundsd $4, %%xmm2, %%xmm11" "\n\t"
+ "movupd %%xmm11, (%1)" "\n"
+ : /*OUT*/
+ : /*IN*/ "r"(src), "r"(dst)
+ : /*TRASH*/ "xmm11","xmm2"
+ );
+ }
+}
void test_ROUNDSD_w_immediate_rounding ( void )
{
}
}
+void test_ROUNDSD_w_mxcsr_rounding ( void )
+{
+ UInt rm;
+ double vals[22];
+ Int i = 0;
+ vals[i++] = 0.0;
+ vals[i++] = -0.0;
+ vals[i++] = mkPosInf();
+ vals[i++] = mkNegInf();
+ vals[i++] = mkPosNan();
+ vals[i++] = mkNegNan();
+ vals[i++] = -1.3;
+ vals[i++] = -1.1;
+ vals[i++] = -0.9;
+ vals[i++] = -0.7;
+ vals[i++] = -0.50001;
+ vals[i++] = -0.49999;
+ vals[i++] = -0.3;
+ vals[i++] = -0.1;
+ vals[i++] = 0.1;
+ vals[i++] = 0.3;
+ vals[i++] = 0.49999;
+ vals[i++] = 0.50001;
+ vals[i++] = 0.7;
+ vals[i++] = 0.9;
+ vals[i++] = 1.1;
+ vals[i++] = 1.3;
+ assert(i == 22);
+
+ rm = get_sse_roundingmode();
+ assert(rm == 0); // 0 == RN == default
+
+ for (i = 0; i < sizeof(vals)/sizeof(vals[0]); i++) {
+ V128 src, dst;
+
+ for (rm = 0; rm <= 3; rm++) {
+ set_sse_roundingmode(rm);
+
+ randV128(&src);
+ randV128(&dst);
+ memcpy(&src[0], &vals[i], 8);
+ do_ROUNDSD_1XX(False/*reg*/, &src, &dst);
+ printf("r (rm=%u) roundsd_1XX ", rm);
+ showV128(&src);
+ printf(" ");
+ showV128(&dst);
+ printf(" %10f %10f", vals[i], *(double*)(&dst[0]));
+ printf("\n");
+
+ randV128(&src);
+ randV128(&dst);
+ memcpy(&src[0], &vals[i], 8);
+ do_ROUNDSD_1XX(True/*mem*/, &src, &dst);
+ printf("m (rm=%u) roundsd_1XX ", rm);
+ showV128(&src);
+ printf(" ");
+ showV128(&dst);
+ printf(" %10f %10f", vals[i], *(double*)(&dst[0]));
+ printf("\n");
+ }
+ }
+
+ rm = get_sse_roundingmode();
+ assert(rm == 3);
+ set_sse_roundingmode(0);
+ rm = get_sse_roundingmode();
+ assert(rm == 0); // 0 == RN == default
+}
+
+
/* ------------ ROUNDSS ------------ */
void do_ROUNDSS_000 ( Bool mem, V128* src, /*OUT*/V128* dst )
}
}
+void do_ROUNDSS_1XX ( Bool mem, V128* src, /*OUT*/V128* dst )
+{
+ if (mem) {
+ __asm__ __volatile__(
+ "movupd (%1), %%xmm11" "\n\t"
+ "roundss $4, (%0), %%xmm11" "\n\t"
+ "movupd %%xmm11, (%1)" "\n"
+ : /*OUT*/
+ : /*IN*/ "r"(src), "r"(dst)
+ : /*TRASH*/ "xmm11"
+ );
+ } else {
+ __asm__ __volatile__(
+ "movupd (%1), %%xmm11" "\n\t"
+ "movupd (%0), %%xmm2" "\n\t"
+ "roundss $4, %%xmm2, %%xmm11" "\n\t"
+ "movupd %%xmm11, (%1)" "\n"
+ : /*OUT*/
+ : /*IN*/ "r"(src), "r"(dst)
+ : /*TRASH*/ "xmm11","xmm2"
+ );
+ }
+}
void test_ROUNDSS_w_immediate_rounding ( void )
{
}
}
+void test_ROUNDSS_w_mxcsr_rounding ( void )
+{
+ UInt rm;
+ float vals[22];
+ Int i = 0;
+ vals[i++] = 0.0;
+ vals[i++] = -0.0;
+ vals[i++] = mkPosInf();
+ vals[i++] = mkNegInf();
+ vals[i++] = mkPosNan();
+ vals[i++] = mkNegNan();
+ vals[i++] = -1.3;
+ vals[i++] = -1.1;
+ vals[i++] = -0.9;
+ vals[i++] = -0.7;
+ vals[i++] = -0.50001;
+ vals[i++] = -0.49999;
+ vals[i++] = -0.3;
+ vals[i++] = -0.1;
+ vals[i++] = 0.1;
+ vals[i++] = 0.3;
+ vals[i++] = 0.49999;
+ vals[i++] = 0.50001;
+ vals[i++] = 0.7;
+ vals[i++] = 0.9;
+ vals[i++] = 1.1;
+ vals[i++] = 1.3;
+ assert(i == 22);
+
+ rm = get_sse_roundingmode();
+ assert(rm == 0); // 0 == RN == default
+
+ for (i = 0; i < sizeof(vals)/sizeof(vals[0]); i++) {
+ V128 src, dst;
+
+ for (rm = 0; rm <= 3; rm++) {
+ set_sse_roundingmode(rm);
+
+ randV128(&src);
+ randV128(&dst);
+ memcpy(&src[0], &vals[i], 4);
+ do_ROUNDSS_1XX(False/*reg*/, &src, &dst);
+ printf("r (rm=%u) roundss_1XX ", rm);
+ showV128(&src);
+ printf(" ");
+ showV128(&dst);
+ printf(" %10f %10f", (double)vals[i], (double)*(float*)(&dst[0]));
+ printf("\n");
+
+ randV128(&src);
+ randV128(&dst);
+ memcpy(&src[0], &vals[i], 4);
+ do_ROUNDSS_1XX(True/*mem*/, &src, &dst);
+ printf("m (rm=%u) roundss_1XX ", rm);
+ showV128(&src);
+ printf(" ");
+ showV128(&dst);
+ printf(" %10f %10f", (double)vals[i], (double)*(float*)(&dst[0]));
+ printf("\n");
+ }
+ }
+
+ rm = get_sse_roundingmode();
+ assert(rm == 3);
+ set_sse_roundingmode(0);
+ rm = get_sse_roundingmode();
+ assert(rm == 0); // 0 == RN == default
+}
+
/* ------------ ROUNDPD ------------ */
void do_ROUNDPD_000 ( Bool mem, V128* src, /*OUT*/V128* dst )
}
}
+void do_ROUNDPD_1XX ( Bool mem, V128* src, /*OUT*/V128* dst )
+{
+ if (mem) {
+ __asm__ __volatile__(
+ "movupd (%1), %%xmm11" "\n\t"
+ "roundpd $4, (%0), %%xmm11" "\n\t"
+ "movupd %%xmm11, (%1)" "\n"
+ : /*OUT*/
+ : /*IN*/ "r"(src), "r"(dst)
+ : /*TRASH*/ "xmm11"
+ );
+ } else {
+ __asm__ __volatile__(
+ "movupd (%1), %%xmm11" "\n\t"
+ "movupd (%0), %%xmm2" "\n\t"
+ "roundpd $4, %%xmm2, %%xmm11" "\n\t"
+ "movupd %%xmm11, (%1)" "\n"
+ : /*OUT*/
+ : /*IN*/ "r"(src), "r"(dst)
+ : /*TRASH*/ "xmm11","xmm2"
+ );
+ }
+}
void test_ROUNDPD_w_immediate_rounding ( void )
{
}
}
+void test_ROUNDPD_w_mxcsr_rounding ( void )
+{
+ UInt rm;
+ double vals[22];
+ Int i = 0;
+ vals[i++] = 0.0;
+ vals[i++] = -0.0;
+ vals[i++] = mkPosInf();
+ vals[i++] = mkNegInf();
+ vals[i++] = mkPosNan();
+ vals[i++] = mkNegNan();
+ vals[i++] = -1.3;
+ vals[i++] = -1.1;
+ vals[i++] = -0.9;
+ vals[i++] = -0.7;
+ vals[i++] = -0.50001;
+ vals[i++] = -0.49999;
+ vals[i++] = -0.3;
+ vals[i++] = -0.1;
+ vals[i++] = 0.1;
+ vals[i++] = 0.3;
+ vals[i++] = 0.49999;
+ vals[i++] = 0.50001;
+ vals[i++] = 0.7;
+ vals[i++] = 0.9;
+ vals[i++] = 1.1;
+ vals[i++] = 1.3;
+ assert(i == 22);
+
+ rm = get_sse_roundingmode();
+ assert(rm == 0); // 0 == RN == default
+
+ for (i = 0; i < sizeof(vals)/sizeof(vals[0]); i++) {
+ V128 src, dst;
+
+ for (rm = 0; rm <= 3; rm++) {
+ set_sse_roundingmode(rm);
+
+ randV128(&src);
+ randV128(&dst);
+ memcpy(&src[0], &vals[i], 8);
+ memcpy(&src[8], &vals[(i+11)%22], 8);
+ do_ROUNDPD_1XX(False/*reg*/, &src, &dst);
+ printf("r (rm=%u) roundpd_1XX ", rm);
+ showV128(&src);
+ printf(" ");
+ showV128(&dst);
+ printf(" %10f -> %10f", vals[i], *(double*)(&dst[0]));
+ printf(" %10f -> %10f", vals[(i+11)%22], *(double*)(&dst[8]));
+ printf("\n");
+
+ randV128(&src);
+ randV128(&dst);
+ memcpy(&src[0], &vals[i], 8);
+ memcpy(&src[8], &vals[(i+11)%22], 8);
+ do_ROUNDPD_1XX(True/*mem*/, &src, &dst);
+ printf("m (rm=%u) roundpd_1XX ", rm);
+ showV128(&src);
+ printf(" ");
+ showV128(&dst);
+ printf(" %10f -> %10f", vals[i], *(double*)(&dst[0]));
+ printf(" %10f -> %10f", vals[(i+11)%22], *(double*)(&dst[8]));
+ printf("\n");
+ }
+ }
+
+ rm = get_sse_roundingmode();
+ assert(rm == 3);
+ set_sse_roundingmode(0);
+ rm = get_sse_roundingmode();
+ assert(rm == 0); // 0 == RN == default
+}
+
/* ------------ ROUNDPS ------------ */
void do_ROUNDPS_000 ( Bool mem, V128* src, /*OUT*/V128* dst )
}
}
+void do_ROUNDPS_1XX ( Bool mem, V128* src, /*OUT*/V128* dst )
+{
+ if (mem) {
+ __asm__ __volatile__(
+ "movupd (%1), %%xmm11" "\n\t"
+ "roundps $4, (%0), %%xmm11" "\n\t"
+ "movupd %%xmm11, (%1)" "\n"
+ : /*OUT*/
+ : /*IN*/ "r"(src), "r"(dst)
+ : /*TRASH*/ "xmm11"
+ );
+ } else {
+ __asm__ __volatile__(
+ "movupd (%1), %%xmm11" "\n\t"
+ "movupd (%0), %%xmm2" "\n\t"
+ "roundps $4, %%xmm2, %%xmm11" "\n\t"
+ "movupd %%xmm11, (%1)" "\n"
+ : /*OUT*/
+ : /*IN*/ "r"(src), "r"(dst)
+ : /*TRASH*/ "xmm11","xmm2"
+ );
+ }
+}
void test_ROUNDPS_w_immediate_rounding ( void )
{
}
}
+void test_ROUNDPS_w_mxcsr_rounding ( void )
+{
+ UInt rm;
+ float vals[22];
+ Int i = 0;
+ vals[i++] = 0.0;
+ vals[i++] = -0.0;
+ vals[i++] = mkPosInf();
+ vals[i++] = mkNegInf();
+ vals[i++] = mkPosNan();
+ vals[i++] = mkNegNan();
+ vals[i++] = -1.3;
+ vals[i++] = -1.1;
+ vals[i++] = -0.9;
+ vals[i++] = -0.7;
+ vals[i++] = -0.50001;
+ vals[i++] = -0.49999;
+ vals[i++] = -0.3;
+ vals[i++] = -0.1;
+ vals[i++] = 0.1;
+ vals[i++] = 0.3;
+ vals[i++] = 0.49999;
+ vals[i++] = 0.50001;
+ vals[i++] = 0.7;
+ vals[i++] = 0.9;
+ vals[i++] = 1.1;
+ vals[i++] = 1.3;
+ assert(i == 22);
+
+ rm = get_sse_roundingmode();
+ assert(rm == 0); // 0 == RN == default
+
+ for (i = 0; i < sizeof(vals)/sizeof(vals[0]); i++) {
+ V128 src, dst;
+
+ for (rm = 0; rm <= 3; rm++) {
+ set_sse_roundingmode(rm);
+
+ randV128(&src);
+ randV128(&dst);
+ memcpy(&src[0], &vals[i], 4);
+ memcpy(&src[4], &vals[(i+5)%22], 4);
+ memcpy(&src[8], &vals[(i+11)%22], 4);
+ memcpy(&src[12], &vals[(i+17)%22], 4);
+ do_ROUNDPS_1XX(False/*reg*/, &src, &dst);
+ printf("r (rm=%u) roundps_1XX ", rm);
+ showV128(&src);
+ printf(" ");
+ showV128(&dst);
+ printf(" %9f:%9f", vals[i], (double)*(float*)(&dst[0]));
+ printf(" %9f:%9f", vals[(i+5)%22], (double)*(float*)(&dst[4]));
+ printf(" %9f:%9f", vals[(i+11)%22], (double)*(float*)(&dst[8]));
+ printf(" %9f:%9f", vals[(i+17)%22], (double)*(float*)(&dst[12]));
+ printf("\n");
+
+ randV128(&src);
+ randV128(&dst);
+ memcpy(&src[0], &vals[i], 4);
+ memcpy(&src[4], &vals[(i+5)%22], 4);
+ memcpy(&src[8], &vals[(i+11)%22], 4);
+ memcpy(&src[12], &vals[(i+17)%22], 4);
+ do_ROUNDPS_1XX(True/*mem*/, &src, &dst);
+ printf("m (rm=%u) roundps_1XX ", rm);
+ showV128(&src);
+ printf(" ");
+ showV128(&dst);
+ printf(" %9f:%9f", vals[i], (double)*(float*)(&dst[0]));
+ printf(" %9f:%9f", vals[(i+5)%22], (double)*(float*)(&dst[4]));
+ printf(" %9f:%9f", vals[(i+11)%22], (double)*(float*)(&dst[8]));
+ printf(" %9f:%9f", vals[(i+17)%22], (double)*(float*)(&dst[12]));
+ printf("\n");
+ }
+ }
+
+ rm = get_sse_roundingmode();
+ assert(rm == 3);
+ set_sse_roundingmode(0);
+ rm = get_sse_roundingmode();
+ assert(rm == 0); // 0 == RN == default
+}
+
/* ------------ PTEST ------------ */
void test_PTEST ( void )
test_ROUNDSS_w_immediate_rounding();
test_ROUNDPD_w_immediate_rounding();
test_ROUNDPS_w_immediate_rounding();
+ test_ROUNDSD_w_mxcsr_rounding();
+ test_ROUNDSS_w_mxcsr_rounding();
+ test_ROUNDPD_w_mxcsr_rounding();
+ test_ROUNDPS_w_mxcsr_rounding();
// ------ SSE 4.2 ------
test_PCMPGTQ();
#else