From: Carl Love Date: Mon, 21 Sep 2020 20:56:22 +0000 (-0500) Subject: valgrind isa 3.1 foundation X-Git-Tag: VALGRIND_3_17_0~155 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=2a88a98f5b69ac7cdd06e682b2158fd8a31399c9;p=thirdparty%2Fvalgrind.git valgrind isa 3.1 foundation header files and other common parts associated with the initial isa v3.1 support --- diff --git a/configure.ac b/configure.ac index 1a89d05e5e..085c98993e 100755 --- a/configure.ac +++ b/configure.ac @@ -1455,7 +1455,9 @@ AC_HWCAP_CONTAINS_FLAG([arch_2_05],[HWCAP_HAS_ISA_2_05]) AC_HWCAP_CONTAINS_FLAG([arch_2_06],[HWCAP_HAS_ISA_2_06]) AC_HWCAP_CONTAINS_FLAG([arch_2_07],[HWCAP_HAS_ISA_2_07]) AC_HWCAP_CONTAINS_FLAG([arch_3_00],[HWCAP_HAS_ISA_3_00]) +AC_HWCAP_CONTAINS_FLAG([arch_3_1],[HWCAP_HAS_ISA_3_1]) AC_HWCAP_CONTAINS_FLAG([htm],[HWCAP_HAS_HTM]) +AC_HWCAP_CONTAINS_FLAG([mma],[HWCAP_HAS_MMA]) # ISA Levels AM_CONDITIONAL(HAS_ISA_2_05, [test x$HWCAP_HAS_ISA_2_05 = xyes]) @@ -1624,7 +1626,7 @@ AM_CONDITIONAL(SUPPORTS_HTM, test x$ac_compiler_supports_htm = xyes \ -a x$ac_compiler_sees_htm_builtins = xyes \ -a x$HWCAP_HAS_HTM = xyes ) -# isa 3.0 checking +# isa 3.0 checking.
(actually 3.0 or newer) AC_MSG_CHECKING([that assembler knows ISA 3.00 ]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ @@ -1638,9 +1640,27 @@ ac_asm_have_isa_3_00=no AC_MSG_RESULT([no]) ]) +# isa 3.01 checking +AC_MSG_CHECKING([that assembler knows ISA 3.1 ]) + +AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ +]], [[ + __asm__ __volatile__("brh 1,2 "); +]])], [ +ac_asm_have_isa_3_1=yes +AC_MSG_RESULT([yes]) +], [ +ac_asm_have_isa_3_1=no +AC_MSG_RESULT([no]) +]) + + AM_CONDITIONAL(HAS_ISA_3_00, [test x$ac_asm_have_isa_3_00 = xyes \ -a x$HWCAP_HAS_ISA_3_00 = xyes]) +AM_CONDITIONAL(HAS_ISA_3_1, [test x$ac_asm_have_isa_3_1 = xyes \ + -a x$HWCAP_HAS_ISA_3_1 = xyes]) + # Check for pthread_create@GLIBC2.0 AC_MSG_CHECKING([for pthread_create@GLIBC2.0()]) diff --git a/none/tests/ppc64/Makefile.am b/none/tests/ppc64/Makefile.am index 9bc0d0a764..a43e527fe6 100644 --- a/none/tests/ppc64/Makefile.am +++ b/none/tests/ppc64/Makefile.am @@ -3,7 +3,7 @@ include $(top_srcdir)/Makefile.tool-tests.am dist_noinst_SCRIPTS = filter_stderr -noinst_HEADERS = ppc64_helpers.h +noinst_HEADERS = ppc64_helpers.h isa_3_1_helpers.h EXTRA_DIST = \ jm-int.stderr.exp jm-int.stdout.exp jm-int.vgtest jm-int.stdout.exp-LE \ @@ -51,6 +51,11 @@ EXTRA_DIST = \ test_isa_3_0_other.stdout.exp-LE test_isa_3_0_other.vgtest \ subnormal_test.stderr.exp subnormal_test.stdout.exp \ subnormal_test.vgtest +# test_isa_3_1_RT.vgtest test_isa_3_1_RT.stderr.exp test_isa_3_1_RT.stdout.exp +# test_isa_3_1_XT.vgtest test_isa_3_1_XT.stderr.exp test_isa_3_1_XT.stdout.exp +# test_isa_3_1_VRT.vgtest test_isa_3_1_VRT.stderr.exp test_isa_3_1_VRT.stdout.exp +# test_isa_3_1_Misc.vgtest test_isa_3_1_Misc.stderr.exp test_isa_3_1_Misc.stdout.exp +# test_isa_3_1_AT.vgtest test_isa_3_1_AT.stderr.exp test_isa_3_1_AT.stdout.exp check_PROGRAMS = \ allexec \ @@ -63,7 +68,8 @@ check_PROGRAMS = \ test_tm test_touch_tm ldst_multiple data-cache-instructions \ power6_mf_gpr std_reg_imm \ twi_tdi tw_td power6_bcmp - +# test_isa_3_1_RT test_isa_3_1_XT +# 
test_isa_3_1_Misc test_isa_3_1_VRT test_isa_3_1_AT AM_CFLAGS += @FLAG_M64@ AM_CXXFLAGS += @FLAG_M64@ @@ -71,6 +77,12 @@ AM_CCASFLAGS += @FLAG_M64@ allexec_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_NONNULL@ +#test_isa_3_1_XT_SOURCES = test_isa_3_1_XT.c test_isa_3_1_common.c +#test_isa_3_1_RT_SOURCES = test_isa_3_1_RT.c test_isa_3_1_common.c +#test_isa_3_1_VRT_SOURCES = test_isa_3_1_VRT.c test_isa_3_1_common.c +#test_isa_3_1_AT_SOURCES = test_isa_3_1_AT.c test_isa_3_1_common.c +#test_isa_3_1_Misc_SOURCES = test_isa_3_1_Misc.c test_isa_3_1_common.c + if HAS_ALTIVEC BUILD_FLAG_ALTIVEC = -maltivec ALTIVEC_FLAG = -DHAS_ALTIVEC @@ -128,6 +140,14 @@ BUILD_FLAGS_ISA_3_00 = ISA_3_00_FLAG = endif +if HAS_ISA_3_1 +BUILD_FLAGS_ISA_3_1 = -mcpu=power10 +ISA_3_1_FLAG = -DHAS_ISA_3_1 +else +BUILD_FLAGS_ISA_3_1 = +ISA_3_1_FLAG = +endif + test_isa_2_06_part1_CFLAGS = $(AM_CFLAGS) -Winline -Wall -O -g -mregnames $(VSX_FLAG) \ @FLAG_M64@ $(ALTIVEC_FLAG) $(BUILD_FLAG_VSX) @@ -161,6 +181,9 @@ test_touch_tm_CFLAGS = $(AM_CFLAGS) -Winline -Wall -O -g -mregnames $(HTM_FLAG) test_isa_3_0_CFLAGS = $(AM_CFLAGS) -Winline -Wall -O -g -mregnames $(HTM_FLAG) $(ISA_3_00_FLAG) \ @FLAG_M64@ $(BUILD_FLAGS_ISA_3_00) +test_isa_3_1_CFLAGS = $(AM_CFLAGS) -Winline -Wall -O -g -mregnames $(ISA_3_1_FLAG) \ + @FLAG_M64@ $(BUILD_FLAGS_ISA_3_1) + subnormal_test_CFLAGS = $(AM_CFLAGS) -Winline -Wall -O -g -mregnames $(VSX_FLAG) $(ISA_2_06_FLAG) \ @FLAG_M64@ $(ALTIVEC_FLAG) $(BUILD_FLAG_VSX) $(BUILD_FLAGS_ISA_2_06) diff --git a/none/tests/ppc64/isa_3_1_helpers.h b/none/tests/ppc64/isa_3_1_helpers.h new file mode 100644 index 0000000000..dfc0422cbc --- /dev/null +++ b/none/tests/ppc64/isa_3_1_helpers.h @@ -0,0 +1,112 @@ +/* isa_3_1_helpers.h */ + +#include "isa_3_1_register_defines.h" + +extern unsigned long a_iters,b_iters,c_iters, m_iters; +extern unsigned long vrai,vrbi,vrci,vrmi; +extern unsigned long a_inc, b_inc, c_inc, m_inc; +extern unsigned long a_limit,b_limit,c_limit; +extern vector unsigned long long vrt, vra, 
vrb, vrc; +extern vector unsigned long long vrm; +extern vector unsigned long long vec_xa; +extern vector unsigned long long vec_xb; +extern vector unsigned long long vec_xc; +extern vector unsigned long long vec_xs; +extern vector unsigned long long vec_xt; +extern unsigned long long dcmx; + +extern unsigned long current_cr; +extern unsigned long current_fpscr; + +typedef void (*test_func_t) (void); +struct test_list_t { + test_func_t func; + const char *name; + const char *form; + unsigned long mask; /* holds SP, DP, estimate and bfloat16 output indicators. */ +}; +typedef struct test_list_t test_list_t; +extern struct test_list_t current_test; +typedef void (*test_group_t) (const char *name, test_func_t func, + unsigned int unused, char * cur_form); + +/* Misc options for debug. */ +/* setup_only indicates to do all of the register initializations, + but skip the instruction test. */ +extern unsigned long setup_only; +extern int verbose; +extern unsigned long prefix_override; +extern unsigned long vrm_override; +extern unsigned long mc_override; +extern unsigned long enable_setjmp; +extern unsigned long dump_tables; +extern void debug_show_form(const char *, char *); +extern void debug_show_current_iteration(); +extern void debug_dump_buffer(); + +extern void identify_form_components(const char *, const char *); +extern void dump_vsxargs(); +extern void generic_prologue(); +extern void build_args_table(); +extern void build_vsx_table(); +extern void print_register_header(); +extern void print_register_footer(); +extern void debug_show_iter_ranges(); +extern void print_result_buffer(); +extern void dump_float_vsx_tables(); +extern void build_float_vsx_tables(); +extern void initialize_target_registers(); +extern void initialize_source_registers(); +extern void set_up_iterators(); +extern void initialize_buffer(int); + +extern int verbose; /* NOTE(review): verbose is already declared earlier in this header, so this redeclaration is redundant. The debug_printf macro that follows expands to an unbraced 'if' with its own trailing ';' and passes X as the printf format string, so an invocation cannot be followed by an 'else'. The debug_show_* macros are plain verbosity-threshold tests. */ +#define debug_printf(X) if (verbose>0) printf(X); +#define debug_show_labels (verbose>0) +#define debug_show_iters (verbose>1) +#define
debug_show_raw_values (verbose>2) +#define debug_show_all_regs (verbose>5) +#define debug_show_tables (verbose>6) + + +#define CHECK_OVERRIDES { \ + if (vrm_override && vrmi > 0) continue; \ + if (prefix_override && strncmp("p", instruction_name, 1) == 0) { \ + if (verbose) printf("Skipping prefix insn test %s\n",instruction_name); \ + continue; \ + } \ +} + +/* CR helpers. */ + +#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7" + +#define SET_CR(_arg) \ + __asm__ __volatile__ ("mtcr %0" : : "b"(_arg) : ALLCR ); + +#define SET_CR0_FIELD(_arg) __asm__ __volatile__ ("mtocrf 0x80,%0 " : : "b" (_arg):"cr0"); +#define SET_CR1_FIELD(_arg) __asm__ __volatile__ ("mtocrf 0x40,%0 " : : "b" (_arg):"cr1"); +#define SET_CR2_FIELD(_arg) __asm__ __volatile__ ("mtocrf 0x20,%0 " : : "b" (_arg):"cr2"); +#define SET_CR3_FIELD(_arg) __asm__ __volatile__ ("mtocrf 0x10,%0 " : : "b" (_arg):"cr3"); +#define SET_CR4_FIELD(_arg) __asm__ __volatile__ ("mtocrf 0x08,%0 " : : "r" (_arg):"cr4"); +#define SET_CR5_FIELD(_arg) __asm__ __volatile__ ("mtocrf 0x04,%0 " : : "r" (_arg):"cr5"); +#define SET_CR6_FIELD(_arg) __asm__ __volatile__ ("mtocrf 0x02,%0 " : : "r" (_arg):"cr6"); +#define SET_CR7_FIELD(_arg) __asm__ __volatile__ ("mtocrf 0x01,%0 " : : "r" (_arg):"cr7"); + +#define SET_XER(_arg) __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" ); +#define GET_CR(_lval) __asm__ __volatile__ ("mfcr %0" : "=b"(_lval) ) +#define GET_XER(_lval) __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) ) +#define SET_CR_ZERO SET_CR(0) + +/* ************** */ +/* FPSCR helpers. 
*/ +#define SET_FPSCR_ZERO \ + do { \ + double _d = 0.0; \ + __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \ + } while (0); + +#define GET_FPSCR(_arg) \ + __asm__ __volatile__ ("mffs %0" : "=f"(_arg) ); + + diff --git a/none/tests/ppc64/isa_3_1_register_defines.h b/none/tests/ppc64/isa_3_1_register_defines.h new file mode 100644 index 0000000000..e4d021e5c0 --- /dev/null +++ b/none/tests/ppc64/isa_3_1_register_defines.h @@ -0,0 +1,50 @@ +/* register definitions used in tests for isa_3_1. */ + +/* ACC / Accumulator. + An ACC is associated with a set of four VSR registers. + Each ACC contains four 128-bit rows. + Each row of each ACC is aliased to a specific VSR in the following manner, + i.e. ACC[n][r] == VSR[4*n + r]. + ACC[0][0] == VSR[0]; ACC[0][1] == VSR[1]; ACC[0][2] == VSR[2]; ACC[0][3] == VSR[3] + ... + ACC[7][0] == VSR[28]; ACC[7][1] == VSR[29]; ACC[7][2] == VSR[30]; ACC[7][3] == VSR[31] + The tests use ACC[ACCNUM], whose rows are the vs16..vs19 variables below. +*/ +#define ACCNUM 4 +register vector long long TEST_ACC0 __asm__ ("vs16"); +register vector long long TEST_ACC1 __asm__ ("vs17"); +register vector long long TEST_ACC2 __asm__ ("vs18"); +register vector long long TEST_ACC3 __asm__ ("vs19"); + +/* XSp and XTp use the same register pair, defined here as 20 and 21. + { also XSp,XTp in scripts } */ +register vector long long XTp0 __asm__ ("vs20"); // XTp[0];XSp[0]; +register vector long long XTp1 __asm__ ("vs21"); // XTp[1];XSp[1]; + +// xa,xb,xc references are mapped to a specific vector register. +// out of order to allow xap mapped over xa and xc. +register vector long long xa __asm__ ("vs22"); // also xap. +register vector long long xc __asm__ ("vs23"); // also 2nd half of xap. +register vector long long xb __asm__ ("vs24"); +register vector long long xt __asm__ ("vs25"); + +/* frs,frb (variable named frsb) both use the same register pair. + (top half of vs26,vs27) */ +register double frsb __asm__ ("fr26"); +register double frsbp __asm__ ("fr27"); +/* frt,frtp register pair.
(top half of vs28,vs29) */ +register double frt __asm__ ("vs28"); +register double frtp __asm__ ("vs29"); + +register uint64_t ra __asm__ ("r20"); +register uint64_t rb __asm__ ("r21"); +register uint64_t rc __asm__ ("r22"); +register uint64_t rs __asm__ ("r24"); /* rsp part 1 */ +register uint64_t rsp __asm__ ("r25"); /* rsp part 2 */ +register uint64_t rt __asm__ ("r26"); /* rtp part 1 */ +register uint64_t rtp __asm__ ("r27"); /* rtp part 2 */ + +extern unsigned long long vsrd; +extern unsigned long get_vsrhd_vs26(); +extern unsigned long get_vsrhd_vs27(); +extern unsigned long get_vsrhd_vs28(); +extern unsigned long get_vsrhd_vs29(); + diff --git a/none/tests/ppc64/test_isa_3_1_AT.vgtest b/none/tests/ppc64/test_isa_3_1_AT.vgtest new file mode 100644 index 0000000000..e39ffd22a5 --- /dev/null +++ b/none/tests/ppc64/test_isa_3_1_AT.vgtest @@ -0,0 +1,3 @@ +prereq: ../../../tests/check_ppc64_auxv_cap arch_3_1 +prereq: ../../../tests/check_ppc64_auxv_cap mma +prog: test_isa_3_1_AT diff --git a/none/tests/ppc64/test_isa_3_1_Misc.vgtest b/none/tests/ppc64/test_isa_3_1_Misc.vgtest new file mode 100644 index 0000000000..3802934054 --- /dev/null +++ b/none/tests/ppc64/test_isa_3_1_Misc.vgtest @@ -0,0 +1,2 @@ +prereq: ../../../tests/check_ppc64_auxv_cap arch_3_1 +prog: test_isa_3_1_Misc diff --git a/none/tests/ppc64/test_isa_3_1_RT.vgtest b/none/tests/ppc64/test_isa_3_1_RT.vgtest new file mode 100644 index 0000000000..5aad9de09f --- /dev/null +++ b/none/tests/ppc64/test_isa_3_1_RT.vgtest @@ -0,0 +1,2 @@ +prereq: ../../../tests/check_ppc64_auxv_cap arch_3_1 +prog: test_isa_3_1_RT diff --git a/none/tests/ppc64/test_isa_3_1_VRT.vgtest b/none/tests/ppc64/test_isa_3_1_VRT.vgtest new file mode 100644 index 0000000000..96d57b2c28 --- /dev/null +++ b/none/tests/ppc64/test_isa_3_1_VRT.vgtest @@ -0,0 +1,2 @@ +prereq: ../../../tests/check_ppc64_auxv_cap arch_3_1 +prog: test_isa_3_1_VRT diff --git a/none/tests/ppc64/test_isa_3_1_XT.vgtest b/none/tests/ppc64/test_isa_3_1_XT.vgtest new 
file mode 100644 index 0000000000..cc717eab14 --- /dev/null +++ b/none/tests/ppc64/test_isa_3_1_XT.vgtest @@ -0,0 +1,2 @@ +prereq: ../../../tests/check_ppc64_auxv_cap arch_3_1 +prog: test_isa_3_1_XT diff --git a/none/tests/ppc64/test_isa_3_1_common.c b/none/tests/ppc64/test_isa_3_1_common.c new file mode 100644 index 0000000000..585fd0c90f --- /dev/null +++ b/none/tests/ppc64/test_isa_3_1_common.c @@ -0,0 +1,2188 @@ +/* test_isa_3_1_common.c */ + +/* Copyright (C) 2020, IBM + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, see . + + The GNU General Public License is contained in the file COPYING. + */ + +#include +#include +#include +#include +#include + +#include "isa_3_1_register_defines.h" +#include "isa_3_1_helpers.h" +#include "tests/malloc.h" // memalign + +/* post_test indicates to the printf helpers if we are pre- or post- + instruction execution, subsequently used to suppress register + output when those register contents are not useful. */ +unsigned long post_test; +/* increase verbosity for increasing amounts of debug output. */ +int verbose = 0; +#define DEADBEEF 0x1111111111111111ULL + +vector unsigned long long vec_xa; +vector unsigned long long vec_xb; +vector unsigned long long vec_xc; +vector unsigned long long vec_xs; +vector unsigned long long vec_xt; +unsigned long long dcmx; + +/* Iterator controls. These are adjusted as appropriate for the tests + being exercised. See set_up_iterators () below. 
+*/ +unsigned long a_iters, b_iters, c_iters, m_iters; +unsigned long a_inc, b_inc, c_inc, m_inc; +unsigned long vrai, vrbi, vrci, vrmi; +unsigned long a_limit = 0xffff, b_limit = 0xffff, c_limit = 0xffff; + +vector unsigned long long vrt, vra, vrb, vrc; +vector unsigned long long vrm; + +/* Debug: Set these to allow skipping of test subsets that + have nonzero vrm or mc values. +*/ +unsigned long prefix_override = 0; +unsigned long vrm_override = 0; +unsigned long mc_override = 0; +unsigned long enable_setjmp = 0; +unsigned long dump_tables = 0; + +/* condition register misc. */ +extern unsigned long current_cr; +extern unsigned long current_fpscr; + +/* Helpers to manage when our output fields require special handling. + This includes scenarios including: + - some parts of the output fields are Undefined. + - some parts of the output field contain *estimated* data that needs to be + truncated when printed. + - Some parts of the output need to be reported as INF or NAN. + - The contents need to be interpreted as single or double precision. +*/ +// Double precision indicators. +#define DP0 0b00100000 +#define DP1 0b00010000 +#define DOUBLE_MASK 0b00110000 +// Single precision indicators. +#define SP0 0b00001000 +#define SP1 0b00000100 +#define SP2 0b00000010 +#define SP3 0b00000001 +#define SINGLE_MASK 0b00001111 +// Estimated output indicators. +#define SINGLE_EST_MASK 0b01000000 +#define DOUBLE_EST_MASK 0b10000000 +// bfloat16 indicators. +#define B16_MASK 0b1111111100000000 +#define B16_0 0b1000000000000000 +#define B16_1 0b0100000000000000 +#define B16_2 0b0010000000000000 +#define B16_3 0b0001000000000000 +#define B16_4 0b0000100000000000 +#define B16_5 0b0000010000000000 +#define B16_6 0b0000001000000000 +#define B16_7 0b0000000100000000 + +/* Instruction Form indicators. + These are set based on the instruction name and the associated + instruction form. 
These are subsequently used to help initialize
+   the incoming register contents when testing the specific instruction.
+*/
+/* General purpose register operands. */
+bool has_ra, has_rb, has_rc, has_rs, has_rt;
+bool has_rtp, has_rsp;
+/* Vector (VR) operands. */
+bool has_vra, has_vrb, has_vrc, has_vrm, has_vrt;
+/* VSX operands, including the paired forms. */
+bool has_xa, has_xb, has_xc, has_xs, has_xt;
+bool has_xap;
+bool uses_xc_as_blend_mask;
+bool has_xsp, has_xtp;
+/* Floating point operands. */
+bool has_frb, has_frbp; // frb* uses same regs as frsp.
+bool has_frs, has_frsp;
+bool has_frt, has_frtp;
+/* Condition register usage. */
+bool uses_CRBIT, uses_RC, uses_MC;
+bool uses_cr;
+/* Per-instruction special cases. */
+bool is_divide_or_modulo;
+bool is_insert_double;
+bool is_testlsb;
+bool has_rs_as_value_source;
+bool has_dcmx;
+unsigned long is_clear_or_insert_insns;
+unsigned long is_mtvsr_insn;
+unsigned long is_cmp_insn;
+bool has_ra_target;
+bool uses_dfp128_input;
+bool uses_dfp128_output;
+bool uses_acc; // Accumulator related.
+bool uses_acc_src;
+bool uses_acc_dest;
+bool uses_acc_vsrs;
+bool uses_buffer; // Buffer related.
+bool uses_load_buffer, uses_store_buffer, uses_any_buffer;
+bool uses_quad;
+unsigned long output_mask; // Output field special handling.
+bool instruction_is_sp, instruction_is_sp_estimate;
+bool instruction_is_dp, instruction_is_dp_estimate;
+bool instruction_is_b16;
+
+/* Return the smaller of the two unsigned 64-bit arguments. */
+unsigned long long min (unsigned long long a, unsigned long long b) {
+   return (a < b) ? a : b;
+}
+
+/* Parse the 'form' field to mark and identify arguments to the instruction.
*/
+/* identify_form_components
+   Set the global has_*, uses_*, is_* and instruction_is_* indicators for
+   the instruction about to be tested.
+     instruction_name - the mnemonic; matched by prefix or by substring.
+     cur_form         - the operand form string (for example "RA,RS,RB");
+                        matched by prefix or by substring.
+   current_test.mask is also copied into output_mask and decoded into the
+   instruction_is_* indicators.  Nothing is returned; only globals are
+   written. */
+void identify_form_components (const char *instruction_name,
+                               const char *cur_form)
+{
+   /* General purpose register operands. */
+   has_ra = ((strstr (cur_form, ",RA") != NULL) ||
+             (strstr (cur_form, "(RA)") != NULL));
+   has_ra_target = (strncmp (cur_form, "RA,", 3) == 0);
+   has_rb = (strstr (cur_form, ",RB") != NULL);
+   has_rc = (strstr (cur_form, ",RC") != NULL);
+   has_rs = ((strstr (cur_form, ",RS") != NULL) ||
+             (strncmp (cur_form, "RS", 2) == 0));
+   has_rsp = (strncmp (cur_form, "RSp", 3) == 0);
+   has_rt = (strncmp (cur_form, "RT", 2) == 0);
+   has_rtp = (strncmp (cur_form, "RTp", 3) == 0);
+
+   /* Vector register operands. */
+   has_vra = (strstr (cur_form, "VRA") != NULL);
+   has_vrb = (strstr (cur_form, "VRB") != NULL);
+   has_vrc = (strstr (cur_form, "VRC") != NULL);
+   has_vrm = (strstr (cur_form, "VRM") != NULL);
+   has_vrt = (strncmp (cur_form, "VRT", 3) == 0);
+
+   /* Floating point register operands. */
+   has_frb = (strstr (cur_form, "FRB") != NULL);
+   has_frbp = (strstr (cur_form, "FRBp") != NULL);
+   has_frs = (strstr (cur_form, "FRS") != NULL);
+   has_frsp = (strstr (cur_form, "FRSp") != NULL);
+   has_frt = (strstr (cur_form, "FRT") != NULL);
+   has_frtp = (strstr (cur_form, "FRTp") != NULL);
+
+   /* VSX register operands. */
+   has_xa = (strstr (cur_form, ",XA") != NULL);
+   has_xap = (strstr (cur_form, ",XAp") != NULL);
+   has_xb = (strstr (cur_form, ",XB") != NULL);
+   has_xc = (strstr (cur_form, ",XC") != NULL);
+   has_xs = (strncmp (cur_form, "XS", 2) == 0);
+   has_xsp = (strncmp (cur_form, "XSp", 3) == 0);
+   has_xt = (strncmp (cur_form, "XT", 2) == 0);
+   has_xtp = (strncmp (cur_form, "XTp", 3) == 0);
+
+   /* Accumulator operands. */
+   uses_acc_src = (strstr (cur_form, "AS") != NULL);
+   uses_acc_dest = (strstr (cur_form, "AT") != NULL);
+   /* These (xxm*acc) are special cases where the acc_src is used, but we
+      need to read the associated _vsrs on the way out. */
+   uses_acc_vsrs = (
+      (strstr (instruction_name, "xxmfacc") != NULL) ||
+      (strstr (instruction_name, "xxmtacc") != NULL) );
+   uses_acc = uses_acc_src || uses_acc_dest || uses_acc_vsrs;
+
+   /* Instruction-specific special cases, keyed off the mnemonic. */
+   uses_dfp128_input = (
+      (strncmp (instruction_name, "dctf", 4) == 0));
+   uses_dfp128_output = (
+      (strncmp (instruction_name, "dcff", 4) == 0));
+   is_divide_or_modulo = (
+      (strncmp (instruction_name, "vdiv", 4) == 0) ||
+      (strncmp (instruction_name, "pmvdiv", 6) == 0) ||
+      (strncmp (instruction_name, "vmod", 4) == 0) ||
+      (strncmp (instruction_name, "pmvmod", 6) == 0) );
+   is_insert_double = (
+      (strncmp (instruction_name, "vinsd", 5) == 0) );
+   is_testlsb = (
+      (strncmp (instruction_name, "xvtlsbb", 7) == 0) );
+   uses_xc_as_blend_mask = (
+      (strncmp (instruction_name, "xxblend", 7) == 0) );
+   has_dcmx = (strstr (cur_form, "DCMX") != NULL);
+
+   /* Condition register usage. */
+   uses_CRBIT = (
+      (strncmp (cur_form, "BF", 2) == 0) ||
+      (strstr (cur_form, ",BI") != NULL));
+   uses_RC = (
+      (strstr (instruction_name, ".") != NULL ));
+   /* NOTE(review): ",MC" is searched for in the mnemonic rather than in
+      cur_form, unlike every other ",XX" operand test in this function.
+      Presumably cur_form was intended; confirm before changing, since it
+      alters which tests are treated as MC users. */
+   uses_MC = (
+      (strstr (instruction_name, ",MC") != NULL ));
+   uses_cr = (
+      (strstr (instruction_name, "setbcr") != NULL) ||
+      (strstr (instruction_name, "setnbcr") != NULL));
+
+   /* The lxvkq instruction loads special values into a VSX vector, so
+      although this looks like a load, it is excluded from the
+      uses_load_buffer set because it does not load a value from a buffer.
+      The exclusion has to qualify the "lxv" prefix test itself: "lxvkq"
+      starts with "lxv", so an unconditional "lxv" match would include it.
+      The "lxva" prefix is covered by the "lxv" test. */
+   uses_load_buffer = (
+      (strncmp (instruction_name, "ld", 2) == 0) ||
+      (strncmp (instruction_name, "lq", 2) == 0) ||
+      (strncmp (instruction_name, "plq", 3) == 0) ||
+      (strncmp (instruction_name, "plx", 3) == 0) ||
+      (strncmp (instruction_name, "pmlx", 4) == 0) ||
+      ( (strncmp (instruction_name, "lxv", 3) == 0) &&
+        (strncmp (instruction_name, "lxvkq", 5) != 0)) );
+   uses_store_buffer = (
+      (strncmp (instruction_name, "pmst", 4) == 0) ||
+      (strncmp (instruction_name, "pst", 3) == 0) ||
+      (strncmp (instruction_name, "st", 2) == 0));
+   uses_any_buffer = (strstr (cur_form, "(RA)") != NULL);
+   uses_buffer = uses_any_buffer||uses_load_buffer||uses_store_buffer;
+
+   uses_quad = (uses_buffer && (strstr (instruction_name, "q") != NULL));
+
+   has_rs_as_value_source = (
+      (strcmp (cur_form, "RA,RS,RB") == 0) ||
+      (strcmp (cur_form, "RA,RS") == 0) );
+
+   is_clear_or_insert_insns = (
+      (strncmp (instruction_name, "vclr", 4) == 0) ||
+      (strncmp (instruction_name, "vins", 4) == 0) );
+
+   /* This is used by a helper function to control the CR field output when
+      the instruction is a compare, otherwise it is likely a bitfield check.
+      NOTE(review): "cmp" is searched for in cur_form, not in the mnemonic.
+      Presumably instruction_name was intended; confirm before changing. */
+   is_cmp_insn = ( (strstr (cur_form, "cmp") != NULL));
+
+   is_mtvsr_insn = ( (strncmp (instruction_name, "mtvsr", 5) == 0));
+
+   /* If the instruction output needs to be something other than a hex dump,
+      a mask will have been defined as part of the test_list_t structure.
+      This includes instructions that return estimated values, as well as
+      those that return NAN results which contain sign bits that need to be
+      filtered out. */
+   output_mask = ( current_test.mask );
+   instruction_is_dp = ( current_test.mask & DOUBLE_MASK );
+   instruction_is_dp_estimate = ( current_test.mask & DOUBLE_EST_MASK );
+   instruction_is_sp = ( current_test.mask & SINGLE_MASK );
+   instruction_is_sp_estimate = ( current_test.mask & SINGLE_EST_MASK );
+   instruction_is_b16 = ( current_test.mask & B16_MASK );
+}
+
+/* display_form_components
+   Debug aid: print cur_form followed by the indicators most recently set
+   by identify_form_components (). */
+void display_form_components (char * cur_form) {
+   printf (" %s\n", cur_form);
+   printf ("Instruction form elements: ");
+   if (has_ra) printf ("ra ");
+   if (has_rb) printf ("rb ");
+   if (has_rc) printf ("rc ");
+   if (has_rs) printf ("rs ");
+   if (has_rsp) printf ("rsp ");
+   if (has_rt) printf ("rt ");
+   if (has_rtp) printf ("rtp ");
+   if (has_vra) printf ("vra ");
+   if (has_vrb) printf ("vrb ");
+   if (has_vrc) printf ("vrc ");
+   if (has_vrm) printf ("vrm ");
+   if (has_vrt) printf ("vrt ");
+
+   if (has_frb) printf ("frb ");
+   if (has_frbp) printf ("frbp ");
+   if (has_frs) printf ("frs ");
+   if (has_frsp) printf ("frsp ");
+   if (has_frt) printf ("frt ");
+   if (has_frtp) printf ("frtp ");
+   if (has_xa) printf ("xa ");
+   if (has_xap) printf ("xap ");
+   if (has_xb) printf ("xb ");
+   if (has_xc) printf ("xc ");
+   if (has_xs) printf ("xs ");
+   if (has_xsp) printf ("xsp ");
+   if (has_xt) printf ("xt ");
+   if (has_xtp) printf ("xtp ");
+   if (uses_acc_src) printf ("AS ");
+   if (uses_acc_dest) printf ("AT ");
+   printf ("\n");
+   if (uses_dfp128_input)
+      printf ("uses dfp128 input.\n");
+   if (uses_dfp128_output)
+      printf ("uses dfp128 output.\n");
+   if (has_ra_target)
+      printf ("ra is a target register.\n");
+   if (has_rs_as_value_source)
+      printf ("rs is a value source.\n");
+   if (uses_xc_as_blend_mask)
+      printf ("uses xc as a blend mask.\n");
+   if (is_clear_or_insert_insns)
+      printf ("is a clear or insert insn.\n");
+   if (is_insert_double)
+      printf ("is an insert doubleword.\n");
+   if (is_testlsb)
+      printf ("tests lsb.\n");
+   if (uses_buffer)
+      printf ("uses_buffer: (l:%d s:%d ?:%d)\n",
+              uses_load_buffer, uses_store_buffer, uses_any_buffer);
+   if (uses_quad)
+      printf ("is a quad load or store.\n");
+   if (is_cmp_insn)
+      printf ("is a compare instruction.\n");
+   if (uses_CRBIT)
+      printf ("instruction references a CR.\n");
+   if (uses_cr)
+      printf ("instruction reads CR bits.\n");
+   if (uses_MC)
+      printf ("Instruction uses MC.\n");
+   if (uses_RC)
+      printf ("Instruction uses Record Bit (cr6).\n");
+   if (uses_acc)
+      printf ("Instruction uses ACC: (src:%d, dst:%d, vsrs:%d).\n",
+              uses_acc_src, uses_acc_dest, uses_acc_vsrs);
+   if (output_mask) {
+      printf ("Instruction results are masked: ");
+      printf (" (%lx) ", output_mask);
+      printf ("%s ", instruction_is_sp?"SP ":"");
+      printf ("%s ", instruction_is_sp_estimate?"SP Estimate ":"");
+      printf ("%s ", instruction_is_dp?"DP ":"");
+      printf ("%s ", instruction_is_dp_estimate?"DP Estimate ":"");
+      printf ("%s ", instruction_is_b16?"bfloat16 ":"");
+   }
+   printf ("\n");
+}
+
+long long mask64[] = { 0x0, 0x00000000ffffffff, 0xffffffff55555555,
+                       0x5555aaaaaaaa5555, 0xaaaa00000000aaaa };
+#define MASK64SIZE 5
+unsigned long long vrm_mask[] = { 0x0, 0x8000000000000000,
+                                  0x8000000000000000, 0x0 };
+#define VRMMASK_SIZE 4
+
+// Helpers to print double/float values.
+// Union to help handle referencing hex/float/double values.
+union rosetta_t {
+   unsigned long long ull;
+   unsigned long long ullp[2];
+   float flt;
+   float fltp[2];
+   uint16_t uint16s[4];
+   double dbl;
+};
+
+/* Print the storage of a float, viewed through the ull member, as 16 hex
+   digits.  The whole union is zeroed first. */
+void generic_print_float_as_hex (float f) {
+   union rosetta_t conv = { .ullp = { 0, 0 } };
+
+   conv.flt = f;
+   printf (" %016llx", conv.ull);
+}
+
+/* Print the flt view of a 64-bit pattern with %f. */
+void generic_print_ull_as_float (unsigned long long ull) {
+   union rosetta_t conv = { .ullp = { 0, 0 } };
+
+   conv.ull = ull;
+   printf (" %f", conv.flt);
+}
+
+/* Print the dbl view of a 64-bit pattern with %e. */
+void generic_print_ull_as_double (unsigned long long ull) {
+   union rosetta_t conv = { .ullp = { 0, 0 } };
+
+   conv.ull = ull;
+   printf (" %e", conv.dbl);
+}
+
+/* Print the storage of a double as 16 hex digits. */
+void generic_print_double_as_hex (double d) {
+   union rosetta_t conv = { .ullp = { 0, 0 } };
+
+   conv.dbl = d;
+   printf (" %016llx", conv.ull);
+}
+
+// SP in a 32-bit field.
+#define SP_SIGNBIT_MASK 0x80000000
+#define SP_EXPONENT_MASK 0x7f800000
+#define SP_FRACTION_MASK 0x007fffff
+
+// DP (64-bit).
+#define DP_SIGNBIT_MASK 0x8000000000000000UL
+#define DP_EXPONENT_MASK 0x7ff0000000000000UL
+#define DP_FRACTION_MASK 0x000fffffffffffffUL
+
+// B16 bfloat16.
+#define BF16_SIGNBIT_MASK 0x8000
+#define BF16_EXPONENT_MASK 0x7f80
+#define BF16_FRACTION_MASK 0x007f
+
+/*
+ - NAN and Zero values need the sign bit display suppressed. (See comments
+   in jm-insns.c, approx line 7203).
+ - Some instructions return estimated values, which are calculated
+   to a different level of precision within valgrind. Those
+   instructions need their outputs limited to a specific number of
+   digits as seen below. */
+
+// NAN - Maximum biased exponent and a nonzero mantissa (fraction).
+#define PRINT_SP_NAN printf (" NaN");
+// DEN - Exp == 0 and Frac != 0
+#define PRINT_SP_PLUS_DEN printf (" +Den");
+#define PRINT_SP_MINUS_DEN printf (" -Den");
+// INF - Maximum biased exponent and a zero mantissa.
+#define PRINT_SP_INF printf (" Inf");
+#define PRINT_SP_PLUS_INF printf (" +Inf");
+#define PRINT_SP_MINUS_INF printf (" -Inf");
+#define PRINT_SP_FLOAT(x) printf ("%13.05e", x);
+#define PRINT_SP_FLOAT_EST(x) printf ("%13.03e", x);
+#define PRINT_SP_FLOAT_PLUS_ZERO printf (" +Zero");
+#define PRINT_SP_FLOAT_MINUS_ZERO printf (" -Zero");
+
+/* Print one single-precision (32-bit) value.
+   Infinities, NaN, zeroes and denormals are printed symbolically; any
+   other value is printed with PRINT_SP_FLOAT, or PRINT_SP_FLOAT_EST when
+   instruction_is_sp_estimate is set.
+   The bit pattern is reinterpreted through a union whose members are both
+   32 bits wide.  Reading a float overlaid on a 64-bit integer only sees
+   the value bits when the low-order word is stored first, so it is not
+   endian-neutral. */
+void special_print_sp_value (uint32_t value) {
+   int signbit;
+   int exponent;
+   unsigned long long fraction;
+   union { uint32_t bits; float flt; } stone;
+
+   stone.bits = value;
+   signbit = value & SP_SIGNBIT_MASK;
+   exponent = (value & SP_EXPONENT_MASK);
+   fraction = value & SP_FRACTION_MASK;
+
+   if (debug_show_raw_values) {
+      printf ("\nsp_debug: v:%08x s: %d %3x %8llx %f , ",
+              value, signbit?1:0, exponent, fraction, stone.flt);
+   }
+   if (exponent == SP_EXPONENT_MASK && fraction == 0 ) {
+      if (signbit) {
+         PRINT_SP_MINUS_INF
+      } else {
+         PRINT_SP_PLUS_INF
+      }
+   } else if (exponent == SP_EXPONENT_MASK && fraction != 0 ) {
+      PRINT_SP_NAN
+   } else if (exponent == 0 && fraction == 0 ) {
+      if (signbit) {
+         PRINT_SP_FLOAT_MINUS_ZERO
+      } else {
+         PRINT_SP_FLOAT_PLUS_ZERO
+      }
+   } else if (exponent == 0 && fraction != 0 ) {
+      if (signbit) {
+         PRINT_SP_MINUS_DEN
+      } else {
+         PRINT_SP_PLUS_DEN
+      }
+   } else if (instruction_is_sp_estimate) {
+      PRINT_SP_FLOAT_EST (stone.flt);
+   } else {
+      PRINT_SP_FLOAT (stone.flt);
+   }
+}
+
+/* Optionally dump the raw sign/exponent/fraction fields of the low 32 bits
+   of foo (when debug_show_raw_values is set), then print those 32 bits
+   with special_print_sp_value () followed by a space. */
+void dissect_sp_value (unsigned long long foo) {
+   if (debug_show_raw_values) {
+      printf ("RAW sp::%4llx ", foo);
+      printf (" [s:");
+      printf ("%x", (foo & SP_SIGNBIT_MASK)>0);
+      printf (" e:");
+      printf ("%4llx", foo & SP_EXPONENT_MASK);
+      printf (" f:");
+      printf ("%4llx", foo & SP_FRACTION_MASK);
+      printf ("] ");
+   }
+   special_print_sp_value (foo);
+   printf (" ");
+}
+
+/* Print one DP value out of our vec_ field.
*/
+#define PRINT_DP_NAN printf (" NaN");
+#define PRINT_DP_MINUS_DEN printf (" -Den");
+#define PRINT_DP_PLUS_DEN printf (" +Den");
+#define PRINT_DP_MINUS_INF printf (" -Inf");
+#define PRINT_DP_PLUS_INF printf (" +InF");
+#define PRINT_DP_FLOAT(x) printf (" %15.08e", x);
+#define PRINT_DP_FLOAT_EST(x) printf (" %15.02e", x);
+#define PRINT_DP_FLOAT_PLUS_ZERO printf (" +Zero");
+#define PRINT_DP_FLOAT_MINUS_ZERO printf (" -Zero");
+#define PRINT_DP_FLOAT_ZERO printf (" 0.000000e+000");
+/* Classify a 64-bit pattern as a double and print it: infinities, NaN,
+   zeroes and denormals symbolically, anything else with PRINT_DP_FLOAT
+   (or PRINT_DP_FLOAT_EST when instruction_is_dp_estimate is set). */
+void special_print_dp_value (unsigned long long value) {
+   union rosetta_t bits;
+   unsigned long long is_negative;
+   unsigned long long exp_bits;
+   unsigned long long frac_bits;
+
+   bits.ull = value;
+   is_negative = (value & DP_SIGNBIT_MASK) > 0;
+   exp_bits = value & DP_EXPONENT_MASK; // >> double_exponent_shift;
+   frac_bits = value & DP_FRACTION_MASK;
+
+   if (verbose>2)
+      printf ("\ndb_debug: %16llx s:%d %3llx %8llx %llx , ",
+              value, is_negative?1:0, exp_bits, frac_bits, bits.ull);
+
+   /* Maximum exponent: infinity or NaN. */
+   if (exp_bits == DP_EXPONENT_MASK) {
+      if (frac_bits != 0) {
+         PRINT_DP_NAN
+      } else if (is_negative) {
+         PRINT_DP_MINUS_INF
+      } else {
+         PRINT_DP_PLUS_INF
+      }
+      return;
+   }
+
+   /* Zero exponent: signed zero or denormal. */
+   if (exp_bits == 0) {
+      if (frac_bits == 0) {
+         if (is_negative) {
+            PRINT_DP_FLOAT_MINUS_ZERO
+         } else {
+            PRINT_DP_FLOAT_PLUS_ZERO
+         }
+      } else {
+         if (is_negative) {
+            PRINT_DP_MINUS_DEN
+         } else {
+            PRINT_DP_PLUS_DEN
+         }
+      }
+      return;
+   }
+
+   /* Ordinary value. */
+   if (instruction_is_dp_estimate) {
+      PRINT_DP_FLOAT_EST (bits.dbl);
+   } else {
+      PRINT_DP_FLOAT (bits.dbl);
+   }
+}
+
+/* Optionally dump the raw sign/exponent/fraction fields of foo (when
+   debug_show_raw_values is set), then print it with
+   special_print_dp_value () followed by a space. */
+void dissect_dp_value (unsigned long long foo) {
+   if (debug_show_raw_values) {
+      const unsigned long long sign_field = foo & DP_SIGNBIT_MASK;
+      const unsigned long long exp_field = foo & DP_EXPONENT_MASK;
+      const unsigned long long frac_field = foo & DP_FRACTION_MASK;
+
+      printf ("RAW dp::%llx", (foo));
+      printf (" [sign:");
+      printf ("%x ", sign_field > 0);
+      printf (" expbits:");
+      printf ("%3llx", exp_field);
+      printf (" frac:");
+      printf ("%16llx", frac_field);
+      printf ("] ");
+   }
+   special_print_dp_value (foo);
+   printf (" ");
+}
+
+// NAN - Maximum biased exponent and a nonzero mantissa (fraction).
+#define PRINT_BF16_NAN printf (" NaN");
+// DEN - Exp == 0 and Frac != 0
+#define PRINT_BF16_PLUS_DEN printf (" +Den");
+#define PRINT_BF16_MINUS_DEN printf (" -Den");
+// INF - Maximum biased exponent and a zero mantissa.
+#define PRINT_BF16_INF printf (" Inf");
+#define PRINT_BF16_PLUS_INF printf (" +Inf");
+#define PRINT_BF16_MINUS_INF printf (" -Inf");
+#define PRINT_BF16_FLOAT(x) printf (" 0x%04x", x);
+#define PRINT_BF16_FLOAT_PLUS_ZERO printf (" +Zero");
+#define PRINT_BF16_FLOAT_MINUS_ZERO printf (" -Zero");
+/* Classify one 16-bit bfloat16 pattern and print it: infinities, NaN,
+   zeroes and denormals symbolically, anything else as raw hex. */
+void special_print_bf16_value (uint16_t value) {
+   union rosetta_t bits;
+   int is_negative = value & BF16_SIGNBIT_MASK;
+   int exp_bits = (value & BF16_EXPONENT_MASK);
+   unsigned long long frac_bits = (value & BF16_FRACTION_MASK);
+
+   bits.ull = value;
+   if (debug_show_raw_values) {
+      printf ("\nbf16_debug: v:%08x s: %d %3x %8llx %f , ",
+              value, is_negative?1:0, exp_bits, frac_bits, bits.flt);
+   } else if (verbose > 0) {
+      printf (" v:%08x", value);
+   }
+
+   /* Maximum exponent: infinity or NaN. */
+   if (exp_bits == BF16_EXPONENT_MASK) {
+      if (frac_bits != 0) {
+         PRINT_BF16_NAN
+      } else if (is_negative) {
+         PRINT_BF16_MINUS_INF
+      } else {
+         PRINT_BF16_PLUS_INF
+      }
+      return;
+   }
+
+   /* Zero exponent: signed zero or denormal. */
+   if (exp_bits == 0) {
+      if (frac_bits == 0) {
+         if (is_negative) {
+            PRINT_BF16_FLOAT_MINUS_ZERO
+         } else {
+            PRINT_BF16_FLOAT_PLUS_ZERO
+         }
+      } else {
+         if (is_negative) {
+            PRINT_BF16_MINUS_DEN
+         } else {
+            PRINT_BF16_PLUS_DEN
+         }
+      }
+      return;
+   }
+
+   PRINT_BF16_FLOAT (value);
+}
+
+/* ******************** */
+/* Accumulator related. */
+/* Note that our tests to set and clear the acc both read and write
+   from and to the associated VSRs, so some tests may be
+   self-fulfilling.
*/ +void push_vsrs_to_acc () { + if (!setup_only) + __asm__ __volatile__ ("xxmtacc 4 "); // $ACCNUM +} + +void push_acc_to_vsrs () { + if (!setup_only) + __asm__ __volatile__ ("xxmfacc 4 "); // $ACCNUM +} + + +void __print_splat_or_sp(long long vv) { + if (vv == DEADBEEF) + printf (" * "); + else { + special_print_sp_value (0xffffffff & (vv>>32)); + special_print_sp_value (0xffffffff & (vv)); + } +} + +void __print_splat_or_dp(long long vv) { + if (vv == DEADBEEF) + printf (" * "); + else { + special_print_dp_value (vv); + } +} + +void __print_splat_or_raw(long long vv) { + if (vv == DEADBEEF) + printf (" * "); + else + printf ("%llx ", vv); +} + +void print_accumulator () { + if (uses_acc || debug_show_all_regs) { + push_acc_to_vsrs (); + if (debug_show_labels) printf (" Acc[]:"); + if (instruction_is_sp) { + printf (" ("); + __print_splat_or_sp(TEST_ACC0[0]); + __print_splat_or_sp(TEST_ACC0[1]); + __print_splat_or_sp(TEST_ACC1[0]); + __print_splat_or_sp(TEST_ACC1[1]); + __print_splat_or_sp(TEST_ACC2[0]); + __print_splat_or_sp(TEST_ACC2[1]); + __print_splat_or_sp(TEST_ACC3[0]); + __print_splat_or_sp(TEST_ACC3[1]); + printf (")"); + } else if (instruction_is_dp) { + printf (" {"); + __print_splat_or_dp(TEST_ACC0[0]); + __print_splat_or_sp(TEST_ACC0[1]); + __print_splat_or_dp(TEST_ACC1[0]); + __print_splat_or_sp(TEST_ACC1[1]); + __print_splat_or_dp(TEST_ACC2[0]); + __print_splat_or_sp(TEST_ACC2[1]); + __print_splat_or_dp(TEST_ACC3[0]); + __print_splat_or_sp(TEST_ACC3[1]); + printf ("}"); + } else { + printf (" ["); + __print_splat_or_raw(TEST_ACC0[0]); + __print_splat_or_raw(TEST_ACC0[1]); + __print_splat_or_raw(TEST_ACC1[0]); + __print_splat_or_raw(TEST_ACC1[1]); + __print_splat_or_raw(TEST_ACC2[0]); + __print_splat_or_raw(TEST_ACC2[1]); + __print_splat_or_raw(TEST_ACC3[0]); + __print_splat_or_raw(TEST_ACC3[1]); + printf ("]"); + } + } +} + + +/* ************** */ +/* The bit definitions for the FPSCR are as follows. 
+Bit (s) Description +0:31 Reserved +32 Floating-Point Exception Summary (FX) +33 Floating-Point Enabled Exception Summary (FEX) +34 Floating-Point Invalid Operation Exception Summary (VX) +35 Floating-Point Overflow Exception (OX) +36 Floating-Point Underflow Exception (UX) +37 Floating-Point Zero Divide Exception (ZX) +38 Floating-Point Inexact Exception (XX) +39 Floating-Point Invalid Operation Exception (SNaN) (VXSNAN) +40 Floating-Point Invalid Operation Exception (∞ - ∞) (VXISI) +41 Floating-Point Invalid Operation Exception (∞ ÷ ∞) (VXIDI) +42 Floating-Point Invalid Operation Exception (0 ÷ 0) (VXZDZ) +43 Floating-Point Invalid Operation Exception (∞ × 0) (VXIMZ) +44 Floating-Point Invalid Operation Exception (Invalid Compare) (VXVC) +45 Floating-Point Fraction Rounded (FR) +46 Floating-Point Fraction Inexact (FI) +47:51 Floating-Point Result Flags (FPRF) +47 Floating-Point Result Class Descriptor (C) +48:51 Floating-Point Condition Code (FPCC) + 48 Floating-Point Less Than or Negative (FL or <) + 49 Floating-Point Greater Than or Positive (FG or >) + 50 Floating-Point Equal or Zero (FE or = ) + 51 Floating-Point Unordered or NaN (FU or ?) +52 Reserved +53 Floating-Point Invalid Operation Exception (Software-Defined Condition) (VXSOFT) +54 Floating-Point Invalid Operation Exception (Invalid Square Root) (VXSQRT) +55 Floating-Point Invalid Operation Exception (Invalid Integer Convert) (VXCVI) +56 Floating-Point Invalid Operation Exception Enable (VE) +57 Floating-Point Overflow Exception Enable (OE) +58 Floating-Point Underflow Exception Enable (UE) +59 Floating-Point Zero Divide Exception Enable (ZE) +60 Floating-Point Inexact Exception Enable (XE) +61 Floating-Point Non-IEEE Mode (NI) +62:63 Floating-Point Rounding Control (RN) + 00 Round to Nearest + 01 Round toward Zero + 10 Round toward +Infinity + 11 Round toward -Infinity +*/ +/* Valgrind currently tracks the rounding mode, C and FPCC fields + of the FPSCR. 
Additional checking in the testcase is not + necessary or beneficial. */ + +#define FPCC_C_BIT (0x1 << (63-47)) +#define FPCC_FL_BIT (0x1 << (63-48)) +#define FPCC_FG_BIT (0x1 << (63-49)) +#define FPCC_FE_BIT (0x1 << (63-50)) +#define FPCC_FU_BIT (0x1 << (63-51)) +#define FPCC_FPRF_MASK \ + FPCC_C_BIT | FPCC_FL_BIT | FPCC_FG_BIT | FPCC_FE_BIT | FPCC_FU_BIT + +#define FPSCR_RN_BIT62 (0x1 << (63-62)) +#define FPSCR_RN_BIT63 (0x1 << (63-63)) + +#define CRFIELD_BIT0 0x8 +#define CRFIELD_BIT1 0x4 +#define CRFIELD_BIT2 0x2 +#define CRFIELD_BIT3 0x1 + +/* Display the condition register bits. */ +int cr_overflow_set (unsigned this_cr) { + return (this_cr & CRFIELD_BIT3); +} + +int cr_zero_set (unsigned this_cr) { + return (this_cr & CRFIELD_BIT2); +} + +int cr_positive_set (unsigned this_cr) { + return (this_cr & CRFIELD_BIT1); +} + +int cr_negative_set (unsigned this_cr) { + return (this_cr & CRFIELD_BIT0); +} + +/* This function (__dissect_cr) takes a bitfield directly. */ +static void __dissect_cr (unsigned this_cr) { + extern unsigned long is_cmp_insn; + printf ("["); + if (cr_negative_set (this_cr)) + printf ("%s", is_cmp_insn ? " (LT) 0x1 = Negative 0b1 " : "1"); + else + printf ("%s", verbose ? "0" : "0"); + + if (cr_positive_set (this_cr)) + printf ("%s", is_cmp_insn ? " (GT) 0x2 = Positive fg_flag (zero/inf/denorm) " : "1"); + else + printf ("%s", verbose ? "0" : "0"); + + if (cr_zero_set (this_cr)) + printf ("%s", is_cmp_insn ? " (EQ) 0x4 = Zero fe_flag (zero/nan/inf/neg/e_b<-970" : "1"); + else + printf ("%s", verbose ? "0" : "0"); + + if (cr_overflow_set (this_cr)) + printf ("%s", is_cmp_insn ? " (SO) 0x8 = Overflow 0b0" : "1"); + else + printf ("%s", verbose ? 
"0" : "0"); + printf ("]"); +} + +/* Extract one CR field */ +int extract_cr_rn (unsigned long chosen_cr, unsigned long rn) { + unsigned int masked_cr; + unsigned long shifted_value; + shifted_value = chosen_cr >> ( ( (7 - rn) * 4 ) ); + masked_cr = shifted_value & 0xf; + return masked_cr; +} + +/* Display one CR field */ +void dissect_cr_rn (unsigned long chosen_cr, unsigned long rn) { + unsigned int masked_cr; + if (debug_show_labels) printf (" RC/CR (%ld):", rn ); + masked_cr = extract_cr_rn (chosen_cr, rn); + printf ("%ld:", rn); + __dissect_cr (masked_cr); +} + +char * fpscr_strings[] = { +" 0-RSVD", " 1-RSVD", " 2-RSVD", " 3-RSVD", " 4-RSVD", " 5-RSVD", " 6-RSVD", +" 7-RSVD", " 8-RSVD", " 9-RSVD", "10-RSVD", "11-RSVD", "12-RSVD", "13-RSVD", +"14-RSVD", "15-RSVD", "16-RSVD", "17-RSVD", "18-RSVD", "19-RSVD", "20-RSVD", +"21-RSVD", "22-RSVD", "23-RSVD", "24-RSVD", "25-RSVD", "26-RSVD", "27-RSVD", +"28-RSVD", "29-DRN0", "30-DRN1", "31-DRN2", +/* 32 */ "FX", "FEX", "VX", +/* 35 */ "OX", "UX", "ZX", "XX", "VXSNAN", +/* 40 */ "VXISI (inf-inf)", "VXIDI (inf/inf)", "VXZDZ (0/0)", +/* 43 */ "VXIMZ (inf*0)", "VXVC", +/* 45 */ "FR", "FI", +/* 47 */ "FPRF-C", "FPCC-FL", "FPCC-FG", +/* 50 */ "FPCC-FE", "FPCC-FU", +/* 52 */ "52-RSVD", "FXSOFT", "VXSQRT", +/* 55 */ "VXCVI", "VE", "OE", "UE", "ZE", +/* 60 */ "XE", "NI", "RN-bit62", "RN-bit63" +}; +/* Display only the fpscr bits that are valid under valgrind. + * Valgrind tracks the C (FPSCR[47]), FPCC (FPSCR[48:51) + * DRN (FPSCR[29:31]) and RN (FPSCR[62:63]). 
*/ +void dissect_fpscr_valgrind (unsigned long local_fpscr) { + int i; + long mybit; + + /* Print DRN fields */ + for (i = 29; i < 32; i++) { + mybit = 1LL << (63 - i); + if (mybit & local_fpscr) { + printf (" %s", fpscr_strings[i]); + } + } + + /* Print C and FPCC fields */ + for (i = 47; i < 52; i++) { + mybit = 1LL << (63 - i); + if (mybit & local_fpscr) { + printf (" %s", fpscr_strings[i]); + } + } + + /* Print RN field */ + for (i = 62; i < 64; i++) { + mybit = 1LL << (63 - i); + if (mybit & local_fpscr) { + printf (" %s", fpscr_strings[i]); + } + } +} + +/* + * This prints the entire FPSCR field. This is only called under higher + * verbosities, as valgrind does not track most of these bits. + */ +void dissect_fpscr_raw (unsigned long local_fpscr) { +/* Due to the additional involved logic, the rounding mode (RN) bits 61-62 + * are handled within dissect_fpscr_rounding_mode (). */ + int i; + long mybit; + for (i = 0; i < 61; i++) { + /* also note that the bit numbering is backwards. */ + mybit = 1LL << (63 - i); + if (mybit & local_fpscr) { + printf (" %s", fpscr_strings[i]); + } + } +} + +void dissect_fpscr (unsigned long local_fpscr) { + if (verbose > 2) { + printf (" [[ fpscr:%lx ]] ", local_fpscr); + dissect_fpscr_raw (local_fpscr); + } else { + dissect_fpscr_valgrind (local_fpscr); + } +} + + +/* *************** */ +/* Buffer Helpers. +Define both a base and a reference buffer. When printing results, only print +the values when there is a difference between the two. */ +#define BUFFER_SIZE 12 +/* Note: Watch the alignment of the buffer, some loads/stores may require +stronger alignments. 
*/ +__attribute__ ( (aligned (16))) unsigned long long buffer[2*BUFFER_SIZE]; +__attribute__ ( (aligned (16))) unsigned long long reference_buffer[2*BUFFER_SIZE]; +unsigned long changed_index[2*BUFFER_SIZE]; +void initialize_buffer (int t) +{ + int x; + for (x = 0; x < BUFFER_SIZE; x++) + /* We don't want each of the 32-bit chunks to be identical since loads + * of a byte from the wrong 32-bit chuck may be difficult to spot. + * Load these up with values that are also interesting if SP/DP, etc. + */ + switch ( (t+x)%BUFFER_SIZE) { + case 0: buffer[x] = 0x3fe00094e0007359; break; // sp + case 1: buffer[x] = 0x7ff7020304057607; break; // nan + case 2: buffer[x] = 0x7ff0000000007000; break; // inf + case 3: buffer[x] = 0x7f0000007f007000; break; // sp pair. + case 4: buffer[x] = 0x5a05a05a05a07a05; break; + case 5: buffer[x] = 0x0102030405067708; break; + case 6: buffer[x] = 0xfedcba9876547210; break; + case 7: buffer[x] = 0x0123456789ab7def; break; + case 8: buffer[x] = 0xffeeddccbbaa7988; break; + default: buffer[x] = 0x1112111211127112* (x-8); break; + } + for (x = 0; x < BUFFER_SIZE; x++) + reference_buffer[x] = buffer[x]; +} + +/* Buffer printing helper. This only displays the contents if they have + changed with respect to the reference buffer, or if running under + high verbosity. 
*/ +void dump_changed_buffer (unsigned long range) { + int x; + int buffer_changed = 0; + + for (x = 0; (x < BUFFER_SIZE) && (x2) + printf (" {idx %d %016llx %016llx}", + x, reference_buffer[x] , buffer[x] ); + } + } + if (verbose>2 || buffer_changed) { + printf (" ["); + for (x = 0; x < BUFFER_SIZE && (x 0) + printf ("%s%016llx", changed_index[x] == 1?"*":" ", buffer[x] ); + if (changed_index[x]) { + if (instruction_is_sp) { + printf (" ("); + special_print_sp_value (0xffffffff & buffer[x] >> 32 ); + printf (" "); + special_print_sp_value (0xffffffff & buffer[x]); + printf (") "); + } else if (instruction_is_dp) { + printf (" {"); + special_print_dp_value (buffer[x]); + printf ("} "); + } + printf ("%016llx", buffer[x]); + } else + printf (" - "); + } + printf ("]"); + } +} + +void dump_raw_buffer () { + int x; + printf ("buffer:["); + for (x = 0; x < BUFFER_SIZE ; x++) { + if (x%4 == 0) printf (" (%d)", x); + printf ("%016llx ", buffer[x]); + } + printf ("]"); +} + +void dump_small_buffer (void) { + dump_changed_buffer (8); +} + +void dump_large_buffer (void) { + dump_changed_buffer (8); +} + +void dump_buffer () { +if (verbose>1) printf (" buffer:"); + if (uses_quad) { + dump_large_buffer (); + } else { + dump_small_buffer (); + } +} + +void print_undefined () { + if (verbose>1) + printf (" [Undef]"); + else + printf (" "); +} + +/* print the input 64-bit vector as 32-bit SP lumps. */ +void print_vec_as_sp (unsigned long long ull64) { + printf (" %08llx", ull64 >> 32 ); + printf (" %08llx", ull64 & 0xffff ); +} + +/*------------------------------------------------------------------*/ +/* Decimal Floating Point (DFP) helper functions */ +/*------------------------------------------------------------------*/ +#define NOT(x) ( ( ( x ) == 0) ? 
1 : 0) +#define GET(x,y) ( ( ( x ) & ( 0x1UL << ( y ) ) ) >> ( y ) ) +#define PUT(x,y) ( ( x )<< ( y ) ) + +unsigned long dpb_to_bcd ( unsigned long chunk ) +{ + int a, b, c, d, e, f, g, h, i, j, k, m; + int p, q, r, s, t, u, v, w, x, y; + unsigned long value; + + /* convert 10 bit densely packed BCD to BCD */ + p = GET ( chunk, 9 ); + q = GET ( chunk, 8 ); + r = GET ( chunk, 7 ); + s = GET ( chunk, 6 ); + t = GET ( chunk, 5 ); + u = GET ( chunk, 4 ); + v = GET ( chunk, 3 ); + w = GET ( chunk, 2 ); + x = GET ( chunk, 1 ); + y = GET ( chunk, 0 ); + + /* The BCD bit values are given by the following boolean equations.*/ + a = ( NOT (s) & v & w ) | ( t & v & w & s ) | ( v & w & NOT (x) ); + b = ( p & s & x & NOT (t) ) | ( p & NOT (w) ) | ( p & NOT (v) ); + c = ( q & s & x & NOT (t) ) | ( q & NOT (w) ) | ( q & NOT (v) ); + d =r; + e = ( v & NOT (w) & x ) | ( s & v & w & x ) | ( NOT (t) & v & x & w ); + f = ( p & t & v & w & x & NOT (s) ) | ( s & NOT (x) & v ) | ( s & NOT (v) ); + g = ( q & t & w & v & x & NOT (s) ) | ( t & NOT (x) & v ) | ( t & NOT (v) ); + h = u; + i = ( t & v & w & x ) | ( s & v & w & x ) | ( v & NOT (w) & NOT (x) ); + j = ( p & NOT (s) & NOT (t) & w & v ) | ( s & v & NOT (w) & x ) + | ( p & w & NOT (x) & v ) | ( w & NOT (v) ); + k = ( q & NOT (s) & NOT (t) & v & w ) | ( t & v & NOT (w) & x ) + | ( q & v & w & NOT (x) ) | ( x & NOT (v) ); + m = y; + + value = PUT (a, 11) | PUT (b, 10) | PUT (c, 9) | PUT (d, 8) | PUT (e, 7) + | PUT (f, 6) | PUT (g, 5) | PUT (h, 4) | PUT (i, 3) | PUT (j, 2) + | PUT (k, 1) | PUT (m, 0); + return value; +} +#undef NOT +#undef GET +#undef PUT + +/* get_declet (). Return a 10-bit declet, beginning at the 'start' + * offset. 
+ * + * | dword1 | dword0 | + * | 0 63|64 127| + */ +#define TEN_BITS 0x03ffULL + +int get_declet (int start, uint64_t dword1, uint64_t dword0) { + unsigned long local_declet; + unsigned int dword0_shift; + unsigned int dword1_shift; + + dword1_shift = 63 - (start + 9); + dword0_shift = 127 - (start + 9); + + if (verbose>5) printf ("\n%s (%d) %016lx %016lx", + __FUNCTION__, start, dword1, dword0); + + if ( (start + 9) < 63) { /* fully within dword1 */ + local_declet = (dword1 >> dword1_shift) & TEN_BITS; + + } else if (start >= 65) {/* fully within dword0 */ + local_declet = (dword0 >> dword0_shift) & TEN_BITS; + + } else { /* straddling the two dwords*/ + unsigned long mask_dword0; + unsigned long mask_dword1; + + mask_dword1 = TEN_BITS >> (64 - dword0_shift); + mask_dword0 = TEN_BITS << (dword0_shift); + local_declet = + ( (dword1 & mask_dword1) << (64-dword0_shift)) + + ( (dword0 & mask_dword0) >> dword0_shift); + } + return local_declet; +} + +int get_bcd_digit_from_dpd (int start, uint64_t dword1, + uint64_t dword0) { + long bcd_digit; + long declet; + + declet = get_declet (start, dword1, dword0); + bcd_digit = dpb_to_bcd (declet); + return bcd_digit; +} + +/* For DFP finite numbers, the combination field (G field) is a + * combination of the exponent and the LMD (Left Most Digit) of the + * significand. The fields are encoded/decoded as described in the + * table here. + * 00 01 10 -< Exponent bits. + * 0: 00000 01000 10000 + * ... + * 7: 00111 01111 10111 + * 8: 11000 11010 11100 + * 9: 11001 11011 11101 (encoded special field). + * | + * ^ LMD value. +*/ +#define DFP_GFIELD_MASK 0x7c00000000000000UL +#define DFP_GFIELD_SHIFT 58 +//The exponent bias value is 101 for DFP Short, 398 +//for DFP Long, and 6176 for DFP Extended. 
+#define DFP128_EXPONENT_BIAS 6176 +#define DFP64_EXPONENT_BIAS 398 + +unsigned int special_field_LMD (uint64_t dword1) { + unsigned long g_field_specials; + int left_two_bits; + int right_three_bits; + + g_field_specials = (dword1 & DFP_GFIELD_MASK) >> DFP_GFIELD_SHIFT; + left_two_bits = (g_field_specials & 0x18) >> 3; + right_three_bits = g_field_specials & 0x07; + + /* The LMD result maps directly to the right_three_bits value as + * long as the left two bits are 0b00, 0b01, 0b10. So a compare + * against 3 is sufficient to determine if we can return the right + * three bits directly. (LMD values 0..7). + */ + if (left_two_bits < 3) { + return (right_three_bits); + } + + /* LMD values of 8 or 9 require a bit of swizzle, but a check of + * the right-most bit is sufficient to determine whether LMD value + * is 8 or 9. + */ + if (right_three_bits & 0x1) + return 9; + else + return 8; +} + +/* Returns the exponent bits, as decoded from the G field. */ +int special_field_exponent_bits (unsigned long dword1) { + unsigned long g_field_specials; + int left_two_bits; + int right_three_bits; + + g_field_specials = (dword1 & DFP_GFIELD_MASK) >> DFP_GFIELD_SHIFT; + left_two_bits = (g_field_specials & 0x18) >> 3; + right_three_bits = g_field_specials & 0x07; + + /* The special field exponent bits maps directly to the left_two_bits + * value as long as the left two bits are 0b00, 0b01, 0b10. So a compare + * against 3 is sufficient for those values. + */ + if (left_two_bits < 3) { + return (left_two_bits); + } + + switch (right_three_bits) { + case 0: + case 1: return 0x0; + case 2: + case 3: return 0x1; + case 4: + case 5: return 0x2; + case 6: /* Infinity */ return 0x0; + case 7: /* NaN */ return 0x0; + } + return -1; /* should never hit this */ +} + +/* The 'exponent left' shift is for moving the leftmost two bits + * of the exponent down to where they can be easily merged with the + * rest of the exponent. 
+ */ +#define DFP128_EXPONENT_RIGHT_MASK 0x03ffc00000000000 +#define DFP64_EXPONENT_RIGHT_MASK 0x03fc000000000000 +#define DFP128_EXPONENT_RIGHT_MASK_SHIFT 46 +#define DFP64_EXPONENT_RIGHT_MASK_SHIFT 50 +#define DFP128_EXPONENT_LEFT_SHIFT 12 +#define DFP64_EXPONENT_LEFT_SHIFT 8 + +#define DFP_NAN 0x1f +#define DFP_INF 0x1e +#define DFP_SIGNALING_NAN_BIT 0x0200000000000000 + +/* return the dfp exponent from the leading dword. */ +signed long dfp128_exponent (unsigned long dword1) { + unsigned long exponent_left; + unsigned long exponent_right; + unsigned long biased_exponent; + signed long exponent; + + exponent_left = special_field_exponent_bits (dword1); + exponent_right = (dword1 & DFP128_EXPONENT_RIGHT_MASK); + biased_exponent = (exponent_left << DFP128_EXPONENT_LEFT_SHIFT) + + (exponent_right >> DFP128_EXPONENT_RIGHT_MASK_SHIFT); + + /* Unbias the exponent. */ + exponent = biased_exponent - DFP128_EXPONENT_BIAS; + return exponent; +} + +/* Interpret the paired 64-bit values as a extended (quad) 128 bit DFP. + * + * | Significand | Combination Field/ | | + * | sign bit | Encoded Exponent | remainder of significand | + * |0 |1 17|18 127| + * ^ (bit0) Significand sign bit. + * ^ (bit 1:17) Combination field. Contains high bits of + * exponent (encoded), LMD of significand (encoded), + * and the remainder of the exponent. First five bits + * will indicate special cases NAN or INF. + * ^ (bit 18:127) Remainder of the + * significand. 
+ */ + +#define DFP128_COMBINATION_MASK 0x7fffc +#define DFP64_COMBINATION_MASK 0x7ffc +#define DFP128_COMBINATION_SHIFT 46 +#define DFP64_COMBINATION_SHIFT 50 +#define DFP_SPECIAL_SYMBOLS_MASK 0x1f +#define DFP_SPECIAL_SYMBOLS_SHIFT 58 + +#define DFP_NAN 0x1f +#define DFP_INF 0x1e +#define DFP_SIGNALING_NAN_BIT 0x0200000000000000 + +#define DFP128_T_START 18 + +void dissect_dfp128_float (uint64_t dword1, uint64_t dword0) { + long signbit; + signed long exponent; + unsigned long gfield_special_symbols; + unsigned long lmd_digit; + unsigned long bcd_digits[13]; + int i; + int silent = 0; // suppress leading zeros from the output. + + if (debug_show_raw_values) + printf ("DFP128R:%016lx, %016lx", dword1, dword0); + + signbit = (dword1 >> 63); + + if (signbit) printf (" -"); + else printf (" "); + + gfield_special_symbols = + ((dword1 >> DFP_SPECIAL_SYMBOLS_SHIFT) & DFP_SPECIAL_SYMBOLS_MASK); + + switch (gfield_special_symbols) { + case DFP_INF: + printf ( "inf "); + break; + + case DFP_NAN: + if (dword1 & DFP_SIGNALING_NAN_BIT) + printf ("SNaN "); + else + printf ("QNaN "); + break; + + default: + // printf ( "Finite "); + exponent = dfp128_exponent (dword1); + // printf ("Exponent: %d Bias: %d ", exponent, DFP128_EXPONENT_BIAS ); + + lmd_digit = special_field_LMD (dword1); + for (i = 0; i < 11; i++) { + bcd_digits[i] = get_bcd_digit_from_dpd ( (DFP128_T_START + + 10 * i), dword1, dword0); + } + if (lmd_digit) { + silent++; + printf ("%01lx", lmd_digit); + } else { + printf (" "); + } + for (i = 0; i < 11; i++) { + if (bcd_digits[i] || silent ) { + silent++; + printf ("%01lx", bcd_digits[i]); + } else { + /* always print at least the last zero */ + if (i == 10) + printf ("0"); + else + printf (" "); + } + } + printf (" * 10^"); + printf ("%ld", exponent); + } +} + +void print_vsr (int vsr_to_print) { +unsigned long long blob1 = 0, blob2 = 0; + switch (vsr_to_print) { + case 26: + __asm__ __volatile__ ("mfvsrd %0, 26":"=r" (blob1)); + __asm__ __volatile__ ("mfvsrld 
%0, 26":"=r" (blob2)); + break; + case 27: + __asm__ __volatile__ ("mfvsrd %0, 27":"=r" (blob1)); + __asm__ __volatile__ ("mfvsrld %0, 27":"=r" (blob2)); + break; + case 28: + __asm__ __volatile__ ("mfvsrd %0, 28":"=r" (blob1)); + __asm__ __volatile__ ("mfvsrld %0, 28":"=r" (blob2)); + break; + case 29: + __asm__ __volatile__ ("mfvsrd %0, 29":"=r" (blob1)); + __asm__ __volatile__ ("mfvsrld %0, 29":"=r" (blob2)); + break; + default: + printf ("Add entry for VSR %d to %s in %s.\n", vsr_to_print, __FUNCTION__, __FILE__); + } +if (debug_show_labels) + printf (" VSR (%d):", vsr_to_print); +printf (" %llx, %llx ", blob1, blob2); +} + +void print_frt () { + unsigned long long value1, value3; + if (has_frt || debug_show_all_regs ) { + if (debug_show_labels) printf (" frt%s:", has_frtp?"p":"" ); + /* If the result is a dfp128 value, the dfp128 value is + contained in the frt, frtp values which are split across + a pair of VSRs. */ + if (uses_dfp128_output) { + if (verbose) print_vsr (28); + if (verbose) print_vsr (29); + value1 = get_vsrhd_vs28 (); + value3 = get_vsrhd_vs29 (); + dissect_dfp128_float (value1, value3); + } else { + if (debug_show_raw_values) generic_print_float_as_hex (frt); + printf (" %e", frt); + if (has_frtp) { + if (debug_show_raw_values) generic_print_float_as_hex (frtp); + printf (" %e", frtp); + } + } + } +} + +/* implementation detail.. FRS and FRB use the same set of regs. 
*/ +void print_frs_or_frb () { + unsigned long long vsrvalue1, vsrvalue3; + if (debug_show_labels) { + if (has_frs) printf (" frs%s:", has_frsp?"p":"" ); + if (has_frb) printf (" frb%s:", has_frbp?"p":"" ); + } + if (uses_dfp128_input) { + if (verbose) print_vsr (26); + if (verbose) print_vsr (27); + vsrvalue1 = get_vsrhd_vs26 (); vsrvalue3 = get_vsrhd_vs27 (); + dissect_dfp128_float (vsrvalue1, vsrvalue3); + } else if (instruction_is_dp) { + generic_print_double_as_hex (frsb); + generic_print_double_as_hex (frsbp); + } else if (instruction_is_sp) { + generic_print_float_as_hex (frsb); + generic_print_float_as_hex (frsbp); + } else { + printf (" %18.8e", frsb); + printf (" %18.8e", frsbp); + } +} + +void print_ra () { + if (debug_show_labels) printf (" ra:"); + /* special case for when ra == &buffer. */ + if ( (void *)ra == &buffer ) + printf (" (&buffer)"); + else if ( (void *)ra != &buffer || debug_show_raw_values) { + printf (" %lx", ra); + } +} + +void print_rb () { + if (debug_show_labels) printf (" rb:"); + if ( (void *)rb == &buffer) + printf (" (&buffer)"); + else + printf (" %lx", rb); +} + +void print_rc () { + if (debug_show_labels) printf (" rc:"); + printf (" %lx", rc); +} + +void print_rs () { + if (debug_show_labels) printf (" rs:"); + printf (" %lx", rs); + if (has_rsp) { + if (debug_show_labels) printf (" rsp:"); + printf (" %lx", rsp); + } +} + +void print_rt () { + if (debug_show_labels) printf (" rt%s:", has_rtp?"p":""); + printf (" %16lx", rt); + if (has_rtp) { + printf (" %16lx", rtp); + } +} + +void print_vra () { + if (debug_show_labels) printf (" vra:"); + printf (" %016lx,%016lx", vra[0], vra[1]); +} + +void print_vrb () { + if (debug_show_labels) printf (" vrb:"); + printf (" %016lx,%016lx", vrb[0], vrb[1]); +} + +void print_vrc () { + if (debug_show_labels) printf (" vrc:"); + printf (" %016lx,%016lx", vrc[0], vrc[1]); +} + +/* for VRM, don't print leading zeros for better visibility of diffs */ +void print_vrm () { + if 
(debug_show_labels) printf (" vrm:"); + printf (" %16lx,%16lx", vrm[0], vrm[1]); +} + +void print_vrt () { + if (debug_show_labels) printf (" vrt:"); + if (debug_show_raw_values || (output_mask && uses_load_buffer )) { + printf (" %16lx,", vrt[1]); + printf ( "%016lx", vrt[0]); + } + if (!post_test) return; + if (!output_mask) { + printf (" %16lx,", vrt[1]); + printf ("%016lx", vrt[0]); + } else { + /* there is a mask requiring special handling. */ + if (instruction_is_dp) { + if (output_mask&DP0) + special_print_dp_value (vrt[1]); + if (output_mask&DP1) + special_print_dp_value (vrt[0]); + } + if (instruction_is_sp) { + if (output_mask&SP0) + special_print_sp_value (0xffffffff&vrt[1]>>32); + if (output_mask&SP1) + special_print_sp_value (0xffffffff&vrt[1]); + if (output_mask&SP2) + special_print_sp_value (0xffffffff&vrt[0]>>32); + if (output_mask&SP3) + special_print_sp_value (0xffffffff&vrt[0]); + } + } +} + +void print_xa_or_xc () { + if (has_xa) { + if (debug_show_labels) printf (" vec_xa:"); + printf (" %016lx,", vec_xa[0] ); + printf ("%016lx", vec_xa[1] ); + } + if (has_xc | has_xap) { // Note that xap is shared with xc. + if (debug_show_labels) printf (" vec_x%s", has_xc?"c":"ap"); + printf (" %016lx,", vec_xc[0] ); + printf ("%016lx", vec_xc[1] ); + } +} + +void print_xb () { + if (debug_show_labels) printf (" vec_xb:"); + if (instruction_is_sp_estimate) { + print_vec_as_sp (vec_xb[0]); + printf (","); + print_vec_as_sp (vec_xb[1]); + } else { + printf (" %016lx,", vec_xb[0] ); + printf ("%016lx", vec_xb[1] ); + } +} + +void print_xs () { + if (debug_show_labels) printf (" vec_xs:"); + printf (" %016lx,", vec_xs[0] ); + printf ("%016lx", vec_xs[1] ); +} + +//fixme - consolidate this with print_xt variation. 
+void print_xtp () { +if (debug_show_labels) printf (" vec_xtp:" ); + printf (" %16lx", XTp0[0]); + printf (" %16lx", XTp0[1]); + printf (" %16lx", XTp1[0]); + printf (" %16lx", XTp1[1]); +} + +void print_xsp () { + // Xsp uses the same pair of regs as xtp does. + print_xtp (); +} + +void print_xt () { +if (debug_show_labels) printf (" vec_xt:" ); + if (debug_show_raw_values) { + printf (" %16lx", vec_xt[0]); + printf (" %16lx", vec_xt[1]); + } + // Don't print the xt value unless we are post-instruction test. + if (!post_test) return; + if (!output_mask ) { + if (vec_xt[0] == (unsigned long)&buffer) printf (" (&buffer) "); + else printf (" %16lx", vec_xt[0]); + if (vec_xt[1] == (unsigned long)&buffer) printf (" (&buffer) "); + else printf (" %16lx", vec_xt[1]); + if (has_xtp) { + printf (" %16lx", XTp0[0]); + printf (" %16lx", XTp0[1]); + printf (" %16lx", XTp1[0]); + printf (" %16lx", XTp1[1]); + } + } else { + /* there is a mask requiring special handling. */ + if (instruction_is_dp) { + if (output_mask&0b100000) + special_print_dp_value (vec_xt[0]); + if (output_mask&0b010000) + special_print_dp_value (vec_xt[1]); + } + if (instruction_is_sp) { + if (output_mask&0b1000) + special_print_sp_value (0xffffffff&vec_xt[0]>>32); + else print_undefined (); + if (output_mask&0b0100) + special_print_sp_value (0xffffffff&vec_xt[0]); + else print_undefined (); + if (output_mask&0b0010) + special_print_sp_value (0xffffffff&vec_xt[1]>>32); + else print_undefined (); + if (output_mask&0b0001) + special_print_sp_value (0xffffffff&vec_xt[1]); + else print_undefined (); + } + if (instruction_is_b16) { + if (output_mask&B16_0) + special_print_bf16_value (0xffffff& (vec_xt[0]>>48)); + else + print_undefined (); + if (output_mask&B16_1) + special_print_bf16_value (0xffffff& (vec_xt[0]>>32)); + else + print_undefined (); + if (output_mask&B16_2) + special_print_bf16_value (0xffffff& (vec_xt[0]>>16)); + else + print_undefined (); + if (output_mask&B16_3) + special_print_bf16_value 
(0xffffff& (vec_xt[0] )); + else + print_undefined (); + if (output_mask&B16_4) + special_print_bf16_value (0xffffff& (vec_xt[1]>> 48)); + else + print_undefined (); + if (output_mask&B16_5) + special_print_bf16_value (0xffffff& (vec_xt[1]>> 32)); + else + print_undefined (); + if (output_mask&B16_6) + special_print_bf16_value (0xffffff& (vec_xt[1]>> 16)); + else + print_undefined (); + if (output_mask&B16_7) + special_print_bf16_value (0xffffff& (vec_xt[1] )); + else + print_undefined (); + } + } +} + +void print_register_header () { + post_test = 0; + if (has_ra || debug_show_all_regs) print_ra (); + if (has_rb || debug_show_all_regs) print_rb (); + if (has_rc || debug_show_all_regs) print_rc (); + if (has_rs || has_rsp || debug_show_all_regs) print_rs (); + // only print the target registers before the test if verbosity is high. + if (has_rt && debug_show_all_regs) print_rt (); + if (has_xa || has_xap || has_xc || debug_show_all_regs) print_xa_or_xc (); + if (has_xb || debug_show_all_regs) print_xb (); + if (has_xs || debug_show_all_regs ) { + if (debug_show_labels) printf (" vec_xs%s:", has_xsp?"p":""); + if (has_xsp) print_xsp (); else print_xs (); + } + /* printing of the xtp pair is handled differently. 
*/ + if (has_xt && debug_show_all_regs ) { + if (has_xtp) print_xtp (); else print_xt (); + } + if (has_vra || debug_show_all_regs) print_vra (); + if (has_vrb || debug_show_all_regs) print_vrb (); + if (has_vrc || debug_show_all_regs) print_vrc (); + if (has_vrm || debug_show_all_regs) print_vrm (); + if (has_vrt && debug_show_all_regs) print_vrt (); + if (has_frs || has_frb || debug_show_all_regs) print_frs_or_frb (); + if (uses_acc_src || debug_show_all_regs) print_accumulator (); + if (uses_load_buffer) dump_buffer (); +} + +void print_register_footer () { + post_test = 1; + if ( (uses_CRBIT || debug_show_all_regs || (uses_RC ) )) { + if (debug_show_labels) printf (" CR:"); + printf (" [%08lx]", current_cr); + } + + if (current_fpscr) dissect_fpscr (current_fpscr); + + if (uses_RC) dissect_cr_rn (current_cr, 6); + if (uses_acc_dest || uses_acc_vsrs) print_accumulator (); + if (has_vrt || debug_show_all_regs) print_vrt (); + if (has_xt || debug_show_all_regs) { + if (has_xtp) { + print_xtp (); + } else { + print_xt (); + } + } + if (has_ra_target || debug_show_all_regs) print_ra (); + if (has_rt || debug_show_all_regs) print_rt (); + if (has_frt || debug_show_all_regs) print_frt (); +} + +void generic_prologue () { + if (verbose) + printf (" %s %s \n", __DATE__, __TIME__); +} + +/* + Helpers to build the VSX input table. +*/ +#define MAX_VSX_ARRAY_SIZE 42 +unsigned long nb_divmod_num_vsxargs; +unsigned long nb_divmod_den_vsxargs; +unsigned long nb_vsxargs; +unsigned long long * vsxargs = NULL; +void build_vsx_table (void) +{ + long i = 0; + vsxargs = memalign (16, MAX_VSX_ARRAY_SIZE * sizeof (unsigned long)); +/* + The following hex values map to assorted Fp values including zero, inf, nan. 
+ +/-INF EXP:MAX FRAC:0 + +/-NOR EXP:!0 FRAC:!0 + +/-DEN EXP:0 FRAC:!0 + +/-zero EXP:0 FRAC:0 +*/ +// | | // SP || DP + vsxargs[i++] = 0x7F800000ff800000UL; // +inf, -inf || NOR (big) + vsxargs[i++] = 0xff8000007f800000UL; // -inf, +inf || -NOR (big) + vsxargs[i++] = 0xff7ffffe7f7ffffeUL; // -NOR (big), +NOR (big) || +NOR (big) + vsxargs[i++] = 0x0080000e8080000eUL; // +NOR (tiny), -NOR (tiny)|| +NOR (tiny) + vsxargs[i++] = 0x0180055e0180077eUL; // +NOR (rnd), -NOR (rnd) || random # + nb_divmod_den_vsxargs = i; // Values that are safe to divide by are above. + vsxargs[i++] = 0x0000111e8000222eUL; // +den, -den || den + vsxargs[i++] = 0x7ff0000000000000UL; // NAN, +zero || +inf + vsxargs[i++] = 0xfff0000000000000UL; // NAN, +zero || -inf + vsxargs[i++] = 0x2208400000000000UL; // dfp128 value + vsxargs[i++] = 0x0000000000000009UL; // dfp128 value + vsxargs[i++] = 0xffff000180000001UL; // NAN, NOR || NAN + vsxargs[i++] = 0x0000000000000000UL; // +zero, +zero || +zero + vsxargs[i++] = 0x8000000000000000UL; // -zero, +zero || -zero + nb_divmod_num_vsxargs = i; // Values that are safe to be divided are above. + nb_vsxargs = i; +/* Eyecatcher. If there are any 999_999 patterns in the generated output, + it is likely a nb_vsx* bounds check has been missed. + This may also be seen if we are dealing with a quadword instruction + and have an odd number of pairs. 
*/ + vsxargs[i++] = 0x9999999999999999UL; + vsxargs[i++] = 0x9999999999999999UL; +} + +#define CHECK_LINES \ +if ( (i) == nb_divmod_num_vsxargs) printf ("--numerator line--\n"); \ +if ( (i) == nb_divmod_den_vsxargs) printf ("--denominator line--\n"); + +void dump_vsxargs () { + int i; + printf ("\ndump_vsxargs:\n"); + printf ("SP: \n"); + for (i = 0;i>32))); + printf ("%08llx ", 0xffffffff & vsxargs[i]); + dissect_sp_value (0xffffffff & (vsxargs[i]>>32)); + dissect_sp_value (0xffffffff & (vsxargs[i]>>0)); + printf ("\n"); + } + printf ("\n DP: \n"); + for (i = 0;i1) + printf ("Registered %ld args values\n", nb_args); +} + +/* hardcoded dfp128 table. */ +unsigned long long dfp128_vals[] = { + // Some finite numbers + 0x2208000000000000ULL, 0x0000000000000001ULL, // 1 *10^0 + 0xa208800000000000ULL, 0x0000000000000001ULL, // -1 *10^1 + 0x2208000000000000ULL, 0x0000000000000000ULL, // 0*10^256 + 0x0000000000000000ULL, 0x0000000000000001ULL, // 1 *10^-6176. (smallest exp) + 0x77ffc00000000000ULL, 0x0000000000000001ULL, // 1 *10^6111 (largest exp) + 0x77ffffffffffffffULL, 0xffffffffffffffffULL, // max possible value *10^6111 (largest exp) + 0x0000000000000000ULL, 0x0000000000000001ULL, // min possible value 1 *10^-6176. (smallest exp) + 0x8000000000000000ULL, 0x0000000000000001ULL, // -1 *10^-6176. (smallest exp) + 0xa208800000000000ULL, 0x0000000000000777ULL, // other neg value. + 0x2208400000000000ULL, 0x0000000000000009ULL, // other value. + 0x2208800000000011ULL, 0x1110000678900009ULL, // other value. 
+ // flavors of zero + 0x2208000000000000ULL, 0x0000000000000000ULL, // 0*10^256 + 0xa208000000000000ULL, 0x0000000000000000ULL, // -0*10^0 + 0xa248000000000000ULL, 0x0000000000000000ULL, // 0*10^256 + // flavors of NAN + 0x7c00000000000000ULL, 0x0000000000000000ULL, // quiet + 0xfc00000000000000ULL, 0xc00100035b007700ULL, // NAN + 0x7e00000000000000ULL, 0xfe000000d0e0a0d0ULL, // signaling NAN + // flavors of Infinity + 0x7800000000000000ULL, 0x0000000000000000ULL, // +inf + 0xf800000000000000ULL, 0x0000000000000000ULL, // -inf + 0xf900000000000000ULL, 0x0000000000000000ULL, // -inf + + 0x9999999999999999ULL, 0x9999999999999999ULL // Eyecatcher. +}; +unsigned long nb_dfp128args = 32; + + +/* ********************************* */ +/* helpers to set up loop iterators. */ + +void debug_show_iter_ranges () { +/* Show the iteration maxes and the increments. */ + if (debug_show_iters) + printf ("{ a:/%2ld (+%ld) b:/%ld (+%ld) c:/%ld (+%ld) m:/%ld (+%ld) } \n", + a_iters, a_inc, b_iters, b_inc, c_iters, c_inc, m_iters, m_inc ); +} + +void set_up_iterators () { + /* Set the baselines. + Increments for a, b, m default to 1, c defaults to 2. + Total number of iterations default to 1. */ + a_inc = 1; b_inc = 1; + c_inc = 2; m_inc = 1; + a_iters = 1; b_iters = 1; + c_iters = 1; m_iters = 1; + /* Now, set the iterator limits as appropriate for the arguments + that will be used to test the instructions. */ + if (has_vra || has_xa) + a_iters = nb_vsxargs; + if (has_ra) + a_iters = nb_args; + if (has_frb || has_frs || has_vrb || has_xb) { + b_iters = nb_vsxargs; + if (uses_dfp128_input) + b_inc = 2; + } + if (has_rb) + b_iters = nb_args; + if (has_vrc || has_xc) { + if (uses_xc_as_blend_mask) + c_iters = MASK64SIZE; + else + c_iters = nb_vsxargs; + } else if (has_rc) { + c_iters = nb_args; + } else if (has_dcmx) + // Note: dcmx is hardcoded in tests, otherwise would set to dcmx_iters. 
+ c_iters = 1; + else if (has_rs_as_value_source) { + c_iters = nb_args; + c_inc = 1; + } + if (vrm_override) + m_iters = 1; + if (has_vrm ) + m_iters = 4; + if (is_divide_or_modulo) { + a_iters = nb_divmod_num_vsxargs+1; + b_iters = nb_divmod_den_vsxargs; + } + if (is_clear_or_insert_insns) { + a_iters = 4; + b_iters = 6; + } + if ((has_vra+has_vrb+has_vrc+has_vrm+has_xa+has_xb+uses_MC > 2) && + (verbose < 4)) { + /* Instruction tests using multiple fields will generate a lot of + output. In those cases, arbitrarily increase the increment values + to cut the number of iterations. */ + a_inc+= 5; + b_inc+= 5; + c_inc+= 5; + } + /* Drop the iterator count if we've specified a limit. */ + a_iters = min (a_iters, a_limit); + b_iters = min (b_iters, b_limit); + c_iters = min (c_iters, c_limit); +} + +/* This is printed inline, so do not add carriage return, etc. */ +void debug_show_current_iteration () { + if (debug_show_iters) + printf ("{ %2lx %lx %lx %lx } ", vrai, vrbi, vrci, vrmi); +} + +void debug_dump_buffer () { + if ( (verbose>4) || (verbose > 1 && uses_buffer)) { + dump_raw_buffer (); + printf ("\n"); + } +} + +void print_result_buffer () { + if (uses_store_buffer) + dump_buffer (); +} + +/* display the instruction form. */ +void debug_show_form (const char * instruction_name, char * cur_form) { + if (verbose>0) { + printf ("Instruction Name and form: %s ", instruction_name); + display_form_components (cur_form); + } +} + +/* ***************************** */ +/* Build Floating point helpers. */ + +/* Data Formats for floating point. + * Floating point values include the following: + * -INF -NOR -DEN -0 +0 +DEN +NOR +INF + * INFinite: When the biased exponent is the MAX possible value, and + * the fraction field is 0. + * ZERo. biased exponent is zero, fraction is 0. + * DENormalized. biased exponent is 0, and fraction is non-zero. + * NORmalized. All other values that are neither Zero, Denormalized, + * or Infinite. Biased exponent = 1..MAX-1. 
+ */
+
+/* Quad (128bit), bit 0 being the most significant bit:
+ *   | Sign | EXPonent+Bias | FRACTION/Mantissa |
+ *     0      1 ......... 15  16 ........... 127
+ * The exponent is 15 bits, ranging from 0x0000 .. 0x7fff:
+ *   0          = zero if fraction == 0, DeNormal if fraction != 0
+ *   1...0x7ffe = normalized
+ *   0x7fff     = infinite if fraction == 0, NaN if fraction != 0
+ */
+#define QUAD_EXP_MASK 0x7fff
+
+/* The shift and the mantissa mask assume we are working on the top half
+ * of a quad, held in a 64-bit register.
+ */
+#define QUAD_EXP_SHIFT 48
+#define QUAD_MANTISSA_MASK 0x0000ffffffffffff
+
+/* Pack the top doubleword of a binary128 value.
+ *   signbit  - shifted up to bit 63 of the result.
+ *   exponent - biased exponent; only the low 15 bits are kept.
+ *   mantissa - only the low 48 bits are kept.
+ * Returns the packed doubleword.  When debug_show_tables is set, the value
+ * is also printed as single-precision, float and double views.
+ */
+unsigned long long build_binary128_float (unsigned long long signbit,
+                                          unsigned long long exponent,
+                                          unsigned long long mantissa) {
+   const unsigned long long sign_field = signbit << 63;
+   const unsigned long long exp_field =
+      (exponent & QUAD_EXP_MASK) << QUAD_EXP_SHIFT;
+   const unsigned long long frac_field = mantissa & QUAD_MANTISSA_MASK;
+   const unsigned long long thevalue = sign_field | exp_field | frac_field;
+
+   if (!debug_show_tables)
+      return thevalue;
+
+   printf ("%s %llx\n", __FUNCTION__, (unsigned long long)thevalue);
+
+   /* Four 32-bit windows onto the value, stepping down 16 bits at a time. */
+   printf ("SP: ");
+   special_print_sp_value (0xffffffff & (thevalue >> 48));
+   special_print_sp_value (0xffffffff & (thevalue >> 32));
+   special_print_sp_value (0xffffffff & (thevalue >> 16));
+   special_print_sp_value (0xffffffff & thevalue);
+   /* Printing zeros here is unnecessary, but visually helpful for
+      symmetry and visualization of the quadword. */
+   for (int pad = 0; pad < 4; pad++)
+      special_print_sp_value (0);
+   printf ("\n");
+
+   printf ("F: ");
+   generic_print_ull_as_float (0xffffffff & (thevalue >> 32));
+   generic_print_ull_as_float (0xffffffff & thevalue);
+   generic_print_ull_as_float (0);
+   generic_print_ull_as_float (0);
+   printf ("\n");
+
+   printf ("D: ");
+   generic_print_ull_as_double (thevalue);
+   generic_print_ull_as_double (0);
+   printf ("\n");
+
+   return thevalue;
+}
+
+/* Exponent values fed to the float precision tests.  Building with
+   EXHAUSTIVE_TESTS defined selects the longer list. */
+unsigned long exponent_table[] = {
+#ifdef EXHAUSTIVE_TESTS
+   0x0000, /* +/-0 or +/-DENormalized, depending on associated mantissa. */
+   0x1a,   /* within NORmalized for 16, 32, 64, 128-bit. */
+   0x1f,   /* +/-INF or +/-NaN for 16bit, NORmalized for 32, 64, 128 */
+   0xff,   /* +/-INF or +/-NaN for 32bit, NORmalized for 64, 128 */
+   0x7ff,  /* +/-INF or +/-NaN for 32 and 64bit, NORmalized for 128 */
+   0x7fff, /* +/-INF or +/-NaN for 128bit. */
+#else
+   0x0000, /* +/-0 or +/-DENormalized, depending on associated mantissa. */
+   0xff,   /* +/-INF or +/-NaN for 32bit, NORmalized for 64, 128 */
+   0x7ff,  /* +/-INF or +/-NaN for 32 and 64bit, NORmalized for 128 */
+   0x7fff, /* +/-INF or +/-NaN for 128bit. */
+#endif
+};
+/* Number of entries in exponent_table. */
+#define MAX_EXPONENTS (sizeof (exponent_table) / sizeof (exponent_table[0]))
+
+/* Mantissa (fraction) values paired with every exponent above. */
+unsigned long mantissa_table[] = {
+#ifdef EXHAUSTIVE_TESTS
+   0xbeefbeefbeef, /* NOR or DEN or NaN */
+   0x000000000000, /* ZERO or INF */
+   0x7fffffffffff, /* NOR or DEN or NaN */
+#else
+   0x000000000000, /* ZERO or INF */
+   0x7fffffffffff, /* NOR or DEN or NaN */
+#endif
+};
+/* Number of entries in mantissa_table. */
+#define MAX_MANTISSAS (sizeof (mantissa_table) / sizeof (mantissa_table[0]))
+
+/* build in 64-bit chunks, low doubleword is zero.
*/
+unsigned long * binary128_float_vsxargs = NULL;
+unsigned long nb_float_vsxargs;
+#define MAX_FLOAT_VSX_ARRAY_SIZE ( ( (MAX_EXPONENTS * MAX_MANTISSAS) * 2 + 1) * 2)
+
+/* Print the binary128 table, one quadword (two table entries) per line. */
+void dump_float_vsx_tables (void) {
+   /* quad */
+   printf ("Quad (binary128_float_vsxargs):\n");
+   /* The index has the same type as nb_float_vsxargs, so the bound check
+      is not a signed/unsigned comparison. */
+   for (unsigned long i = 0 ; i < nb_float_vsxargs; i+= 2 ) {
+      printf ("%2lu:", i);
+      printf ("%016lx%016lx \n", binary128_float_vsxargs[i], binary128_float_vsxargs[i+1]);
+   }
+   printf ("\n");
+}
+
+/* Allocate and fill binary128_float_vsxargs with one quadword for every
+ * (sign, exponent_table entry, mantissa_table entry) combination, then
+ * record the number of table entries in nb_float_vsxargs.
+ * If the allocation fails, a message goes to stderr and the table is left
+ * empty (nb_float_vsxargs == 0).
+ */
+void build_float_vsx_tables () {
+   long i = 0;
+   unsigned long signbit;
+   unsigned long exponent;
+   unsigned long mantissa;/* also referred to as FRACTION in the ISA.*/
+   unsigned long exponent_index;
+   unsigned long mantissa_index;
+
+   if (debug_show_tables) printf ("%s\n", __FUNCTION__);
+   binary128_float_vsxargs = malloc (MAX_FLOAT_VSX_ARRAY_SIZE
+                                     * sizeof *binary128_float_vsxargs);
+   if (binary128_float_vsxargs == NULL) {
+      /* Do not store through a NULL table. */
+      fprintf (stderr, "%s: out of memory\n", __FUNCTION__);
+      nb_float_vsxargs = 0;
+      return;
+   }
+   for (signbit = 0; signbit < 2; signbit++) {
+      for (exponent_index = 0; exponent_index < MAX_EXPONENTS;
+           exponent_index++) {
+         for (mantissa_index = 0; mantissa_index < MAX_MANTISSAS;
+              mantissa_index++) {
+            exponent = exponent_table[exponent_index];
+            mantissa = mantissa_table[mantissa_index];
+            if (debug_show_tables) {
+               printf ("signbit:%lx ", signbit);
+               printf ("exponent:%4lx ", exponent);
+               printf ("mantissa:%lx ", mantissa);
+               printf ("\n");
+            }
+
+            binary128_float_vsxargs[i] = build_binary128_float (signbit, exponent,
+                                                                mantissa);
+            // for simplicity, leave the lower half of the 128-bit value as zero.
+            binary128_float_vsxargs[i+1] = 0;
+            i += 2;
+         }
+      }
+   }
+   nb_float_vsxargs = i;
+   if (verbose>1)
+      printf ("Registered %ld float_vsxargs\n", nb_float_vsxargs);
+}
+
+/* **************************************** */
+/* Source/destination register initializers */
+
+/* Preload every destination the tests may write: DEADBEEF into the
+ * vector and integer targets, 0.0 into the FP targets, and fixed vsxargs
+ * entries into the XTp pairs.  When uses_acc_dest is set the TEST_ACC
+ * values are also pushed into the accumulator.
+ */
+void initialize_target_registers () {
+   vrt[0] = DEADBEEF;
+   vrt[1] = DEADBEEF;
+   vec_xt[0] = vec_xt[1] = DEADBEEF;
+   rt = DEADBEEF;
+   frt = 0.0;
+   frtp = 0.0;
+   // xs/xt register pairs.
+   XTp0[0] = vsxargs[6] ; XTp0[1] = vsxargs[5];
+   XTp1[0] = vsxargs[4] ; XTp1[1] = vsxargs[3];
+   if (uses_acc_dest) {
+      // Initialize the associated VSRs to 'DEADBEEF', then call
+      // xxmtacc to do the actual set.
+      TEST_ACC0[0] = DEADBEEF; TEST_ACC0[1] = DEADBEEF;
+      TEST_ACC1[0] = DEADBEEF; TEST_ACC1[1] = DEADBEEF;
+      TEST_ACC2[0] = DEADBEEF; TEST_ACC2[1] = DEADBEEF;
+      TEST_ACC3[0] = DEADBEEF; TEST_ACC3[1] = DEADBEEF;
+      push_vsrs_to_acc ();
+   }
+}
+
+/* Store hexval in the .ull member of a union rosetta_t and return its
+   .flt member.  NOTE(review): which bytes .flt overlays depends on the
+   union definition and on endianness - confirm against rosetta_t. */
+float float_as_hex (unsigned long long hexval) {
+   union rosetta_t stone;
+   stone.ull = hexval;
+   return stone.flt;
+}
+
+/* Store hexval in the .ull member of a union rosetta_t and return its
+   .dbl member. */
+double double_as_hex (unsigned long long hexval) {
+   union rosetta_t stone;
+   stone.ull = hexval;
+   return stone.dbl;
+}
+
+/* Load every source operand for the current iteration from the input
+ * tables, selected by the iterators vrai, vrbi, vrci and vrmi.
+ * Several steps overwrite values set by earlier ones (ra, rb, rc, rs), and
+ * some also adjust a_iters / b_iters, so the statement order matters.
+ */
+void initialize_source_registers () {
+   SET_CR_ZERO;
+   current_cr = 0;
+   current_fpscr = 0;
+   SET_FPSCR_ZERO;
+   current_fpscr = 0;
+   if (is_divide_or_modulo) {
+      /* Numerators and denominators each wrap within their own limit. */
+      vra[0] = vec_xa[0] = vsxargs[ (vrai  ) % nb_divmod_num_vsxargs];
+      vra[1] = vec_xa[1] = vsxargs[ (vrai+1) % nb_divmod_num_vsxargs];
+      vrb[0] = vec_xb[0] = vsxargs[ (vrbi  ) % nb_divmod_den_vsxargs];
+      vrb[1] = vec_xb[1] = vsxargs[ (vrbi+1) % nb_divmod_den_vsxargs];
+   } else {
+      vra[0] = vec_xa[0] = vsxargs[ (vrai  ) % nb_vsxargs];
+      vra[1] = vec_xa[1] = vsxargs[ (vrai+1) % nb_vsxargs];
+      vrb[0] = vec_xb[0] = vsxargs[ (vrbi  ) % nb_vsxargs];
+      vrb[1] = vec_xb[1] = vsxargs[ (vrbi+1) % nb_vsxargs];
+      if (is_testlsb) {
+         /* Special casing for this test to force the vec_xb low bits
+            to zero or one. */
+         if (vrbi%3 == 0) {
+            // force bits to zero.
+            vec_xb[0] = vec_xb[0]&0xfefefefefefefefeUL;
+            vec_xb[1] = vec_xb[1]&0xfefefefefefefefeUL;
+         }
+         if (vrbi%3 == 1) {
+            // force bits to one.
+            vec_xb[0] = vec_xb[0]|0x0101010101010101UL;
+            vec_xb[1] = vec_xb[1]|0x0101010101010101UL;
+         }
+      }
+   }
+   if (has_xap) {
+      /* shift this back to vrai if we are an xa pair */
+      vrc[0] = vec_xc[0] = vsxargs[ (vrai+2) % nb_vsxargs];
+      vrc[1] = vec_xc[1] = vsxargs[ (vrai+3) % nb_vsxargs];
+   } else {
+      vrc[0] = vec_xc[0] = vsxargs[ (vrci  ) % nb_vsxargs];
+      vrc[1] = vec_xc[1] = vsxargs[ (vrci+1) % nb_vsxargs];
+   }
+
+   if (uses_xc_as_blend_mask) {
+      vec_xc[0] = mask64[ (vrci  )%MASK64SIZE];
+      vec_xc[1] = mask64[ (vrci+1)%MASK64SIZE];
+   }
+
+   if (uses_dfp128_input) {
+      frsb  = double_as_hex (dfp128_vals[ (vrbi  ) % nb_dfp128args]);
+      frsbp = double_as_hex (dfp128_vals[ (vrbi+1) % nb_dfp128args]);
+   } else {
+      /* NOTE(review): assigned directly, not through double_as_hex () as
+         in the branch above - confirm this is intended for the type of
+         frsb. */
+      frsb  = vsxargs[ (vrbi  )%nb_vsxargs];
+      frsbp = vsxargs[ (vrbi+1)%nb_vsxargs];
+   }
+
+   /* The iterator limits may come from nb_vsxargs or MASK64SIZE rather
+      than nb_args, so every args[] index is wrapped to stay in the table. */
+   ra = args[vrai % nb_args ];
+   rb = args[vrbi % nb_args ];
+   rc = args[vrci % nb_args ];
+   rs = args[vrai % nb_args ];
+   rsp = args[ (vrai+1) % nb_args ];
+
+   if (is_clear_or_insert_insns) {
+      if (has_rb) rb = 2*vrbi;
+      /* note special case for is_insert_double, see set_up_iterators () */
+      if (has_ra) ra = 4*vrai;
+      if (is_insert_double) {
+         /* For an insert_double, the results are undefined
+            for ra > 8, so modulo those into a valid range. */
+         ra =ra % 9;
+      }
+   }
+
+   if (has_rc) rc = 2*vrci;
+   if (uses_buffer) {
+      /* Point one GPR at the buffer, and shrink the matching iterator
+         count. */
+      if (has_rb) {
+         ra = 8*vrai;
+         rb = (unsigned long) &buffer;
+         b_iters = 1;
+      } else if (has_ra) {
+         ra = (unsigned long ) &buffer;
+         a_iters = 1;
+         if (has_frs || has_rsp) {
+            b_iters = 2;
+         }
+      }
+      initialize_buffer (0);
+   }
+   if (is_mtvsr_insn && has_rb) {
+      rb = mask64[vrbi%MASK64SIZE];
+      b_iters = MASK64SIZE;
+   }
+   if (has_rs_as_value_source) {
+      rs = args[vrci % nb_args ];
+   }
+
+   vrm[0] = vrm_mask[  vrmi    % VRMMASK_SIZE ];
+   vrm[1] = vrm_mask[ (vrmi+1) % VRMMASK_SIZE ];
+
+   dcmx = 1 << vrci;
+
+   if (uses_acc_src) {
+      /* initialize the ACC with data */
+      TEST_ACC0[0] = vsxargs[ (vrai  ) % nb_vsxargs];
+      TEST_ACC0[1] = vsxargs[ (vrai+1) % nb_vsxargs];
+      TEST_ACC1[0] = vsxargs[ (vrai+2) % nb_vsxargs];
+      TEST_ACC1[1] = vsxargs[ (vrai+3) % nb_vsxargs];
+      TEST_ACC2[0] = vsxargs[ (vrai+4) % nb_vsxargs];
+      TEST_ACC2[1] = vsxargs[ (vrai+5) % nb_vsxargs];
+      TEST_ACC3[0] = vsxargs[ (vrai+6) % nb_vsxargs];
+      TEST_ACC3[1] = vsxargs[ (vrai+7) % nb_vsxargs];
+      push_vsrs_to_acc ();
+   }
+   if (uses_acc_vsrs) {
+      /* initialize the VSRs that will be used by the accumulator related tests. */
+      /* NOTE(review): unlike the uses_acc_src case, these indexes are not
+         wrapped with nb_vsxargs - confirm vrai+7 stays inside the table
+         for every test that sets uses_acc_vsrs. */
+      TEST_ACC0[0] = vsxargs[vrai]  ;
+      TEST_ACC0[1] = vsxargs[vrai+1];
+      TEST_ACC1[0] = vsxargs[vrai+2];
+      TEST_ACC1[1] = vsxargs[vrai+3];
+      TEST_ACC2[0] = vsxargs[vrai+4];
+      TEST_ACC2[1] = vsxargs[vrai+5];
+      TEST_ACC3[0] = vsxargs[vrai+6];
+      TEST_ACC3[1] = vsxargs[vrai+7];
+   }
+   if (has_xs) {
+      vec_xs[0] = vsxargs[ (vrai  ) % nb_vsxargs];
+      vec_xs[1] = vsxargs[ (vrai+1) % nb_vsxargs];
+   }
+   if (has_xsp) {
+      /* NOTE(review): this writes vec_xt, not vec_xs - confirm intended. */
+      vec_xt[0] = vsxargs[ (vrai+2) % nb_vsxargs];
+      vec_xt[1] = vsxargs[ (vrai+3) % nb_vsxargs];
+   }
+}
+
+/* Scratch destination shared by the get_vsrhd_vsNN () readers below. */
+unsigned long long vsrd;
+/* Each reader executes mfvsrd on one fixed VSR (26..29), leaves the
+   result in vsrd, and returns it. */
+unsigned long get_vsrhd_vs26 () {
+   __asm__ __volatile__ ("mfvsrd %0, 26":"=r" (vsrd)); return vsrd; }
+unsigned long get_vsrhd_vs27 () {
+   __asm__ __volatile__ ("mfvsrd %0, 27":"=r" (vsrd)); return vsrd; }
+unsigned long get_vsrhd_vs28 () {
+   __asm__ __volatile__ ("mfvsrd %0, 28":"=r" (vsrd)); return vsrd; }
+unsigned long get_vsrhd_vs29 () {
+   __asm__ __volatile__ ("mfvsrd %0, 29":"=r" (vsrd)); return vsrd; }
+
diff --git a/tests/check_ppc64_auxv_cap b/tests/check_ppc64_auxv_cap index 92a219ec84..caceef53d3 100755 --- a/tests/check_ppc64_auxv_cap +++ b/tests/check_ppc64_auxv_cap @@ -15,8 +15,10 @@ P_HWCAP_1=" vsx arch_2_06 power6x dfp pa6t arch_2_05 ic_snoop smt booke" P_HWCAP_2=" cellbe power5+ power5 power4 notb efpdouble efpsingle spe" P_HWCAP_3=" ucache 4xxmac mmu fpu altivec ppc601 ppc64 ppc32 " P_HWCAP2_1=" tar isel ebb dscr htm arch_2_07 arch_3_00 " +# Additional entries as of ... future +P_HWCAP2_2=" arch_3_1 mma " CAPABILITY_FOUND="no" -for POTENTIAL_CAP in $P_HWCAP_1 $P_HWCAP_2 $P_HWCAP_3 $P_HWCAP2_1 ; do +for POTENTIAL_CAP in $P_HWCAP_1 $P_HWCAP_2 $P_HWCAP_3 $P_HWCAP2_1 $P_HWCAP2_2 ; do if [ "x$CAPABILITY_WORD" = "x$POTENTIAL_CAP" ]; then CAPABILITY_FOUND="yes" break