}
+/*------------------------------------------------*/
+/*---- VSX Matrix signed integer GER functions ---*/
+/*------------------------------------------------*/
+static UInt exts4( UInt src)
+{
+ /* Input is a 4-bit value. Extend bit 3 to bits [31:4] */
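+ /* For example, exts4( 0xC ) returns 0xFFFFFFFC and exts4( 0x5 ) returns 0x5. */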
+ if (( src >> 3 ) & 0x1)
+ return src | 0xFFFFFFF0; /* sign bit is a 1, extend */
+ else
+ return src & 0xF; /* make sure high order bits are zero */
+}
+
+static UInt exts8( UInt src)
+{
+ /* Input is an 8-bit value. Extend bit 7 to bits [31:8] */
+ if (( src >> 7 ) & 0x1)
+ return src | 0xFFFFFF00; /* sign bit is a 1, extend */
+ else
+ return src & 0xFF; /* make sure high order bits are zero */
+}
+
+static UInt extz8( UInt src)
+{
+ /* Input is an 8-bit value. Extend src on the left with zeros. */
+ return src & 0xFF; /* make sure high order bits are zero */
+}
+
+static ULong exts16to64( UInt src)
+{
+ /* Input is a 16-bit value. Extend bit 15 to bits [63:16] */
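+ /* For example, exts16to64( 0x8000 ) returns 0xFFFFFFFFFFFF8000ULL. */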
+ if (( src >> 15 ) & 0x1)
+ return ((ULong) src) | 0xFFFFFFFFFFFF0000ULL; /* sign is 1, extend */
+ else
+ /* make sure high order bits are zero */
+ return ((ULong) src) & 0xFFFFULL;
+}
+
+static UInt chop64to32( Long src ) {
+ /* Take a 64-bit input, return the lower 32-bits */
+ return (UInt)(0xFFFFFFFF & src);
+}
+
+static UInt clampS64toS32( Long src ) {
+ /* Take a 64-bit signed input, clamp positive values to 2^31-1 and
+ negative values to -(2^31-1). Return the result in an
+ unsigned 32-bit value. */
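+ /* For example, clampS64toS32( 0x100000000LL ) returns 0x7FFFFFFF. */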
+ Long max_val = 2147483647; // 2^31-1
+ if ( src > max_val)
+ return (UInt)max_val;
+
+ if (src < -max_val)
+ return (UInt)-max_val;
+
+ return (UInt)src;
+}
+
+void write_ACC_entry (VexGuestPPC64State* gst, UInt offset, UInt acc, UInt reg,
+ UInt *acc_word)
+{
+ U128* pU128_dst;
+
+ vassert( (acc >= 0) && (acc < 8) );
+ vassert( (reg >= 0) && (reg < 4) );
+
+ pU128_dst = (U128*) (((UChar*)gst) + offset + acc*4*sizeof(U128)
+ + reg*sizeof(U128));
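+
+ /* The eight ACC entries are laid out as 32 consecutive U128 values in
+ the guest state, four rows per entry, starting at the given offset;
+ the address arithmetic above assumes this layout. */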
+
+ /* The U128 type is defined as an array of unsigned integers. */
+ (*pU128_dst)[0] = acc_word[0];
+ (*pU128_dst)[1] = acc_word[1];
+ (*pU128_dst)[2] = acc_word[2];
+ (*pU128_dst)[3] = acc_word[3];
+ return;
+}
+
+void get_ACC_entry (VexGuestPPC64State* gst, UInt offset, UInt acc, UInt reg,
+ UInt *acc_word)
+{
+ U128* pU128_src;
+
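+ /* Pre-fill the result with recognizable junk values; they are
+ overwritten below. */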
+ acc_word[3] = 0xDEAD;
+ acc_word[2] = 0xBEEF;
+ acc_word[1] = 0xBAD;
+ acc_word[0] = 0xBEEF;
+
+ vassert( (acc >= 0) && (acc < 8) );
+ vassert( (reg >= 0) && (reg < 4) );
+
+ pU128_src = (U128*) (((UChar*)gst) + offset + acc*4*sizeof(U128)
+ + reg*sizeof(U128));
+
+ /* The U128 type is defined as an array of unsigned integers. */
+ acc_word[0] = (*pU128_src)[0];
+ acc_word[1] = (*pU128_src)[1];
+ acc_word[2] = (*pU128_src)[2];
+ acc_word[3] = (*pU128_src)[3];
+ return;
+}
+
+void vsx_matrix_4bit_ger_dirty_helper ( VexGuestPPC64State* gst,
+ UInt offset_ACC,
+ ULong srcA_hi, ULong srcA_lo,
+ ULong srcB_hi, ULong srcB_lo,
+ UInt masks_inst )
+{
+ /* This helper calculates the result for one of the eight ACC entries.
+ It is called once per instruction and writes all four 128-bit rows
+ of the selected entry. */
+ UInt i, j, mask, sum, inst, acc_entry, prefix_inst;
+
+ UInt srcA_nibbles[4][8]; /* word, nibble */
+ UInt srcB_nibbles[4][8]; /* word, nibble */
+ UInt acc_word[4];
+ UInt prod0, prod1, prod2, prod3, prod4, prod5, prod6, prod7;
+ UInt result[4];
+ UInt pmsk = 0;
+ UInt xmsk = 0;
+ UInt ymsk = 0;
+
+ mask = 0xF;
+ inst = (masks_inst >> 5) & 0xFF;
+ prefix_inst = (masks_inst >> 13) & 0x1;
+ acc_entry = masks_inst & 0xF;
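+
+ /* masks_inst is packed by vsx_matrix_ger() as follows:
+ bits [3:0] = ACC entry number (AT)
+ bits [12:5] = instruction XO
+ bit [13] = prefix instruction flag
+ bits [29:14] = 16-bit mask field from the prefix word (ymsk at
+ [17:14], xmsk at [21:18], pmsk at [29:22]). */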
+
+ /* LE word numbering */
+ if ( prefix_inst == 0 ) {
+ /* Set the masks for non-prefix instructions */
+ pmsk = 0b11111111;
+ xmsk = 0b1111;
+ ymsk = 0b1111;
+
+ } else {
+ pmsk = (masks_inst >> 22) & 0xFF;
+ xmsk = (masks_inst >> 18) & 0xF;
+ ymsk = (masks_inst >> 14) & 0xF;
+ }
+
+ /* Address nibbles using IBM numbering */
+ for( i = 0; i < 4; i++) {
+ /* Get the ACC contents directly from the PPC64 state */
+ get_ACC_entry (gst, offset_ACC, acc_entry, 3-i, acc_word);
+
+ // input is in double words
+ for( j = 0; j< 8; j++) {
+ srcA_nibbles[3][j] = (srcA_hi >> (60-4*j)) & mask; // hi bits [63:32]
+ srcA_nibbles[2][j] = (srcA_hi >> (28-4*j)) & mask; // hi bits [31:0]
+ srcA_nibbles[1][j] = (srcA_lo >> (60-4*j)) & mask; // lo bits [63:32]
+ srcA_nibbles[0][j] = (srcA_lo >> (28-4*j)) & mask; // lo bits [31:0]
+
+ srcB_nibbles[3][j] = (srcB_hi >> (60-4*j)) & mask;
+ srcB_nibbles[2][j] = (srcB_hi >> (28-4*j)) & mask;
+ srcB_nibbles[1][j] = (srcB_lo >> (60-4*j)) & mask;
+ srcB_nibbles[0][j] = (srcB_lo >> (28-4*j)) & mask;
+ }
+
+ for( j = 0; j < 4; j++) {
+ if (((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) {
+ if (((pmsk >> 7) & 0x1) == 0)
+ prod0 = 0;
+ else
+ prod0 = exts4( srcA_nibbles[i][0] )
+ * exts4( srcB_nibbles[j][0] );
+
+ if (((pmsk >> 6) & 0x1) == 0)
+ prod1 = 0;
+ else
+ prod1 = exts4( srcA_nibbles[i][1] )
+ * exts4( srcB_nibbles[j][1] );
+
+ if (((pmsk >> 5) & 0x1) == 0)
+ prod2 = 0;
+ else
+ prod2 = exts4( srcA_nibbles[i][2] )
+ * exts4( srcB_nibbles[j][2] );
+
+ if (((pmsk >> 4) & 0x1) == 0)
+ prod3 = 0;
+ else
+ prod3 = exts4( srcA_nibbles[i][3] )
+ * exts4( srcB_nibbles[j][3] );
+
+ if (((pmsk >> 3) & 0x1) == 0)
+ prod4 = 0;
+ else
+ prod4 = exts4( srcA_nibbles[i][4] )
+ * exts4( srcB_nibbles[j][4] );
+
+ if (((pmsk >> 2) & 0x1) == 0)
+ prod5 = 0;
+ else
+ prod5 = exts4( srcA_nibbles[i][5] )
+ * exts4( srcB_nibbles[j][5] );
+
+ if (((pmsk >> 1) & 0x1) == 0)
+ prod6 = 0;
+ else
+ prod6 = exts4( srcA_nibbles[i][6] )
+ * exts4( srcB_nibbles[j][6] );
+
+ if ((pmsk & 0x1) == 0)
+ prod7 = 0;
+ else
+ prod7 = exts4( srcA_nibbles[i][7] )
+ * exts4( srcB_nibbles[j][7] );
+ /* sum is UInt so the result is chopped to 32 bits */
+ sum = prod0 + prod1 + prod2 + prod3 + prod4
+ + prod5 + prod6 + prod7;
+
+ if ( inst == XVI4GER8 )
+ result[j] = sum;
+
+ else if ( inst == XVI4GER8PP )
+ result[j] = sum + acc_word[j];
+
+ } else {
+ result[j] = 0;
+ }
+ }
+ write_ACC_entry (gst, offset_ACC, acc_entry, 3-i, result);
+ }
+}
+
+void vsx_matrix_8bit_ger_dirty_helper( VexGuestPPC64State* gst,
+ UInt offset_ACC,
+ ULong srcA_hi, ULong srcA_lo,
+ ULong srcB_hi, ULong srcB_lo,
+ UInt masks_inst )
+{
+ UInt i, j, mask, sum, inst, acc_entry, prefix_inst;
+
+ UInt srcA_bytes[4][4]; /* word, byte */
+ UInt srcB_bytes[4][4]; /* word, byte */
+ UInt acc_word[4];
+ UInt prod0, prod1, prod2, prod3;
+ UInt result[4];
+ UInt pmsk = 0;
+ UInt xmsk = 0;
+ UInt ymsk = 0;
+
+ mask = 0xFF;
+ inst = (masks_inst >> 5) & 0xFF;
+ prefix_inst = (masks_inst >> 13) & 0x1;
+ acc_entry = masks_inst & 0xF;
+
+ /* LE word numbering */
+ if ( prefix_inst == 0 ) {
+ /* Set the masks */
+ pmsk = 0b1111;
+ xmsk = 0b1111;
+ ymsk = 0b1111;
+
+ } else {
+ pmsk = (masks_inst >> 26) & 0xF;
+ xmsk = (masks_inst >> 18) & 0xF;
+ ymsk = (masks_inst >> 14) & 0xF;
+ }
+
+ /* Address bytes using IBM numbering */
+ for( i = 0; i < 4; i++) {
+ /* Get the ACC contents directly from the PPC64 state */
+ get_ACC_entry (gst, offset_ACC, acc_entry, 3-i, acc_word);
+
+ for( j = 0; j< 4; j++) {
+ srcA_bytes[3][j] = (srcA_hi >> (56-8*j)) & mask;
+ srcA_bytes[2][j] = (srcA_hi >> (24-8*j)) & mask;
+ srcA_bytes[1][j] = (srcA_lo >> (56-8*j)) & mask;
+ srcA_bytes[0][j] = (srcA_lo >> (24-8*j)) & mask;
+
+ srcB_bytes[3][j] = (srcB_hi >> (56-8*j)) & mask;
+ srcB_bytes[2][j] = (srcB_hi >> (24-8*j)) & mask;
+ srcB_bytes[1][j] = (srcB_lo >> (56-8*j)) & mask;
+ srcB_bytes[0][j] = (srcB_lo >> (24-8*j)) & mask;
+ }
+
+ for( j = 0; j < 4; j++) {
+ if (((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) {
+ if (((pmsk >> 3) & 0x1) == 0)
+ prod0 = 0;
+ else
+ prod0 =
+ exts8( srcA_bytes[i][0] )
+ * extz8( srcB_bytes[j][0] );
+
+ if (((pmsk >> 2) & 0x1) == 0)
+ prod1 = 0;
+ else
+ prod1 =
+ exts8( srcA_bytes[i][1] )
+ * extz8( srcB_bytes[j][1] );
+
+ if (((pmsk >> 1) & 0x1) == 0)
+ prod2 = 0;
+ else
+ prod2 =
+ exts8( srcA_bytes[i][2] )
+ * extz8( srcB_bytes[j][2] );
+
+ if (((pmsk >> 0) & 0x1) == 0)
+ prod3 = 0;
+ else
+ prod3 =
+ exts8( srcA_bytes[i][3] )
+ * extz8( srcB_bytes[j][3] );
+
+ /* sum is UInt so the result is chopped to 32 bits */
+ sum = prod0 + prod1 + prod2 + prod3;
+
+ if ( inst == XVI8GER4 )
+ result[j] = sum;
+
+ else if ( inst == XVI8GER4PP )
+ result[j] = sum + acc_word[j];
+
+ } else {
+ result[j] = 0;
+ }
+ }
+ write_ACC_entry (gst, offset_ACC, acc_entry, 3-i, result);
+ }
+}
+
+void vsx_matrix_16bit_ger_dirty_helper( VexGuestPPC64State* gst,
+ UInt offset_ACC,
+ ULong srcA_hi, ULong srcA_lo,
+ ULong srcB_hi, ULong srcB_lo,
+ UInt masks_inst )
+{
+ UInt i, j, mask, inst, acc_entry, prefix_inst;
+ ULong sum;
+ UInt srcA_word[4][2]; /* word, hword */
+ UInt srcB_word[4][2]; /* word, hword */
+ UInt acc_word[4];
+ ULong prod0, prod1;
+ UInt result[4];
+ UInt pmsk = 0;
+ UInt xmsk = 0;
+ UInt ymsk = 0;
+
+ mask = 0xFFFF;
+ inst = (masks_inst >> 5) & 0xFF;
+ prefix_inst = (masks_inst >> 13) & 0x1;
+ acc_entry = masks_inst & 0xF;
+
+ /* LE word numbering */
+ if ( prefix_inst == 0 ) {
+ /* Set the masks for non-prefix instructions */
+ pmsk = 0b11;
+ xmsk = 0b1111;
+ ymsk = 0b1111;
+
+ } else {
+ pmsk = (masks_inst >> 28) & 0x3;
+ xmsk = (masks_inst >> 18) & 0xF;
+ ymsk = (masks_inst >> 14) & 0xF;
+ }
+
+ /* Address half-words using IBM numbering */
+ for( i = 0; i < 4; i++) {
+ /* Get the ACC contents directly from the PPC64 state */
+ get_ACC_entry (gst, offset_ACC, acc_entry, 3-i, acc_word);
+
+ for( j = 0; j< 2; j++) {
+ srcA_word[3][j] = (srcA_hi >> (48-16*j)) & mask;
+ srcA_word[2][j] = (srcA_hi >> (16-16*j)) & mask;
+ srcA_word[1][j] = (srcA_lo >> (48-16*j)) & mask;
+ srcA_word[0][j] = (srcA_lo >> (16-16*j)) & mask;
+
+ srcB_word[3][j] = (srcB_hi >> (48-16*j)) & mask;
+ srcB_word[2][j] = (srcB_hi >> (16-16*j)) & mask;
+ srcB_word[1][j] = (srcB_lo >> (48-16*j)) & mask;
+ srcB_word[0][j] = (srcB_lo >> (16-16*j)) & mask;
+ }
+
+ for( j = 0; j < 4; j++) {
+ if (((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) {
+ if (((pmsk >> 1) & 0x1) == 0)
+ prod0 = 0;
+
+ else
+ prod0 = exts16to64( srcA_word[i][0] )
+ * exts16to64( srcB_word[j][0] );
+
+ if (((pmsk >> 0) & 0x1) == 0)
+ prod1 = 0;
+ else
+ prod1 = exts16to64( srcA_word[i][1] )
+ * exts16to64( srcB_word[j][1] );
+ /* sum is a ULong; the result is chopped or clamped to 32 bits below */
+ sum = prod0 + prod1;
+
+ if ( inst == XVI16GER2 )
+ result[j] = chop64to32( sum );
+
+ else if ( inst == XVI16GER2S )
+ result[j] = clampS64toS32( sum );
+
+ else if ( inst == XVI16GER2PP ) {
+ result[j] = chop64to32( sum + acc_word[j] );
+ }
+
+ else if ( inst == XVI16GER2SPP ) {
+ result[j] = clampS64toS32( sum + acc_word[j] );
+ }
+
+ } else {
+ result[j] = 0;
+ }
+ }
+ write_ACC_entry (gst, offset_ACC, acc_entry, 3-i, result);
+ }
+}
+
+/* Helpers for the VSX Matrix floating-point GER instructions. */
+union convert_t {
+ UInt u32;
+ ULong u64;
+ Float f;
+ Double d;
+};
+
+static Float reinterpret_int_as_float( UInt input )
+{
+ /* Reinterpret the bit pattern of an int as a float. */
+ __attribute__ ((aligned (128))) union convert_t conv;
+
+ conv.u32 = input;
+ return conv.f;
+}
+
+static UInt reinterpret_float_as_int( Float input )
+{
+ /* Reinterpret the bit pattern of a float as an int. */
+ __attribute__ ((aligned (128))) union convert_t conv;
+
+ conv.f = input;
+ return conv.u32;
+}
+
+static Double reinterpret_long_as_double( ULong input )
+{
+ /* Reinterpret the bit pattern of a long as a double. */
+ __attribute__ ((aligned (128))) union convert_t conv;
+
+ conv.u64 = input;
+ return conv.d;
+}
+
+static ULong reinterpret_double_as_long( Double input )
+{
+ /* Reinterpret the bit pattern of a double as a long. */
+ __attribute__ ((aligned (128))) union convert_t conv;
+
+ conv.d = input;
+ return conv.u64;
+}
+
+static Double conv_f16_to_double( ULong input )
+{
+ // The conversion below seems to be alignment sensitive, hence the
+ // aligned attributes on src and result.
+ __attribute__ ((aligned (64))) ULong src;
+ __attribute__ ((aligned (64))) Double result;
+ src = input;
+ __asm__ __volatile__ ("xscvhpdp %x0,%x1" : "=wa" (result) : "wa" (src));
+ return result;
+}
+
+static Float conv_double_to_float( Double src )
+{
+ return (Float)src;
+}
+
+static Double negate_double( Double input )
+{
+ /* Don't negate a NaN value. A NaN has an exponent
+ of all 1's and a non-zero fraction. */
+ __attribute__ ((aligned (128))) union convert_t conv;
+
+ conv.d = input;
+
+ if ( ( ( conv.u64 & I64_EXP_MASK) == I64_EXP_MASK )
+ && ( ( conv.u64 & I64_FRACTION_MASK ) != 0 ) )
+ return input;
+ else
+ return -input;
+}
+
+static Float negate_float( Float input )
+{
+ /* Don't negate a NaN value. A NaN has an exponent
+ of all 1's and a non-zero fraction. */
+ __attribute__ ((aligned (128))) union convert_t conv;
+
+ conv.f = input;
+
+ if ( ( ( conv.u32 & I32_EXP_MASK) == I32_EXP_MASK )
+ && ( ( conv.u32 & I32_FRACTION_MASK ) != 0 ) )
+ return input;
+ else
+ return -input;
+}
+
+void vsx_matrix_16bit_float_ger_dirty_helper( VexGuestPPC64State* gst,
+ UInt offset_ACC,
+ ULong srcA_hi, ULong srcA_lo,
+ ULong srcB_hi, ULong srcB_lo,
+ UInt masks_inst )
+{
+ UInt i, j, mask, inst, acc_entry, prefix_inst;
+
+ UInt srcA_word[4][2]; /* word, hword */
+ UInt srcB_word[4][2]; /* word, hword */
+ Double src10, src11, src20, src21;
+ UInt acc_word_input[4];
+ Float acc_word[4];
+ Double prod;
+ Double msum;
+ UInt result[4];
+ UInt pmsk = 0;
+ UInt xmsk = 0;
+ UInt ymsk = 0;
+
+ mask = 0xFFFF;
+ inst = (masks_inst >> 5) & 0xFF;
+ prefix_inst = (masks_inst >> 13) & 0x1;
+ acc_entry = masks_inst & 0xF;
+
+ if ( prefix_inst == 0 ) {
+ /* Set the masks for non-prefix instructions */
+ pmsk = 0b11;
+ xmsk = 0b1111;
+ ymsk = 0b1111;
+
+ } else {
+ /* Use mask supplied with prefix inst */
+ pmsk = (masks_inst >> 28) & 0x3;
+ xmsk = (masks_inst >> 18) & 0xF;
+ ymsk = (masks_inst >> 14) & 0xF;
+ }
+
+ /* Address half-words using IBM numbering */
+ for( i = 0; i < 4; i++) {
+ /* Get the ACC contents directly from the PPC64 state */
+ get_ACC_entry (gst, offset_ACC, acc_entry, 3-i, acc_word_input);
+
+ acc_word[3] = reinterpret_int_as_float( acc_word_input[3] );
+ acc_word[2] = reinterpret_int_as_float( acc_word_input[2] );
+ acc_word[1] = reinterpret_int_as_float( acc_word_input[1] );
+ acc_word[0] = reinterpret_int_as_float( acc_word_input[0] );
+
+ for( j = 0; j < 2; j++) { // input is in double words
+ srcA_word[3][j] = (UInt)((srcA_hi >> (48-16*j)) & mask);
+ srcA_word[2][j] = (UInt)((srcA_hi >> (16-16*j)) & mask);
+ srcA_word[1][j] = (UInt)((srcA_lo >> (48-16*j)) & mask);
+ srcA_word[0][j] = (UInt)((srcA_lo >> (16-16*j)) & mask);
+
+ srcB_word[3][j] = (UInt)((srcB_hi >> (48-16*j)) & mask);
+ srcB_word[2][j] = (UInt)((srcB_hi >> (16-16*j)) & mask);
+ srcB_word[1][j] = (UInt)((srcB_lo >> (48-16*j)) & mask);
+ srcB_word[0][j] = (UInt)((srcB_lo >> (16-16*j)) & mask);
+ }
+
+ for( j = 0; j < 4; j++) {
+ if (((pmsk >> 1) & 0x1) == 0) {
+ src10 = 0;
+ src20 = 0;
+ } else {
+ src10 = conv_f16_to_double((ULong)srcA_word[i][0]);
+ src20 = conv_f16_to_double((ULong)srcB_word[j][0]);
+ }
+
+ if ((pmsk & 0x1) == 0) {
+ src11 = 0;
+ src21 = 0;
+ } else {
+ src11 = conv_f16_to_double((ULong)srcA_word[i][1]);
+ src21 = conv_f16_to_double((ULong)srcB_word[j][1]);
+ }
+
+ prod = src10 * src20;
+ msum = prod + src11 * src21;
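+ /* The products and their sum are computed in double precision; the
+ result is only rounded to single precision when it is written back
+ below. */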
+
+ if (((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) {
+ /* Note, we do not track the exception handling bits
+ ox, ux, xx, si, mz, vxsnan and vximz in the FPSCR. */
+
+ if ( inst == XVF16GER2 )
+ result[j] = reinterpret_float_as_int(
+ conv_double_to_float(msum) );
+
+ else if ( inst == XVF16GER2PP )
+ result[j] = reinterpret_float_as_int(
+ conv_double_to_float(msum)
+ + acc_word[j] );
+
+ else if ( inst == XVF16GER2PN )
+ result[j] = reinterpret_float_as_int(
+ conv_double_to_float(msum)
+ + negate_float( acc_word[j] ) );
+
+ else if ( inst == XVF16GER2NP )
+ result[j] = reinterpret_float_as_int(
+ conv_double_to_float( negate_double( msum ) )
+ + acc_word[j] );
+
+ else if ( inst == XVF16GER2NN )
+ result[j] = reinterpret_float_as_int(
+ conv_double_to_float( negate_double( msum ) )
+ + negate_float( acc_word[j] ) );
+ } else {
+ result[j] = 0;
+ }
+ }
+ write_ACC_entry (gst, offset_ACC, acc_entry, 3-i, result);
+ }
+}
+
+void vsx_matrix_32bit_float_ger_dirty_helper( VexGuestPPC64State* gst,
+ UInt offset_ACC,
+ ULong srcA_hi, ULong srcA_lo,
+ ULong srcB_hi, ULong srcB_lo,
+ UInt masks_inst )
+{
+ UInt i, j, mask, inst, acc_entry, prefix_inst;
+
+ Float srcA_word[4];
+ Float srcB_word[4];
+ UInt acc_word_input[4];
+ Float acc_word[4];
+ UInt result[4];
+ UInt xmsk = 0;
+ UInt ymsk = 0;
+ Float src1, src2, acc;
+
+ mask = 0xFFFFFFFF;
+ inst = (masks_inst >> 5) & 0xFF;
+ prefix_inst = (masks_inst >> 13) & 0x1;
+ acc_entry = masks_inst & 0xF;
+
+ if ( prefix_inst == 0 ) {
+ /* Set the masks for non-prefix instructions */
+ xmsk = 0b1111;
+ ymsk = 0b1111;
+
+ } else {
+ xmsk = (masks_inst >> 18) & 0xF;
+ ymsk = (masks_inst >> 14) & 0xF;
+ }
+
+ srcA_word[3] = reinterpret_int_as_float( (srcA_hi >> 32) & mask );
+ srcA_word[2] = reinterpret_int_as_float( srcA_hi & mask );
+ srcA_word[1] = reinterpret_int_as_float( (srcA_lo >> 32) & mask );
+ srcA_word[0] = reinterpret_int_as_float( srcA_lo & mask );
+
+ srcB_word[3] = reinterpret_int_as_float( (srcB_hi >> 32) & mask );
+ srcB_word[2] = reinterpret_int_as_float( srcB_hi & mask );
+ srcB_word[1] = reinterpret_int_as_float( (srcB_lo >> 32) & mask );
+ srcB_word[0] = reinterpret_int_as_float( srcB_lo & mask );
+
+ /* Address words using IBM numbering */
+ for( i = 0; i < 4; i++) {
+ /* Get the ACC contents directly from the PPC64 state */
+ get_ACC_entry (gst, offset_ACC, acc_entry, 3-i, acc_word_input);
+
+ acc_word[3] = reinterpret_int_as_float( acc_word_input[3] );
+ acc_word[2] = reinterpret_int_as_float( acc_word_input[2] );
+ acc_word[1] = reinterpret_int_as_float( acc_word_input[1] );
+ acc_word[0] = reinterpret_int_as_float( acc_word_input[0] );
+
+ for( j = 0; j < 4; j++) {
+
+ if ((((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) == 0x1) {
+ /* Note, we do not track the exception handling bits
+ ox, ux, xx, si, mz, vxsnan and vximz in the FPSCR. */
+
+ src1 = srcA_word[i];
+ src2 = srcB_word[j];
+ acc = acc_word[j];
+
+ if ( inst == XVF32GER )
+ result[j] = reinterpret_float_as_int( src1 * src2 );
+
+ else if ( inst == XVF32GERPP )
+ result[j] = reinterpret_float_as_int( ( src1 * src2 ) + acc );
+
+ else if ( inst == XVF32GERPN )
+ result[j] = reinterpret_float_as_int( ( src1 * src2 )
+ + negate_float( acc ) );
+
+ else if ( inst == XVF32GERNP )
+ result[j] = reinterpret_float_as_int(
+ negate_float( src1 * src2 ) + acc );
+
+ else if ( inst == XVF32GERNN )
+ result[j] = reinterpret_float_as_int(
+ negate_float( src1 * src2 ) + negate_float( acc ) );
+ } else {
+ result[j] = 0;
+ }
+ }
+ write_ACC_entry (gst, offset_ACC, acc_entry, 3-i, result);
+ }
+}
+
+void vsx_matrix_64bit_float_ger_dirty_helper( VexGuestPPC64State* gst,
+ UInt offset_ACC,
+ ULong srcX_hi, ULong srcX_lo,
+ ULong srcY_hi, ULong srcY_lo,
+ UInt masks_inst )
+{
+ /* This function computes two of the four rows of one ACC entry per
+ call; the caller invokes it twice to produce the full entry. */
+ UInt i, j, inst, acc_entry, prefix_inst;
+
+ Double srcX_dword[4];
+ Double srcY_dword[2];
+ Double result[2];
+ UInt result_uint[4];
+ ULong result_ulong[2];
+ Double acc_dword[4];
+ ULong acc_word_ulong[2];
+ UInt acc_word_input[4];
+ UInt xmsk = 0;
+ UInt ymsk = 0;
+ UInt start_i;
+ Double src1, src2, acc;
+
+ inst = (masks_inst >> 8) & 0xFF;
+ prefix_inst = (masks_inst >> 16) & 0x1;
+ start_i = (masks_inst >> 4) & 0xF;
+ acc_entry = masks_inst & 0xF;
+
+ if ( prefix_inst == 0 ) {
+ /* Set the masks for non-prefix instructions */
+ xmsk = 0b1111;
+ ymsk = 0b11;
+
+ } else {
+ xmsk = (masks_inst >> 21) & 0xF;
+ ymsk = (masks_inst >> 19) & 0x3;
+ }
+
+ /* Need to store the srcX_dword in the correct index for the following
+ for loop. */
+ srcX_dword[1+start_i] = reinterpret_long_as_double( srcX_lo);
+ srcX_dword[0+start_i] = reinterpret_long_as_double( srcX_hi );
+ srcY_dword[1] = reinterpret_long_as_double( srcY_lo );
+ srcY_dword[0] = reinterpret_long_as_double( srcY_hi );
+
+ for( i = start_i; i < start_i+2; i++) {
+ /* Get the ACC contents directly from the PPC64 state */
+ get_ACC_entry (gst, offset_ACC, acc_entry, 3 - i,
+ acc_word_input);
+
+ acc_word_ulong[1] = acc_word_input[3];
+ acc_word_ulong[1] = (acc_word_ulong[1] << 32) | acc_word_input[2];
+ acc_word_ulong[0] = acc_word_input[1];
+ acc_word_ulong[0] = (acc_word_ulong[0] << 32) | acc_word_input[0];
+ acc_dword[0] = reinterpret_long_as_double( acc_word_ulong[0] );
+ acc_dword[1] = reinterpret_long_as_double( acc_word_ulong[1]);
+
+ for( j = 0; j < 2; j++) {
+
+ if (((xmsk >> i) & 0x1) & ((ymsk >> j) & 0x1)) {
+ /* Note, we do not track the exception handling bits
+ ox, ux, xx, si, mz, vxsnan and vximz in the FPSCR. */
+
+ src1 = srcX_dword[i];
+ src2 = srcY_dword[j];
+ acc = acc_dword[j];
+
+ if ( inst == XVF64GER )
+ result[j] = src1 * src2;
+
+ else if ( inst == XVF64GERPP )
+ result[j] = ( src1 * src2 ) + acc;
+
+ else if ( inst == XVF64GERPN )
+ result[j] = ( src1 * src2 ) + negate_double( acc );
+
+ else if ( inst == XVF64GERNP )
+ result[j] = negate_double( src1 * src2 ) + acc;
+
+ else if ( inst == XVF64GERNN )
+ result[j] = negate_double( src1 * src2 ) + negate_double( acc );
+
+ } else {
+ result[j] = 0;
+ }
+ }
+
+ /* Store the two double-precision results as four 32-bit words in
+ order to write them to the ACC. */
+ result_ulong[0] = reinterpret_double_as_long ( result[0] );
+ result_ulong[1] = reinterpret_double_as_long ( result[1] );
+
+ result_uint[0] = result_ulong[0] & 0xFFFFFFFF;
+ result_uint[1] = (result_ulong[0] >> 32) & 0xFFFFFFFF;
+ result_uint[2] = result_ulong[1] & 0xFFFFFFFF;
+ result_uint[3] = (result_ulong[1] >> 32) & 0xFFFFFFFF;
+
+ write_ACC_entry (gst, offset_ACC, acc_entry, 3 - i,
+ result_uint);
+ }
+}
+
/*----------------------------------------------*/
/*--- The exported fns .. ---*/
/*----------------------------------------------*/
VECZERO(vex_state->guest_VSR62);
VECZERO(vex_state->guest_VSR63);
+ VECZERO( vex_state->guest_ACC_0_r0 );
+ VECZERO( vex_state->guest_ACC_0_r1 );
+ VECZERO( vex_state->guest_ACC_0_r2 );
+ VECZERO( vex_state->guest_ACC_0_r3 );
+ VECZERO( vex_state->guest_ACC_1_r0 );
+ VECZERO( vex_state->guest_ACC_1_r1 );
+ VECZERO( vex_state->guest_ACC_1_r2 );
+ VECZERO( vex_state->guest_ACC_1_r3 );
+ VECZERO( vex_state->guest_ACC_2_r0 );
+ VECZERO( vex_state->guest_ACC_2_r1 );
+ VECZERO( vex_state->guest_ACC_2_r2 );
+ VECZERO( vex_state->guest_ACC_2_r3 );
+ VECZERO( vex_state->guest_ACC_3_r0 );
+ VECZERO( vex_state->guest_ACC_3_r1 );
+ VECZERO( vex_state->guest_ACC_3_r2 );
+ VECZERO( vex_state->guest_ACC_3_r3 );
+ VECZERO( vex_state->guest_ACC_4_r0 );
+ VECZERO( vex_state->guest_ACC_4_r1 );
+ VECZERO( vex_state->guest_ACC_4_r2 );
+ VECZERO( vex_state->guest_ACC_4_r3 );
+ VECZERO( vex_state->guest_ACC_5_r0 );
+ VECZERO( vex_state->guest_ACC_5_r1 );
+ VECZERO( vex_state->guest_ACC_5_r2 );
+ VECZERO( vex_state->guest_ACC_5_r3 );
+ VECZERO( vex_state->guest_ACC_6_r0 );
+ VECZERO( vex_state->guest_ACC_6_r1 );
+ VECZERO( vex_state->guest_ACC_6_r2 );
+ VECZERO( vex_state->guest_ACC_6_r3 );
+ VECZERO( vex_state->guest_ACC_7_r0 );
+ VECZERO( vex_state->guest_ACC_7_r1 );
+ VECZERO( vex_state->guest_ACC_7_r2 );
+ VECZERO( vex_state->guest_ACC_7_r3 );
+
# undef VECZERO
vex_state->guest_CIA = 0;
vex_state->guest_PPR = 0x4ULL << 50; // medium priority
vex_state->guest_PSPB = 0x100; // an arbitrary non-zero value to start with
vex_state->guest_DSCR = 0;
}
#define OFFB_PPR offsetofPPCGuestState(guest_PPR)
#define OFFB_PSPB offsetofPPCGuestState(guest_PSPB)
#define OFFB_DSCR offsetofPPCGuestState(guest_DSCR)
+#define OFFB_ACC_0_r0 offsetofPPCGuestState(guest_ACC_0_r0)
+#define OFFB_ACC_0_r1 offsetofPPCGuestState(guest_ACC_0_r1)
+#define OFFB_ACC_0_r2 offsetofPPCGuestState(guest_ACC_0_r2)
+#define OFFB_ACC_0_r3 offsetofPPCGuestState(guest_ACC_0_r3)
+#define OFFB_ACC_1_r0 offsetofPPCGuestState(guest_ACC_1_r0)
+#define OFFB_ACC_1_r1 offsetofPPCGuestState(guest_ACC_1_r1)
+#define OFFB_ACC_1_r2 offsetofPPCGuestState(guest_ACC_1_r2)
+#define OFFB_ACC_1_r3 offsetofPPCGuestState(guest_ACC_1_r3)
+#define OFFB_ACC_2_r0 offsetofPPCGuestState(guest_ACC_2_r0)
+#define OFFB_ACC_2_r1 offsetofPPCGuestState(guest_ACC_2_r1)
+#define OFFB_ACC_2_r2 offsetofPPCGuestState(guest_ACC_2_r2)
+#define OFFB_ACC_2_r3 offsetofPPCGuestState(guest_ACC_2_r3)
+#define OFFB_ACC_3_r0 offsetofPPCGuestState(guest_ACC_3_r0)
+#define OFFB_ACC_3_r1 offsetofPPCGuestState(guest_ACC_3_r1)
+#define OFFB_ACC_3_r2 offsetofPPCGuestState(guest_ACC_3_r2)
+#define OFFB_ACC_3_r3 offsetofPPCGuestState(guest_ACC_3_r3)
+#define OFFB_ACC_4_r0 offsetofPPCGuestState(guest_ACC_4_r0)
+#define OFFB_ACC_4_r1 offsetofPPCGuestState(guest_ACC_4_r1)
+#define OFFB_ACC_4_r2 offsetofPPCGuestState(guest_ACC_4_r2)
+#define OFFB_ACC_4_r3 offsetofPPCGuestState(guest_ACC_4_r3)
+#define OFFB_ACC_5_r0 offsetofPPCGuestState(guest_ACC_5_r0)
+#define OFFB_ACC_5_r1 offsetofPPCGuestState(guest_ACC_5_r1)
+#define OFFB_ACC_5_r2 offsetofPPCGuestState(guest_ACC_5_r2)
+#define OFFB_ACC_5_r3 offsetofPPCGuestState(guest_ACC_5_r3)
+#define OFFB_ACC_6_r0 offsetofPPCGuestState(guest_ACC_6_r0)
+#define OFFB_ACC_6_r1 offsetofPPCGuestState(guest_ACC_6_r1)
+#define OFFB_ACC_6_r2 offsetofPPCGuestState(guest_ACC_6_r2)
+#define OFFB_ACC_6_r3 offsetofPPCGuestState(guest_ACC_6_r3)
+#define OFFB_ACC_7_r0 offsetofPPCGuestState(guest_ACC_7_r0)
+#define OFFB_ACC_7_r1 offsetofPPCGuestState(guest_ACC_7_r1)
+#define OFFB_ACC_7_r2 offsetofPPCGuestState(guest_ACC_7_r2)
+#define OFFB_ACC_7_r3 offsetofPPCGuestState(guest_ACC_7_r3)
/*------------------------------------------------------------*/
return ifieldDM ( instr );
}
+/* Extract AT field from theInstr 8LS:D form */
+static UChar ifieldAT ( UInt instr ) {
+ return toUChar( IFIELD( instr, 23, 3 ) );
+}
+
/*------------------------------------------------------------*/
/*--- Guest-state identifiers ---*/
/*------------------------------------------------------------*/
* needed.
*/
PPC_GST_DSCR, // Data Stream Control Register
+ PPC_GST_ACC_0_r0, /* Accumulator register file. Eight accumulators each
+ * with four 128-bit registers.
+ */
+ PPC_GST_ACC_0_r1,
+ PPC_GST_ACC_0_r2,
+ PPC_GST_ACC_0_r3,
+ PPC_GST_ACC_1_r0,
+ PPC_GST_ACC_1_r1,
+ PPC_GST_ACC_1_r2,
+ PPC_GST_ACC_1_r3,
+ PPC_GST_ACC_2_r0,
+ PPC_GST_ACC_2_r1,
+ PPC_GST_ACC_2_r2,
+ PPC_GST_ACC_2_r3,
+ PPC_GST_ACC_3_r0,
+ PPC_GST_ACC_3_r1,
+ PPC_GST_ACC_3_r2,
+ PPC_GST_ACC_3_r3,
+ PPC_GST_ACC_4_r0,
+ PPC_GST_ACC_4_r1,
+ PPC_GST_ACC_4_r2,
+ PPC_GST_ACC_4_r3,
+ PPC_GST_ACC_5_r0,
+ PPC_GST_ACC_5_r1,
+ PPC_GST_ACC_5_r2,
+ PPC_GST_ACC_5_r3,
+ PPC_GST_ACC_6_r0,
+ PPC_GST_ACC_6_r1,
+ PPC_GST_ACC_6_r2,
+ PPC_GST_ACC_6_r3,
+ PPC_GST_ACC_7_r0,
+ PPC_GST_ACC_7_r1,
+ PPC_GST_ACC_7_r2,
+ PPC_GST_ACC_7_r3,
PPC_GST_MAX
} PPC_GST;
return mkexpr(val);
}
+/*-----------------------------------------------------------*/
+/* Helpers to access VSX Accumulator register file
+ *-----------------------------------------------------------*/
+static void putACC( UInt index, UInt reg, IRExpr* src )
+{
+ vassert( (index >= 0) && (index < 8) );
+ vassert( (reg >= 0) && (reg < 4) );
+
+ switch (index) {
+ case 0:
+ switch (reg) {
+ case 0:
+ stmt( IRStmt_Put( OFFB_ACC_0_r0, src ) );
+ break;
+ case 1:
+ stmt( IRStmt_Put( OFFB_ACC_0_r1, src ) );
+ break;
+ case 2:
+ stmt( IRStmt_Put( OFFB_ACC_0_r2, src ) );
+ break;
+ case 3:
+ stmt( IRStmt_Put( OFFB_ACC_0_r3, src ) );
+ break;
+ }
+ break;
+
+ case 1:
+ switch (reg) {
+ case 0:
+ stmt( IRStmt_Put( OFFB_ACC_1_r0, src ) );
+ break;
+ case 1:
+ stmt( IRStmt_Put( OFFB_ACC_1_r1, src ) );
+ break;
+ case 2:
+ stmt( IRStmt_Put( OFFB_ACC_1_r2, src ) );
+ break;
+ case 3:
+ stmt( IRStmt_Put( OFFB_ACC_1_r3, src ) );
+ break;
+ }
+ break;
+
+ case 2:
+ switch (reg) {
+ case 0:
+ stmt( IRStmt_Put( OFFB_ACC_2_r0, src ) );
+ break;
+ case 1:
+ stmt( IRStmt_Put( OFFB_ACC_2_r1, src ) );
+ break;
+ case 2:
+ stmt( IRStmt_Put( OFFB_ACC_2_r2, src ) );
+ break;
+ case 3:
+ stmt( IRStmt_Put( OFFB_ACC_2_r3, src ) );
+ break;
+ }
+ break;
+
+ case 3:
+ switch (reg) {
+ case 0:
+ stmt( IRStmt_Put( OFFB_ACC_3_r0, src ) );
+ break;
+ case 1:
+ stmt( IRStmt_Put( OFFB_ACC_3_r1, src ) );
+ break;
+ case 2:
+ stmt( IRStmt_Put( OFFB_ACC_3_r2, src ) );
+ break;
+ case 3:
+ stmt( IRStmt_Put( OFFB_ACC_3_r3, src ) );
+ break;
+ }
+ break;
+
+ case 4:
+ switch (reg) {
+ case 0:
+ stmt( IRStmt_Put( OFFB_ACC_4_r0, src ) );
+ break;
+ case 1:
+ stmt( IRStmt_Put( OFFB_ACC_4_r1, src ) );
+ break;
+ case 2:
+ stmt( IRStmt_Put( OFFB_ACC_4_r2, src ) );
+ break;
+ case 3:
+ stmt( IRStmt_Put( OFFB_ACC_4_r3, src ) );
+ break;
+ }
+ break;
+
+ case 5:
+ switch (reg) {
+ case 0:
+ stmt( IRStmt_Put( OFFB_ACC_5_r0, src ) );
+ break;
+ case 1:
+ stmt( IRStmt_Put( OFFB_ACC_5_r1, src ) );
+ break;
+ case 2:
+ stmt( IRStmt_Put( OFFB_ACC_5_r2, src ) );
+ break;
+ case 3:
+ stmt( IRStmt_Put( OFFB_ACC_5_r3, src ) );
+ break;
+ }
+ break;
+
+ case 6:
+ switch (reg) {
+ case 0:
+ stmt( IRStmt_Put( OFFB_ACC_6_r0, src ) );
+ break;
+ case 1:
+ stmt( IRStmt_Put( OFFB_ACC_6_r1, src ) );
+ break;
+ case 2:
+ stmt( IRStmt_Put( OFFB_ACC_6_r2, src ) );
+ break;
+ case 3:
+ stmt( IRStmt_Put( OFFB_ACC_6_r3, src ) );
+ break;
+ }
+ break;
+
+ case 7:
+ switch (reg) {
+ case 0:
+ stmt( IRStmt_Put( OFFB_ACC_7_r0, src ) );
+ break;
+ case 1:
+ stmt( IRStmt_Put( OFFB_ACC_7_r1, src ) );
+ break;
+ case 2:
+ stmt( IRStmt_Put( OFFB_ACC_7_r2, src ) );
+ break;
+ case 3:
+ stmt( IRStmt_Put( OFFB_ACC_7_r3, src ) );
+ break;
+ }
+ break;
+ }
+}
+
+static IRExpr* /* :: Ity_V128 */ getACC ( UInt index, UInt reg )
+{
+ vassert( (index >= 0) && (index < 8) );
+ vassert( (reg >= 0) && (reg < 4) );
+ switch (index) {
+ case 0:
+ switch (reg) {
+ case 0:
+ return IRExpr_Get( OFFB_ACC_0_r0, Ity_V128 );
+ case 1:
+ return IRExpr_Get( OFFB_ACC_0_r1, Ity_V128 );
+ case 2:
+ return IRExpr_Get( OFFB_ACC_0_r2, Ity_V128 );
+ case 3:
+ return IRExpr_Get( OFFB_ACC_0_r3, Ity_V128 );
+ }
+ break;
+
+ case 1:
+ switch (reg) {
+ case 0:
+ return IRExpr_Get( OFFB_ACC_1_r0, Ity_V128 );
+ case 1:
+ return IRExpr_Get( OFFB_ACC_1_r1, Ity_V128 );
+ case 2:
+ return IRExpr_Get( OFFB_ACC_1_r2, Ity_V128 );
+ case 3:
+ return IRExpr_Get( OFFB_ACC_1_r3, Ity_V128 );
+ }
+ break;
+
+ case 2:
+ switch (reg) {
+ case 0:
+ return IRExpr_Get( OFFB_ACC_2_r0, Ity_V128 );
+ case 1:
+ return IRExpr_Get( OFFB_ACC_2_r1, Ity_V128 );
+ case 2:
+ return IRExpr_Get( OFFB_ACC_2_r2, Ity_V128 );
+ case 3:
+ return IRExpr_Get( OFFB_ACC_2_r3, Ity_V128 );
+ }
+ break;
+
+ case 3:
+ switch (reg) {
+ case 0:
+ return IRExpr_Get( OFFB_ACC_3_r0, Ity_V128 );
+ case 1:
+ return IRExpr_Get( OFFB_ACC_3_r1, Ity_V128 );
+ case 2:
+ return IRExpr_Get( OFFB_ACC_3_r2, Ity_V128 );
+ case 3:
+ return IRExpr_Get( OFFB_ACC_3_r3, Ity_V128 );
+ }
+ break;
+
+ case 4:
+ switch (reg) {
+ case 0:
+ return IRExpr_Get( OFFB_ACC_4_r0, Ity_V128 );
+ case 1:
+ return IRExpr_Get( OFFB_ACC_4_r1, Ity_V128 );
+ case 2:
+ return IRExpr_Get( OFFB_ACC_4_r2, Ity_V128 );
+ case 3:
+ return IRExpr_Get( OFFB_ACC_4_r3, Ity_V128 );
+ }
+ break;
+
+ case 5:
+ switch (reg) {
+ case 0:
+ return IRExpr_Get( OFFB_ACC_5_r0, Ity_V128 );
+ case 1:
+ return IRExpr_Get( OFFB_ACC_5_r1, Ity_V128 );
+ case 2:
+ return IRExpr_Get( OFFB_ACC_5_r2, Ity_V128 );
+ case 3:
+ return IRExpr_Get( OFFB_ACC_5_r3, Ity_V128 );
+ }
+ break;
+
+ case 6:
+ switch (reg) {
+ case 0:
+ return IRExpr_Get( OFFB_ACC_6_r0, Ity_V128 );
+ case 1:
+ return IRExpr_Get( OFFB_ACC_6_r1, Ity_V128 );
+ case 2:
+ return IRExpr_Get( OFFB_ACC_6_r2, Ity_V128 );
+ case 3:
+ return IRExpr_Get( OFFB_ACC_6_r3, Ity_V128 );
+ }
+ break;
+
+ case 7:
+ switch (reg) {
+ case 0:
+ return IRExpr_Get( OFFB_ACC_7_r0, Ity_V128 );
+ case 1:
+ return IRExpr_Get( OFFB_ACC_7_r1, Ity_V128 );
+ case 2:
+ return IRExpr_Get( OFFB_ACC_7_r2, Ity_V128 );
+ case 3:
+ return IRExpr_Get( OFFB_ACC_7_r3, Ity_V128 );
+ }
+ break;
+ }
+ return 0; // error
+}
+
/*------------------------------------------------------------*/
/* Helpers for VSX instructions that do floating point
* operations and need to determine if a src contains a
IRExpr *srcA, IRExpr *srcB,
IRExpr *srcC, IRExpr *IMM ){
/* This function implements the ISA 3.1 instruction xxeval. The
- instruction is too complex to do with Iops. An Iop implementation is
- expected to exhaust memory and be really complex to write, debug and
- understand. The second option would be to just map it to a new Iop.
- Unfortunately, I doubt any other architecture will implement it making
- the Iop PPC specific which isn't really attractive. It would need
- extensive documenation for the Iop definition for anyone else to
- understand what it does. That leaves doing it as a clean helper. This
- is not the ideal option, but was chosen for now to help document what
- the instruction does. Discuss this with Julian before committing to
- decide if we really want to use this approach or map the instructioin
- to a new IOP. */
- /* FIX ME, CARLL 11/8/2018*/
+ instruction is too complex to do with Iops. */
/* The instruction description, note the IBM bit numbering is left to right:
- For each integer value i, 0 to 127, do the following.
+ For each integer value i, 0 to 127, do the following.
- Let j be the value of the concatenation of the contents of bit i of
- srcA, bit i of srcB, bit i of srcC. (j = srcA[i] | srcB[i] | srcC[i])
+ Let j be the value of the concatenation of the contents of bit i of
+ srcA, bit i of srcB, bit i of srcC. (j = srcA[i] | srcB[i] | srcC[i])
- The value of bit IMM[j] is placed into bit result[i].
+ The value of bit IMM[j] is placed into bit result[i].
Basically the instruction lets you set each of the 128 bits in the result
by selecting one of the eight bits in the IMM value. */
- /* Calling clean helpers with 128-bit args is currently not supported. It
- isn't worth adding the support. We will simply call a 64-bit helper to
- do the upper 64-bits of the result and the lower 64-bits of the result.
- */
+ /* Calling clean helpers with 128-bit args is currently not supported. We
+ will simply call a 64-bit clean helper to do the upper 64-bits of the
+ result and then call it to do the lower 64-bits of the result. */
IRTemp result_hi = newTemp( Ity_I64 );
IRTemp result_lo = newTemp( Ity_I64 );
return binop( Iop_64HLtoV128, mkexpr( result_hi ), mkexpr( result_lo ) );
}
+static void setup_fxstate_struct( IRDirty* d, UInt AT, IREffect AT_fx ) {
+ /* declare guest state effects, writing to four ACC 128-bit regs. */
+ d->nFxState = 4;
+ vex_bzero(&d->fxState, sizeof(d->fxState));
+ d->fxState[0].fx = AT_fx;
+ d->fxState[0].size = sizeof(U128);
+ d->fxState[1].fx = AT_fx;
+ d->fxState[1].size = sizeof(U128);
+ d->fxState[2].fx = AT_fx;
+ d->fxState[2].size = sizeof(U128);
+ d->fxState[3].fx = AT_fx;
+ d->fxState[3].size = sizeof(U128);
+
+ switch (AT) {
+ case 0:
+ d->fxState[0].offset = OFFB_ACC_0_r0;
+ d->fxState[1].offset = OFFB_ACC_0_r1;
+ d->fxState[2].offset = OFFB_ACC_0_r2;
+ d->fxState[3].offset = OFFB_ACC_0_r3;
+ break;
+ case 1:
+ d->fxState[0].offset = OFFB_ACC_1_r0;
+ d->fxState[1].offset = OFFB_ACC_1_r1;
+ d->fxState[2].offset = OFFB_ACC_1_r2;
+ d->fxState[3].offset = OFFB_ACC_1_r3;
+ break;
+ case 2:
+ d->fxState[0].offset = OFFB_ACC_2_r0;
+ d->fxState[1].offset = OFFB_ACC_2_r1;
+ d->fxState[2].offset = OFFB_ACC_2_r2;
+ d->fxState[3].offset = OFFB_ACC_2_r3;
+ break;
+ case 3:
+ d->fxState[0].offset = OFFB_ACC_3_r0;
+ d->fxState[1].offset = OFFB_ACC_3_r1;
+ d->fxState[2].offset = OFFB_ACC_3_r2;
+ d->fxState[3].offset = OFFB_ACC_3_r3;
+ break;
+ case 4:
+ d->fxState[0].offset = OFFB_ACC_4_r0;
+ d->fxState[1].offset = OFFB_ACC_4_r1;
+ d->fxState[2].offset = OFFB_ACC_4_r2;
+ d->fxState[3].offset = OFFB_ACC_4_r3;
+ break;
+ case 5:
+ d->fxState[0].offset = OFFB_ACC_5_r0;
+ d->fxState[1].offset = OFFB_ACC_5_r1;
+ d->fxState[2].offset = OFFB_ACC_5_r2;
+ d->fxState[3].offset = OFFB_ACC_5_r3;
+ break;
+ case 6:
+ d->fxState[0].offset = OFFB_ACC_6_r0;
+ d->fxState[1].offset = OFFB_ACC_6_r1;
+ d->fxState[2].offset = OFFB_ACC_6_r2;
+ d->fxState[3].offset = OFFB_ACC_6_r3;
+ break;
+ case 7:
+ d->fxState[0].offset = OFFB_ACC_7_r0;
+ d->fxState[1].offset = OFFB_ACC_7_r1;
+ d->fxState[2].offset = OFFB_ACC_7_r2;
+ d->fxState[3].offset = OFFB_ACC_7_r3;
+ break;
+ default:
+ vassert( (AT >= 0) && (AT < 8));
+ }
+ return;
+}
+#define MATRIX_4BIT_INT_GER 1
+#define MATRIX_8BIT_INT_GER 2
+#define MATRIX_16BIT_INT_GER 3
+#define MATRIX_16BIT_FLOAT_GER 4
+#define MATRIX_32BIT_FLOAT_GER 5
+/* Note, the 64-bit float instructions have their own caller below. */
+
+static void vsx_matrix_ger ( const VexAbiInfo* vbi,
+ UInt inst_class,
+ IRExpr *srcA, IRExpr *srcB,
+ UInt AT, UInt mask_inst ) {
+ /* This function handles the VSX Matrix integer and floating-point GER
+ instructions listed in the classes above: 4-bit and 8-bit integer,
+ 16-bit integer, and 16-bit and 32-bit float, in both their base and
+ prefixed (pmxv*) forms. The instructions work on V128 values and up
+ to three masks (pmsk, xmsk, ymsk). */
+
+ IRTemp srcA_hi = newTemp( Ity_I64);
+ IRTemp srcA_lo = newTemp( Ity_I64);
+ IRTemp srcB_hi = newTemp( Ity_I64);
+ IRTemp srcB_lo = newTemp( Ity_I64);
+ IRDirty* d;
+ UInt instruction = mask_inst & 0xFF; /* Instruction is lower 8-bits. */
+ IREffect AT_fx;
+
+ assign( srcA_hi, unop( Iop_V128HIto64, srcA ) );
+ assign( srcA_lo, unop( Iop_V128to64, srcA ) );
+ assign( srcB_hi, unop( Iop_V128HIto64, srcB ) );
+ assign( srcB_lo, unop( Iop_V128to64, srcB ) );
+
+ /* Using a dirty helper so we can access the contents of the ACC for use
+ by the instruction and then write the result directly back to the ACC.
+ The dirty helper does not return data. */
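+ /* Pack the mask/instruction word and the target ACC entry into one
+ 32-bit argument: AT in bits [3:0] and mask_inst (XO, prefix flag and
+ any prefix masks) starting at bit 5. The dirty helpers unpack it
+ with the matching shifts. */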
+ IRExpr** args = mkIRExprVec_7(
+ IRExpr_GSPTR(),
+ mkU32(offsetofPPCGuestState(guest_ACC_0_r0)),
+ mkexpr(srcA_hi), mkexpr(srcA_lo),
+ mkexpr(srcB_hi), mkexpr(srcB_lo),
+ mkU32( (mask_inst << 5) | AT ));
+
+ /* Set AT_fx to Write if the instruction only writes the ACC. Set
+ AT_fx to modify if the instruction uses the AT entry and writes
+ to the ACC entry. */
+ switch (instruction) {
+ case XVI4GER8:
+ case XVI8GER4:
+ case XVI16GER2:
+ case XVI16GER2S:
+ case XVF16GER2:
+ case XVF32GER:
+ AT_fx = Ifx_Write;
+ break;
+ case XVI4GER8PP:
+ case XVI8GER4PP:
+ case XVI16GER2PP:
+ case XVI16GER2SPP:
+ case XVF16GER2PP:
+ case XVF16GER2PN:
+ case XVF16GER2NP:
+ case XVF16GER2NN:
+ case XVF32GERPP:
+ case XVF32GERPN:
+ case XVF32GERNP:
+ case XVF32GERNN:
+ AT_fx = Ifx_Modify;
+ break;
+ default:
+ vassert(0); /* Unknown instruction */
+ }
+
+ switch(inst_class) {
+ case MATRIX_4BIT_INT_GER:
+
+ d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "vsx_matrix_4bit_ger_dirty_helper",
+ fnptr_to_fnentry( vbi, &vsx_matrix_4bit_ger_dirty_helper ),
+ args );
+ break;
+
+ case MATRIX_8BIT_INT_GER:
+
+ d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "vsx_matrix_8bit_ger_dirty_helper",
+ fnptr_to_fnentry( vbi, &vsx_matrix_8bit_ger_dirty_helper ),
+ args );
+ break;
+
+ case MATRIX_16BIT_INT_GER:
+
+ d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "vsx_matrix_16bit_ger_dirty_helper",
+ fnptr_to_fnentry( vbi, &vsx_matrix_16bit_ger_dirty_helper ),
+ args );
+ break;
+
+ case MATRIX_16BIT_FLOAT_GER:
+
+ d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "vsx_matrix_16bit_float_ger_dirty_helper",
+ fnptr_to_fnentry( vbi, &vsx_matrix_16bit_float_ger_dirty_helper ),
+ args );
+ break;
+
+ case MATRIX_32BIT_FLOAT_GER:
+
+ d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "vsx_matrix_32bit_float_ger_dirty_helper",
+ fnptr_to_fnentry( vbi, &vsx_matrix_32bit_float_ger_dirty_helper ),
+ args );
+ break;
+
+ default:
+ vex_printf("ERROR: Unknown inst_class = %u in vsx_matrix_ger()\n",
+ inst_class);
+ return;
+ }
+
+ setup_fxstate_struct( d, AT, AT_fx );
+
+ /* execute the dirty call, side-effecting guest state */
+ stmt( IRStmt_Dirty(d) );
+}
+
+static void vsx_matrix_64bit_float_ger ( const VexAbiInfo* vbi,
+ IRExpr *srcA, IRExpr *srcA1,
+ IRExpr *srcB,
+ UInt AT, UInt mask_inst ) {
+ /* This helper function does the VSX Matrix 64-bit floating-point GER
+ (Rank-1 Update) instructions xvf64ger, xvf64gerpp, xvf64gerpn,
+ xvf64gernp, xvf64gernn, pmxvf64ger, pmxvf64gerpp, pmxvf64gerpn,
+ pmxvf64gernp, pmxvf64gernn. */
+ IRTemp srcX_hi = newTemp( Ity_I64);
+ IRTemp srcX_lo = newTemp( Ity_I64);
+ IRTemp srcX1_hi = newTemp( Ity_I64);
+ IRTemp srcX1_lo = newTemp( Ity_I64);
+ IRTemp srcY_hi = newTemp( Ity_I64);
+ IRTemp srcY_lo = newTemp( Ity_I64);
+ UInt start_i;
+ IRDirty* d;
+ ULong combined_args;
+ UInt instruction = mask_inst & 0xFF; /* Instruction is lower 8-bits. */
+ IREffect AT_fx;
+
+ assign( srcX_lo, unop( Iop_V128HIto64, srcA ) );
+ assign( srcX_hi, unop( Iop_V128to64, srcA ) );
+ assign( srcX1_lo, unop( Iop_V128HIto64, srcA1 ) );
+ assign( srcX1_hi, unop( Iop_V128to64, srcA1 ) );
+ assign( srcY_lo, unop( Iop_V128HIto64, srcB ) );
+ assign( srcY_hi, unop( Iop_V128to64, srcB ) );
+
+ /* Using a dirty helper so we can access the contents of the ACC for use
+ by the instruction and then write the result directly back to the ACC.
+ The dirty helper does not return data.
+
+ A dirty helper is limited to eight arguments, so the four srcX double
+ words cannot all be passed in one call. Instead, make two calls: the
+ first computes two rows of the ACC entry, the second the remaining
+ two rows. */
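+ /* combined_args layout: bits [3:0] = ACC entry (AT), bits [7:4] =
+ start_i, bits [15:8] = instruction XO, bit [16] = prefix instruction
+ flag, upper bits = mask field from the prefix word. The helper
+ unpacks these with the matching shifts. */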
+
+ start_i = 0;
+ combined_args = (mask_inst << 8) | (start_i << 4) | AT;
+
+ IRExpr** args1 = mkIRExprVec_7(
+ IRExpr_GSPTR(),
+ mkU32( offsetofPPCGuestState(guest_ACC_0_r0) ),
+ mkexpr(srcX1_hi), mkexpr(srcX1_lo),
+ mkexpr(srcY_hi), mkexpr(srcY_lo),
+ mkU32( combined_args ));
+
+ /* Set AT_fx to Write if the instruction only writes the ACC. Set
+ AT_fx to modify if the instruction uses the AT entry and writes
+ to the ACC entry. */
+ switch (instruction) {
+ case XVF64GER:
+ AT_fx = Ifx_Write;
+ break;
+ case XVF64GERPP:
+ case XVF64GERPN:
+ case XVF64GERNP:
+ case XVF64GERNN:
+ AT_fx = Ifx_Modify;
+ break;
+ default:
+ vassert(0); /* Unknown instruction */
+ }
+
+ d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "vsx_matrix_64bit_float_ger_dirty_helper",
+ fnptr_to_fnentry( vbi, &vsx_matrix_64bit_float_ger_dirty_helper ),
+ args1 );
+
+ setup_fxstate_struct( d, AT, AT_fx );
+
+ /* execute the dirty call, side-effecting guest state */
+ stmt( IRStmt_Dirty(d) );
+
+ start_i = 2;
+ combined_args = (mask_inst << 8) | (start_i << 4) | AT;
+
+ IRExpr** args2 = mkIRExprVec_7(
+ IRExpr_GSPTR(),
+ mkU32( offsetofPPCGuestState(guest_ACC_0_r0) ),
+ mkexpr(srcX_hi), mkexpr(srcX_lo),
+ mkexpr(srcY_hi), mkexpr(srcY_lo),
+ mkU32( combined_args ));
+
+ d = unsafeIRDirty_0_N (
+ 0/*regparms*/,
+ "vsx_matrix_64bit_float_ger_dirty_helper",
+ fnptr_to_fnentry( vbi, &vsx_matrix_64bit_float_ger_dirty_helper ),
+ args2 );
+
+ setup_fxstate_struct( d, AT, AT_fx );
+
+ /* execute the dirty call, side-effecting guest state */
+ stmt( IRStmt_Dirty(d) );
+}
+
static IRExpr * UNSIGNED_CMP_GT_V128 ( IRExpr *vA, IRExpr *vB ) {
/* This function does an unsigned compare of two V128 values. The
* function is for use in 32-bit mode only as it is expensive. The
switch (opc1) {
case 0x30: // lfs (Load Float Single, PPC32 p441)
- pDIP( is_prefix, "lfs fr%u,%u(r%u)\n", frT_addr, immediate_val, rA_addr );
+ pDIP( is_prefix, "lfs fr%u,%u(r%u)", frT_addr, immediate_val, rA_addr );
DIPp( is_prefix, ",%u", R );
putFReg( frT_addr,
break;
case 0x32: // lfd (Load Float Double, PPC32 p437)
- pDIP( is_prefix, "lfd fr%u,%u(r%u)\n", frT_addr, immediate_val, rA_addr );
+ pDIP( is_prefix, "lfd fr%u,%u(r%u)", frT_addr, immediate_val, rA_addr );
DIPp( is_prefix, ",%u", R );
putFReg( frT_addr, load(Ity_F64, mkexpr(EA)) );
break;
#undef MAX_FIELDS
}
+static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr,
+ const VexAbiInfo* vbi )
+{
+ UChar opc1 = ifieldOPC(theInstr);
+ UInt opc2 = IFIELD( theInstr, 1, 10);
+ UInt bit11_15 = IFIELD( theInstr, (31-15), 5);
+ UChar AT = ifieldAT(theInstr);
+ Bool is_prefix = prefix_instruction( prefix );
+ UChar rA_addr = ifieldRegA( theInstr );
+ UChar rB_addr = ifieldRegB( theInstr );
+
+ /* Note, not all of the instructions supported by this function are
+ prefix instructions. */
+ if ((opc1 == 0x3b) && !is_prefix) {
+ // Note these are not prefix instructions
+ UInt XO = IFIELD( theInstr, 3, 8);
+ UInt inst_prefix = 0;
+
+ /* Note vsx_matrix_ger writes the result to the ACC register file. */
+ switch ( XO ) {
+ case XVI4GER8:
+ DIP("xvi4ger8 %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_ger( vbi, MATRIX_4BIT_INT_GER,
+ getVSReg( rA_addr ), getVSReg( rB_addr ),
+ AT, ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVI4GER8PP:
+ DIP("xvi4ger8pp %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_ger( vbi, MATRIX_4BIT_INT_GER,
+ getVSReg( rA_addr ), getVSReg( rB_addr ),
+ AT, ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVI8GER4:
+ DIP("xvi8ger4 %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_ger( vbi, MATRIX_8BIT_INT_GER,
+ getVSReg( rA_addr ), getVSReg( rB_addr ),
+ AT, ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVI8GER4PP:
+ DIP("xvi8ger4pp %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_ger( vbi, MATRIX_8BIT_INT_GER,
+ getVSReg( rA_addr ), getVSReg( rB_addr ),
+ AT, ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVI16GER2S:
+ DIP("xvi16ger2s %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER,
+ getVSReg( rA_addr ), getVSReg( rB_addr ),
+ AT, ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVI16GER2SPP:
+ DIP("xvi16ger2spp %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER,
+ getVSReg( rA_addr ), getVSReg( rB_addr ),
+ AT, ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF16GER2:
+ DIP("xvf16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ), AT,
+ ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF16GER2PP:
+ DIP("xvf16ger2pp %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ), AT,
+ ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF16GER2PN:
+ DIP("xvf16ger2pn %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ), AT,
+ ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF16GER2NP:
+ DIP("xvf16ger2np %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ), AT,
+ ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF16GER2NN:
+ DIP("xvf16ger2nn %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ), AT,
+ ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF32GER:
+ DIP("xvf32ger %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ), AT,
+ ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF32GERPP:
+ DIP("xvf32gerpp %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ), AT,
+ ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF32GERPN:
+ DIP("xvf32gerpn %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ), AT,
+ ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF32GERNP:
+ DIP("xvf32gernp %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ), AT,
+ ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF32GERNN:
+ DIP("xvf32gernn %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ), AT,
+ ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF64GER:
+ DIP("xvf64ger %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ),
+ getVSReg( rA_addr+1 ),
+ getVSReg( rB_addr ), AT,
+ ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF64GERPP:
+ DIP("xvf64gerpp %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ),
+ getVSReg( rA_addr+1 ),
+ getVSReg( rB_addr ), AT,
+ ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF64GERPN:
+ DIP("xvf64gerpn %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ),
+ getVSReg( rA_addr+1 ),
+ getVSReg( rB_addr ), AT,
+ ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF64GERNP:
+ DIP("xvf64gernp %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ),
+ getVSReg( rA_addr+1 ),
+ getVSReg( rB_addr ), AT,
+ ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF64GERNN:
+ DIP("xvf64gernn %u,r%u, r%u\n", AT, rA_addr, rB_addr);
+ vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ),
+ getVSReg( rA_addr+1 ),
+ getVSReg( rB_addr ), AT,
+ ( ( inst_prefix << 8 ) | XO ) );
+ break;
+ default:
+ vex_printf("ERROR, dis_vsx_accumulator_prefix, Unknown XO = 0x%x\n", XO);
+ return False;
+ }
+
+ } else if ((opc1 == 0x3b) && prefix) {
+ // Note these are prefix instructions
+ UInt XO = IFIELD( theInstr, 3, 8);
+ UInt PMSK, XMSK, YMSK, MASKS;
+ UInt inst_prefix = 0x1;
+ MASKS = IFIELD( prefix, 0, 16);
+
+ switch ( XO ) {
+ case XVI4GER8:
+ PMSK = IFIELD( prefix, 8, 8);
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 0, 4);
+
+ DIP("pmxvi4ger8 %u,r%u, r%u,%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK);
+ vsx_matrix_ger( vbi, MATRIX_4BIT_INT_GER,
+ getVSReg( rA_addr ), getVSReg( rB_addr ),
+ AT,
+ ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO) );
+ break;
+ case XVI4GER8PP:
+ PMSK = IFIELD( prefix, 8, 8);
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 0, 4);
+ DIP("pmxvi4ger8pp %u,r%u, r%u,%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK);
+ vsx_matrix_ger( vbi, MATRIX_4BIT_INT_GER,
+ getVSReg( rA_addr ), getVSReg( rB_addr ),
+ AT,
+ ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVI8GER4:
+ PMSK = IFIELD( prefix, 12, 4);
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 0, 4);
+ DIP("pmxvi8ger4 %u,r%u, r%u,%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK);
+ vsx_matrix_ger( vbi, MATRIX_8BIT_INT_GER,
+ getVSReg( rA_addr ), getVSReg( rB_addr ),
+ AT,
+ ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVI8GER4PP:
+ PMSK = IFIELD( prefix, 12, 4);
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 0, 4);
+ DIP("pmxvi8ger4pp %u,r%u, r%u,%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK);
+ vsx_matrix_ger( vbi, MATRIX_8BIT_INT_GER,
+ getVSReg( rA_addr ), getVSReg( rB_addr ),
+ AT,
+ ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVI16GER2S:
+ PMSK = IFIELD( prefix, 14, 2);
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 0, 4);
+ DIP("pmxvi16ger2s %u,r%u, r%u,%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK);
+ vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER,
+ getVSReg( rA_addr ), getVSReg( rB_addr ),
+ AT,
+ ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVI16GER2SPP:
+ PMSK = IFIELD( prefix, 14, 2);
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 0, 4);
+ DIP("pmxvi16ger2spp %u,r%u, r%u,%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK);
+ vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER,
+ getVSReg( rA_addr ), getVSReg( rB_addr ),
+ AT,
+ ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF16GER2:
+ PMSK = IFIELD( prefix, 14, 2);
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 0, 4);
+ DIP("pmxvf16ger2 %u,r%u, r%u,%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK);
+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ),
+ AT, ( (MASKS << 9 )
+ | ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF16GER2PP:
+ PMSK = IFIELD( prefix, 14, 2);
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 0, 4);
+ DIP("pmxvf16ger2pp %u,r%u, r%u,%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK);
+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ),
+ AT, ( (MASKS << 9 )
+ | ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF16GER2PN:
+ PMSK = IFIELD( prefix, 14, 2);
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 0, 4);
+ DIP("pmxvf16ger2pn %u,r%u, r%u,%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK);
+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ),
+ AT, ( (MASKS << 9 )
+ | ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF16GER2NP:
+ PMSK = IFIELD( prefix, 14, 2);
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 0, 4);
+ DIP("pmxvf16ger2np %u,r%u, r%u,%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK);
+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ),
+ AT, ( (MASKS << 9 )
+ | ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF16GER2NN:
+ PMSK = IFIELD( prefix, 14, 2);
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 0, 4);
+ DIP("pmxvf16ger2nn %u,r%u, r%u,%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK);
+ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ),
+ AT, ( (MASKS << 9 )
+ | ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF32GER:
+ PMSK = 0;
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 0, 4);
+ DIP("pmxvf32ger %u,r%u, r%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK);
+ vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ), AT,
+ ( ( MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF32GERPP:
+ PMSK = 0;
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 0, 4);
+ DIP("pmxvf32gerpp %u,r%u, r%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK);
+ vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ), AT,
+ ( ( MASKS << 9) | ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF32GERPN:
+ PMSK = 0;
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 0, 4);
+ DIP("pmxvf32gerpn %u,r%u, r%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK);
+ vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ), AT,
+ ( ( MASKS << 9) | ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF32GERNP:
+ PMSK = 0;
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 0, 4);
+ DIP("pmxvf32gernp %u,r%u, r%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK);
+ vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ), AT,
+ ( ( MASKS << 9) | ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF32GERNN:
+ PMSK = 0;
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 0, 4);
+ DIP("pmxvf32gernn %u,r%u, r%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK);
+ vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER,
+ getVSReg( rA_addr ),
+ getVSReg( rB_addr ), AT,
+ ( ( MASKS << 9) | ( inst_prefix << 8 ) | XO ) );
+ break;
+ case XVF64GER:
+ PMSK = 0;
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 2, 2);
+ DIP("pmxvf64ger %u,r%u, r%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK);
+ vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ),
+ getVSReg( rA_addr+1 ),
+ getVSReg( rB_addr ), AT,
+ ( ( MASKS << 9) | ( inst_prefix << 8 )
+ | XO ) );
+ break;
+ case XVF64GERPP:
+ PMSK = 0;
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 2, 2);
+ DIP("pmxvf64gerpp %u,r%u, r%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK);
+ vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ),
+ getVSReg( rA_addr+1 ),
+ getVSReg( rB_addr ), AT,
+ ( ( MASKS << 9) | ( inst_prefix << 8 )
+ | XO ) );
+ break;
+ case XVF64GERPN:
+ PMSK = 0;
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 2, 2);
+ DIP("pmxvf64gerpn %u,r%u, r%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK);
+ vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ),
+ getVSReg( rA_addr+1 ),
+ getVSReg( rB_addr ), AT,
+ ( ( MASKS << 9) | ( inst_prefix << 8 )
+ | XO ) );
+ break;
+ case XVF64GERNP:
+ PMSK = 0;
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 2, 2);
+ DIP("pmxvf64gernp %u,r%u, r%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK);
+ vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ),
+ getVSReg( rA_addr+1 ),
+ getVSReg( rB_addr ), AT,
+ ( ( MASKS << 9) | ( inst_prefix << 8 )
+ | XO ) );
+ break;
+ case XVF64GERNN:
+ PMSK = 0;
+ XMSK = IFIELD( prefix, 4, 4);
+ YMSK = IFIELD( prefix, 2, 2);
+ DIP("pmxvf64gernn %u,r%u, r%u,%u,%u\n",
+ AT, rA_addr, rB_addr, XMSK, YMSK);
+ vsx_matrix_64bit_float_ger( vbi, getVSReg( rA_addr ),
+ getVSReg( rA_addr+1 ),
+ getVSReg( rB_addr ), AT,
+ ( ( MASKS << 9) | ( inst_prefix << 8 )
+ | XO ) );
+ break;
+ default:
+ return False;
+ }
+
+ } else if ((opc1 == 0x1F) && (opc2 == 0xB1) && (bit11_15 == 0) && !prefix) {
+ // FYI, this is not a prefix instruction
+ DIP("xxmfacc %u\n", AT);
+
+ putVSReg( 4*AT+0, getACC( AT, 0 ) );
+ putVSReg( 4*AT+1, getACC( AT, 1 ) );
+ putVSReg( 4*AT+2, getACC( AT, 2 ) );
+ putVSReg( 4*AT+3, getACC( AT, 3 ) );
+
+ } else if ((opc1 == 0x1F) && (opc2 == 0xB1) && (bit11_15 == 3) && !prefix) {
+ // FYI, this is not a prefix instruction
+ IRTemp zero128 = newTemp(Ity_V128);
+
+ DIP("xxsetaccz %u\n", AT);
+
+ assign( zero128, binop(Iop_64HLtoV128, mkU64( 0 ), mkU64( 0 ) ) );
+ putACC( AT, 0, mkexpr( zero128 ) );
+ putACC( AT, 1, mkexpr( zero128 ) );
+ putACC( AT, 2, mkexpr( zero128 ) );
+ putACC( AT, 3, mkexpr( zero128 ) );
+
+ } else if ((opc1 == 0x1F) && (opc2 == 0xB1) && (bit11_15 == 1) && !prefix) {
+ // FYI, this is not a prefix instruction
+ DIP("xxmtacc %u\n", AT);
+
+ putACC( AT, 0, getVSReg( 4*AT+0 ) );
+ putACC( AT, 1, getVSReg( 4*AT+1 ) );
+ putACC( AT, 2, getVSReg( 4*AT+2 ) );
+ putACC( AT, 3, getVSReg( 4*AT+3 ) );
+
+ } else {
+ vex_printf("ERROR, dis_vsx_accumulator_prefix, Unknown instruction theInstr = 0x%x\n",
+ theInstr);
+ return False;
+ }
+
+ return True;
+}
+
static Int dis_nop_prefix ( UInt prefix, UInt theInstr )
{
Bool is_prefix = prefix_instruction( prefix );
goto decode_failure;
default:
+ ; // Fall thru to the next check
+ }
+
+ if ( !prefix_instruction( prefix ) ) {
+ if ( !(allow_isa_3_1) ) goto decode_noIsa3_1;
+ opc2 = IFIELD( theInstr, 3, 8 );
+ if ((opc2 == XVI4GER8) || // xvi4ger8
+ (opc2 == XVI4GER8PP) || // xvi4ger8pp
+ (opc2 == XVI8GER4) || // xvi8ger4
+ (opc2 == XVI8GER4PP) || // xvi8ger4pp
+ (opc2 == XVF16GER2) || // xvf16ger2
+ (opc2 == XVF16GER2PP) || // xvf16ger2pp
+ (opc2 == XVF16GER2PN) || // xvf16ger2pn
+ (opc2 == XVF16GER2NP) || // xvf16ger2np
+ (opc2 == XVF16GER2NN) || // xvf16ger2nn
+ (opc2 == XVI16GER2S) || // xvi16ger2s
+ (opc2 == XVI16GER2SPP) || // xvi16ger2spp
+ (opc2 == XVF32GER) || // xvf32ger
+ (opc2 == XVF32GERPP) || // xvf32gerpp
+ (opc2 == XVF32GERPN) || // xvf32gerpn
+ (opc2 == XVF32GERNP) || // xvf32gernp
+ (opc2 == XVF32GERNN) || // xvf32gernn
+ (opc2 == XVF64GER) || // xvf64ger
+ (opc2 == XVF64GERPP) || // xvf64gerpp
+ (opc2 == XVF64GERPN) || // xvf64gerpn
+ (opc2 == XVF64GERNP) || // xvf64gernp
+ (opc2 == XVF64GERNN)) { // xvf64gernn
+ if (dis_vsx_accumulator_prefix( prefix, theInstr, abiinfo ) )
+ goto decode_success;
+ goto decode_failure;
+ } else {
+ vex_printf("ERROR, dis_vsx_accumulator_prefix, unknown opc2 = 0x%x\n",
+ opc2);
+ goto decode_failure;
+ }
+
+ } else {
+ // lxacc
+ if (dis_vsx_accumulator_prefix( prefix, theInstr, abiinfo ) )
+ goto decode_success;
goto decode_failure;
}
break;
goto decode_failure;
case 0x3F:
+ if ( prefix_instruction( prefix ) ) { // stxacc
+ if ( !(allow_isa_3_1) ) goto decode_noIsa3_1;
+ if (dis_vsx_accumulator_prefix( prefix, theInstr, abiinfo ) )
+ goto decode_success;
+ goto decode_failure;
+ }
+
if (!allow_F) goto decode_noF;
/* Instrs using opc[1:5] never overlap instrs using opc[1:10],
so we can simply fall through the first switch statement */
}
break;
-
case 0x1F:
+ if ( prefix_instruction( prefix ) ) { // stxacc
+ if ( !(allow_isa_3_1) ) goto decode_noIsa3_1;
+ if (dis_vsx_accumulator_prefix( prefix, theInstr, abiinfo ) )
+ goto decode_success;
+ goto decode_failure;
+ }
/* For arith instns, bit10 is the OE flag (overflow enable) */
opc2 = IFIELD(theInstr, 1, 10);
switch (opc2) {
+ case 0xB1: // xxmfacc, xxsetaccz
+ {
+ if ( !(allow_isa_3_1) ) goto decode_noIsa3_1;
+ if (dis_vsx_accumulator_prefix( prefix, theInstr, abiinfo ) )
+ goto decode_success;
+ goto decode_failure;
+ }
case 0xDB: // brh
case 0x9B: // brw
// if allow_V is not set, we'll skip trying to decode.
if (!allow_V) goto decode_noV;
- if (dis_vx_load( prefix, theInstr )) goto decode_success;
+ if (dis_vx_load( prefix, theInstr )) goto decode_success;
goto decode_failure;
case 0x00D: // lxvrbx
case 0x0C6: case 0x1C6: case 0x2C6: // vcmpeqfp, vcmpgefp, vcmpgtfp
case 0x3C6: // vcmpbfp
if (!allow_V) goto decode_noV;
- if (dis_av_fp_cmp( prefix, theInstr )) goto decode_success;
+ if (dis_av_fp_cmp( prefix, theInstr ))
+ goto decode_success;
goto decode_failure;
default: