From: Eyal Soha Date: Wed, 25 Oct 2023 03:52:56 +0000 (-0600) Subject: Add support for expensive cmpgt into vbits X-Git-Tag: VALGRIND_3_23_0~62 X-Git-Url: http://git.ipfire.org/?a=commitdiff_plain;h=94e826519a810e52bba539a7c41f4074f0631e10;p=thirdparty%2Fvalgrind.git Add support for expensive cmpgt into vbits --- diff --git a/memcheck/tests/vbit-test/binary.c b/memcheck/tests/vbit-test/binary.c index b93ab1585..4ac0435b0 100644 --- a/memcheck/tests/vbit-test/binary.c +++ b/memcheck/tests/vbit-test/binary.c @@ -364,6 +364,38 @@ check_result_for_binary(const irop_t *op, const test_data_t *data) panic(__func__); } break; + case UNDEF_GT_S_8x16: + expected_vbits = cmp_gt_vbits(1/* is_signed */, 8 /* bits_per_element */, 16 /* element_count */, + opnd1->vbits, opnd2->vbits, opnd1->value, opnd2->value); + break; + case UNDEF_GT_S_16x8: + expected_vbits = cmp_gt_vbits(1/* is_signed */, 16 /* bits_per_element */, 8 /* element_count */, + opnd1->vbits, opnd2->vbits, opnd1->value, opnd2->value); + break; + case UNDEF_GT_S_32x4: + expected_vbits = cmp_gt_vbits(1/* is_signed */, 32 /* bits_per_element */, 4 /* element_count */, + opnd1->vbits, opnd2->vbits, opnd1->value, opnd2->value); + break; + case UNDEF_GT_S_64x2: + expected_vbits = cmp_gt_vbits(1/* is_signed */, 64 /* bits_per_element */, 2 /* element_count */, + opnd1->vbits, opnd2->vbits, opnd1->value, opnd2->value); + break; + case UNDEF_GT_U_8x16: + expected_vbits = cmp_gt_vbits(0/* is_signed */, 8 /* bits_per_element */, 16 /* element_count */, + opnd1->vbits, opnd2->vbits, opnd1->value, opnd2->value); + break; + case UNDEF_GT_U_16x8: + expected_vbits = cmp_gt_vbits(0/* is_signed */, 16 /* bits_per_element */, 8 /* element_count */, + opnd1->vbits, opnd2->vbits, opnd1->value, opnd2->value); + break; + case UNDEF_GT_U_32x4: + expected_vbits = cmp_gt_vbits(0/* is_signed */, 32 /* bits_per_element */, 4 /* element_count */, + opnd1->vbits, opnd2->vbits, opnd1->value, opnd2->value); + break; + case UNDEF_GT_U_64x2: + 
expected_vbits = cmp_gt_vbits(0/* is_signed */, 64 /* bits_per_element */, 2 /* element_count */, + opnd1->vbits, opnd2->vbits, opnd1->value, opnd2->value); + break; default: panic(__func__); diff --git a/memcheck/tests/vbit-test/irops.c b/memcheck/tests/vbit-test/irops.c index 1ab0ee2fc..4755ce41c 100644 --- a/memcheck/tests/vbit-test/irops.c +++ b/memcheck/tests/vbit-test/irops.c @@ -858,14 +858,14 @@ static irop_t irops[] = { { DEFOP(Iop_CmpEQ16x8, UNDEF_UNKNOWN), }, { DEFOP(Iop_CmpEQ32x4, UNDEF_UNKNOWN), }, { DEFOP(Iop_CmpEQ64x2, UNDEF_UNKNOWN), }, - { DEFOP(Iop_CmpGT8Sx16, UNDEF_UNKNOWN), }, - { DEFOP(Iop_CmpGT16Sx8, UNDEF_UNKNOWN), }, - { DEFOP(Iop_CmpGT32Sx4, UNDEF_UNKNOWN), }, - { DEFOP(Iop_CmpGT64Sx2, UNDEF_UNKNOWN), }, - { DEFOP(Iop_CmpGT8Ux16, UNDEF_UNKNOWN), }, - { DEFOP(Iop_CmpGT16Ux8, UNDEF_UNKNOWN), }, - { DEFOP(Iop_CmpGT32Ux4, UNDEF_UNKNOWN), }, - { DEFOP(Iop_CmpGT64Ux2, UNDEF_UNKNOWN), }, + { DEFOP(Iop_CmpGT8Sx16, UNDEF_GT_S_8x16), .amd64 = 1 }, + { DEFOP(Iop_CmpGT16Sx8, UNDEF_GT_S_16x8), .amd64 = 1 }, + { DEFOP(Iop_CmpGT32Sx4, UNDEF_GT_S_32x4), .amd64 = 1 }, + { DEFOP(Iop_CmpGT64Sx2, UNDEF_GT_S_64x2), .amd64 = 1 }, + { DEFOP(Iop_CmpGT8Ux16, UNDEF_GT_U_8x16) }, + { DEFOP(Iop_CmpGT16Ux8, UNDEF_GT_U_16x8) }, + { DEFOP(Iop_CmpGT32Ux4, UNDEF_GT_U_32x4) }, + { DEFOP(Iop_CmpGT64Ux2, UNDEF_GT_U_64x2) }, { DEFOP(Iop_Cnt8x16, UNDEF_UNKNOWN), }, { DEFOP(Iop_Clz8x16, UNDEF_UNKNOWN), }, { DEFOP(Iop_Clz16x8, UNDEF_UNKNOWN), }, diff --git a/memcheck/tests/vbit-test/vbits.c b/memcheck/tests/vbit-test/vbits.c index 2f1e32a6d..ca4bc72bd 100644 --- a/memcheck/tests/vbit-test/vbits.c +++ b/memcheck/tests/vbit-test/vbits.c @@ -1160,6 +1160,19 @@ cmp_eq_ne_vbits(vbits_t vbits1, vbits_t vbits2, value_t val1, value_t val2) return res; } +/* Given unsigned vbits and value, return the minimum possible value. */ +uint64_t min_vbits(uint64_t vbits, uint64_t value) +{ + // This is derived from expensiveAddSub() in mc_translate.c. 
+ return value & ~vbits; +} + +/* Given unsigned vbits and value, return the maximum possible value. */ +uint64_t max_vbits(uint64_t vbits, uint64_t value) +{ + // This is derived from expensiveAddSub() in mc_translate.c. + return value | vbits; +} /* Deal with precise integer ADD and SUB. */ vbits_t @@ -1170,11 +1183,10 @@ int_add_or_sub_vbits(int isAdd, get_binary_vbits_and_vals64(&vaa, &aa, &vbb, &bb, vbits1, vbits2, val1, val2); - // This is derived from expensiveAddSub() in mc_translate.c. - uint64_t a_min = aa & ~vaa; - uint64_t b_min = bb & ~vbb; - uint64_t a_max = aa | vaa; - uint64_t b_max = bb | vbb; + uint64_t a_min = min_vbits(vaa, aa); + uint64_t b_min = min_vbits(vbb, bb); + uint64_t a_max = max_vbits(vaa, aa); + uint64_t b_max = max_vbits(vbb, bb); uint64_t result; if (isAdd) { @@ -1194,3 +1206,60 @@ int_add_or_sub_vbits(int isAdd, return res; } + +/* Deal with precise CmpGTsbxe. + * + * b is the number of bits per element and e is the number of elements. x is + * either S for signed or U for unsigned. + */ + +vbits_t +cmp_gt_vbits(int is_signed, int bits_per_element, int element_count, + vbits_t vbits1, vbits_t vbits2, value_t val1, value_t val2) { + assert(vbits1.num_bits == vbits2.num_bits); + assert(bits_per_element*element_count == vbits1.num_bits); + assert(vbits1.num_bits == 128); // All the known variants are 128-bit. + + vbits_t res = { .num_bits = vbits1.num_bits, .bits.u128 = {0,0} }; + for (int word = 0; word < 2; word++) { + for (int element_in_word = 0; element_in_word < element_count/2; element_in_word++) { + // We don't have to worry about little-endian vs big-endian because the + // max bits_per_element is 64 and fits in a word. Extract a word. 
+ uint64_t element1 = (val1.u128[word] >> (bits_per_element*element_in_word)) & (((uint64_t) -1) >> (64 - bits_per_element)); + uint64_t element2 = (val2.u128[word] >> (bits_per_element*element_in_word)) & (((uint64_t) -1) >> (64 - bits_per_element)); + uint64_t velement1 = (vbits1.bits.u128[word] >> (bits_per_element*element_in_word)) & (((uint64_t) -1) >> (64 - bits_per_element)); + uint64_t velement2 = (vbits2.bits.u128[word] >> (bits_per_element*element_in_word)) & (((uint64_t) -1) >> (64 - bits_per_element)); + + // If we are doing a signed comparison then we add one to the MSB of + // the element. This converts the signed value into an unsigned value + // in such a way that the greater than operation continues to return + // the same value when done in unsigned math. We don't want the + // addition to overflow so we just use XOR instead. + if (is_signed) { + element1 ^= (((uint64_t) 1) << (bits_per_element-1)); + element2 ^= (((uint64_t) 1) << (bits_per_element-1)); + } + + uint64_t min1 = min_vbits(velement1, element1); + uint64_t min2 = min_vbits(velement2, element2); + uint64_t max1 = max_vbits(velement1, element1); + uint64_t max2 = max_vbits(velement2, element2); + + // If the minimum possible value of element1 is greater than the + // maximum possible value of element2 then element1 is surely greater + // than element2. + int is_definitely_greater = min1 > max2; + // If the maximum value of element1 is less than or equal to the minimum + // value of element2 then there is no way that element1 is greater than + // element2. + int is_definitely_not_greater = max1 <= min2; + int is_definite = is_definitely_greater || is_definitely_not_greater; + // If the answer is definite then the vbits should indicate that all + // bits are known, so 0. Otherwise, all 1s. 
+ if (!is_definite) { + res.bits.u128[word] |= (((uint64_t) -1) >> (64 - bits_per_element)) << (bits_per_element*element_in_word); + } + } + } + return res; +} diff --git a/memcheck/tests/vbit-test/vbits.h b/memcheck/tests/vbit-test/vbits.h index 0a50fab61..53ba328aa 100644 --- a/memcheck/tests/vbit-test/vbits.h +++ b/memcheck/tests/vbit-test/vbits.h @@ -92,8 +92,12 @@ int completely_defined_vbits(vbits_t); vbits_t cmpord_vbits(unsigned v1_num_bits, unsigned v2_num_bits); vbits_t cmp_eq_ne_vbits(vbits_t vbits1, vbits_t vbits2, value_t val1, value_t val2); +uint64_t min_vbits(uint64_t vbits, uint64_t value); +uint64_t max_vbits(uint64_t vbits, uint64_t value); vbits_t int_add_or_sub_vbits(int isAdd, vbits_t vbits1, vbits_t vbits2, value_t val1, value_t val2); +vbits_t cmp_gt_vbits(int is_signed, int bits_per_element, int element_count, + vbits_t vbits1, vbits_t vbits2, value_t val1, value_t val2); #endif // VBITS_H diff --git a/memcheck/tests/vbit-test/vtest.h b/memcheck/tests/vbit-test/vtest.h index c724f4142..5f2b2e03f 100644 --- a/memcheck/tests/vbit-test/vtest.h +++ b/memcheck/tests/vbit-test/vtest.h @@ -159,6 +159,15 @@ typedef enum { */ UNDEF_NARROW256_AtoB, + UNDEF_GT_S_8x16, + UNDEF_GT_S_16x8, + UNDEF_GT_S_32x4, + UNDEF_GT_S_64x2, + UNDEF_GT_U_8x16, + UNDEF_GT_U_16x8, + UNDEF_GT_U_32x4, + UNDEF_GT_U_64x2, + // For IROps I don't know anything about UNDEF_UNKNOWN } undef_t;