}
}
- if (outs1 + outs2 <= 8) {
- if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438.
- DEBUG_PRINTF("building double-shufti for %zu one-byte and %zu"
- " two-byte literals\n", outs1, outs2);
- aux->accel_type = ACCEL_DSHUFTI;
- aux->dshufti.offset = offset;
- shuftiBuildDoubleMasks(info.double_stop1, info.double_stop2,
- &aux->dshufti.lo1,
- &aux->dshufti.hi1,
- &aux->dshufti.lo2,
- &aux->dshufti.hi2);
+ if (outs1 < outs2 && outs1 <= 2) { // Heuristic from UE-438.
+ DEBUG_PRINTF("building double-shufti for %zu one-byte and %zu"
+ " two-byte literals\n", outs1, outs2);
+ aux->accel_type = ACCEL_DSHUFTI;
+ aux->dshufti.offset = offset;
+ if (shuftiBuildDoubleMasks(info.double_stop1, info.double_stop2,
+ &aux->dshufti.lo1, &aux->dshufti.hi1,
+ &aux->dshufti.lo2, &aux->dshufti.hi2)) {
return;
}
}
/*
 * Report whether the two-byte escape data held in info passes the checks
 * below. Returns true only when all of these hold:
 *  - info.outs2_broken is false;
 *  - info.outs2 is not empty;
 *  - info.outs2_single.count() is smaller than info.outs2.size();
 *  - info.outs2_single.count() is at most 2.
 * NOTE(review): the line prefixed with '-' is a patch removal marker. It is
 * the former cap of 8 on count() + size() and is dropped by this change.
 */
static
bool double_byte_ok(const escape_info &info) {
return !info.outs2_broken
- && info.outs2_single.count() + info.outs2.size() <= 8
&& info.outs2_single.count() < info.outs2.size()
&& info.outs2_single.count() <= 2 && !info.outs2.empty();
}
}
}
- if (double_byte_ok(info)) {
+ if (double_byte_ok(info)
+ && shuftiBuildDoubleMasks(info.outs2_single, info.outs2,
+ &accel->dshufti.lo1, &accel->dshufti.hi1,
+ &accel->dshufti.lo2, &accel->dshufti.hi2)) {
accel->accel_type = ACCEL_DSHUFTI;
accel->dshufti.offset = verify_u8(info.outs2_offset);
- shuftiBuildDoubleMasks(info.outs2_single, info.outs2,
- &accel->dshufti.lo1,
- &accel->dshufti.hi1,
- &accel->dshufti.lo2,
- &accel->dshufti.hi2);
DEBUG_PRINTF("state %hu is double shufti\n", this_idx);
return;
}
/*
 * Predicate over the two-byte escape data in info. The result is true when
 * info.outs2_broken is false, info.outs2 is non-empty, and
 * info.outs2_single.count() is both below info.outs2.size() and no greater
 * than 2.
 * NOTE(review): the '-' prefixed line is a removal in this patch; the old
 * limit of 8 on info.outs2_single.count() + info.outs2.size() no longer
 * takes part in the test.
 */
static
bool double_byte_ok(const escape_info &info) {
return !info.outs2_broken
- && info.outs2_single.count() + info.outs2.size() <= 8
&& info.outs2_single.count() < info.outs2.size()
&& info.outs2_single.count() <= 2 && !info.outs2.empty();
}
return as;
}
- if (two_count > 8) {
+ if (two_count > DOUBLE_SHUFTI_LIMIT) {
if (cr_2.count() < cr_1.count()) {
as.double_cr |= cr_2;
offset = offset_in + 1;
best.offset = offset;
/* merge best single and best double */
- if (!da.double_byte.empty() && da.double_byte.size() <= 8
+ if (!da.double_byte.empty() && da.double_byte.size() <= DOUBLE_SHUFTI_LIMIT
&& da.double_cr.count() < best.cr.count()) {
best.double_byte = da.double_byte;
best.double_cr = da.double_cr;
// literals)
if (depth > 1) {
for (unsigned int i = 0; i < (depth - 1); i++) {
- if (depthReach[i].count()*depthReach[i+1].count() <= 8) {
+ if (depthReach[i].count() * depthReach[i+1].count()
+ <= DOUBLE_SHUFTI_LIMIT) {
DEBUG_PRINTF("two-byte shufti, depth %u\n", i);
*as = AccelScheme(CharReach::dot(), i);
return true;
u32 offset,
ue2::flat_set<NFAVertex> *friends);
/* Upper bound used by the visible feasibility checks on two-byte literal
 * counts (double_byte.size(), two_count, and the product of adjacent
 * depthReach counts); it replaces the literal 8 in those comparisons. */
+#define DOUBLE_SHUFTI_LIMIT 20
+
struct AccelScheme {
AccelScheme(const CharReach &cr_in, u32 offset_in)
: cr(cr_in), offset(offset_in) {
size_t a_dcount = double_cr.count();
size_t b_dcount = b.double_cr.count();
- bool feasible_double_a
- = !a.double_byte.empty() && a.double_byte.size() <= 8;
- bool feasible_double_b
- = !b.double_byte.empty() && b.double_byte.size() <= 8;
+ bool feasible_double_a = !a.double_byte.empty()
+ && a.double_byte.size() <= DOUBLE_SHUFTI_LIMIT;
+ bool feasible_double_b = !b.double_byte.empty()
+ && b.double_byte.size() <= DOUBLE_SHUFTI_LIMIT;
if (feasible_double_a != feasible_double_b) {
return feasible_double_a > feasible_double_b;