allow some prefixes that may squash the literal match to run eagerly

author Alex Coyte <a.coyte@intel.com>

Thu, 23 Jun 2016 23:28:42 +0000 (09:28 +1000)

committer Matthew Barr <matthew.barr@intel.com>

Fri, 8 Jul 2016 01:01:34 +0000 (11:01 +1000)
author Alex Coyte <a.coyte@intel.com>
Thu, 23 Jun 2016 23:28:42 +0000 (09:28 +1000)
committer Matthew Barr <matthew.barr@intel.com>
Fri, 8 Jul 2016 01:01:34 +0000 (11:01 +1000)
diff --git a/src/nfa/castle.c b/src/nfa/castle.c

index 13a44a97cf30bb7133edb1a9f89367051371a3d7..bfdcf6b54f66fa81c0944cccf37e8c8debacc660 100644 (file)
--- a/src/nfa/castle.c
+++ b/src/nfa/castle.c
@@ -979,6 +979,46 @@ char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report,
      return castleInAccept(c, q, report, q_cur_offset(q));
  }
  
+char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q) {
+    assert(n && q);
+    assert(n->type == CASTLE_NFA_0);
+    DEBUG_PRINTF("entry\n");
+    /* Returns 1 if subCastleInAccept() is non-zero for a subcastle, else 0. */
+    const struct Castle *c = getImplNfa(n);
+    const u64a offset = q_cur_offset(q);
+    DEBUG_PRINTF("offset=%llu\n", offset);
+    /* Exclusive mode: check the subcastle index stored for each group. */
+    if (c->exclusive) {
+        u8 *active = (u8 *)q->streamState;
+        u8 *groups = active + c->groupIterOffset;
+        for (u32 i = mmbit_iterate(groups, c->numGroups, MMB_INVALID);
+             i != MMB_INVALID; i = mmbit_iterate(groups, c->numGroups, i)) {
+            u8 *cur = active + i * c->activeIdxSize;
+            const u32 activeIdx = partial_load_u32(cur, c->activeIdxSize);
+            DEBUG_PRINTF("subcastle %u\n", activeIdx);
+            const struct SubCastle *sub = getSubCastle(c, activeIdx);
+            if (subCastleInAccept(c, q, sub->report, offset, activeIdx)) {
+                return 1;
+            }
+        }
+    }
+    /* Not pure-exclusive: also check each index iterated from 'active'. */
+    if (c->exclusive != PURE_EXCLUSIVE) {
+        const u8 *active = (const u8 *)q->streamState + c->activeOffset;
+        for (u32 i = mmbit_iterate(active, c->numRepeats, MMB_INVALID);
+             i != MMB_INVALID; i = mmbit_iterate(active, c->numRepeats, i)) {
+            DEBUG_PRINTF("subcastle %u\n", i);
+            const struct SubCastle *sub = getSubCastle(c, i);
+            if (subCastleInAccept(c, q, sub->report, offset, i)) {
+                return 1;
+            }
+        }
+    }
+    /* None of the inspected subcastles reported an accept. */
+    return 0;
+}
+
+
  char nfaExecCastle0_queueInitState(UNUSED const struct NFA *n, struct mq *q) {
      assert(n && q);
      assert(n->type == CASTLE_NFA_0);
diff --git a/src/nfa/castle.h b/src/nfa/castle.h

index 8fc3514b43c22902de26e7d8a90284c123e3d1cc..84d79097e72127e206ff540547e8c38380058808 100644 (file)
--- a/src/nfa/castle.h
+++ b/src/nfa/castle.h
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are met:
@@ -44,6 +44,7 @@ char nfaExecCastle0_QR(const struct NFA *n, struct mq *q, ReportID report);
  char nfaExecCastle0_reportCurrent(const struct NFA *n, struct mq *q);
  char nfaExecCastle0_inAccept(const struct NFA *n, ReportID report,
                               struct mq *q);
+char nfaExecCastle0_inAnyAccept(const struct NFA *n, struct mq *q);
  char nfaExecCastle0_queueInitState(const struct NFA *n, struct mq *q);
  char nfaExecCastle0_initCompressedState(const struct NFA *n, u64a offset,
                                          void *state, u8 key);
diff --git a/src/nfa/gough.c b/src/nfa/gough.c

index c52bca065fbfcb2754de484c2666dcb7b1b6e418..3b7a115d9363b7131c70dbf0b01b10973945eeb5 100644 (file)
--- a/src/nfa/gough.c
+++ b/src/nfa/gough.c
@@ -1048,6 +1048,14 @@ char nfaExecGough16_inAccept(const struct NFA *n, ReportID report,
      return nfaExecMcClellan16_inAccept(n, report, q);
  }
  
+char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q) {
+    return nfaExecMcClellan8_inAnyAccept(n, q); /* delegate to McClellan8 */
+}
+
+char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q) {
+    return nfaExecMcClellan16_inAnyAccept(n, q); /* delegate to McClellan16 */
+}
+
  static
  char goughCheckEOD(const struct NFA *nfa, u16 s,
                     const struct gough_som_info *som,
diff --git a/src/nfa/gough.h b/src/nfa/gough.h

index 41d4cb5a34fa64b10262473afd547cab2d6870d7..1a7dbd74de19a630ad261ab3431e12e58a148c29 100644 (file)
--- a/src/nfa/gough.h
+++ b/src/nfa/gough.h
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are met:
@@ -46,6 +46,7 @@ char nfaExecGough8_Q2(const struct NFA *n, struct mq *q, s64a end);
  char nfaExecGough8_QR(const struct NFA *n, struct mq *q, ReportID report);
  char nfaExecGough8_reportCurrent(const struct NFA *n, struct mq *q);
  char nfaExecGough8_inAccept(const struct NFA *n, ReportID report, struct mq *q);
+char nfaExecGough8_inAnyAccept(const struct NFA *n, struct mq *q);
  char nfaExecGough8_queueInitState(const struct NFA *n, struct mq *q);
  char nfaExecGough8_initCompressedState(const struct NFA *n, u64a offset,
                                         void *state, u8 key);
@@ -68,6 +69,7 @@ char nfaExecGough16_Q2(const struct NFA *n, struct mq *q, s64a end);
  char nfaExecGough16_QR(const struct NFA *n, struct mq *q, ReportID report);
  char nfaExecGough16_reportCurrent(const struct NFA *n, struct mq *q);
  char nfaExecGough16_inAccept(const struct NFA *n, ReportID report, struct mq *q);
+char nfaExecGough16_inAnyAccept(const struct NFA *n, struct mq *q);
  char nfaExecGough16_queueInitState(const struct NFA *n, struct mq *q);
  char nfaExecGough16_initCompressedState(const struct NFA *n, u64a offset,
                                          void *state, u8 key);
diff --git a/src/nfa/lbr.h b/src/nfa/lbr.h

index b770477df1e351d99c6692da886bf9411e9aed83..a9e42046db31bfcb3c9666340da95febeeabd375 100644 (file)
--- a/src/nfa/lbr.h
+++ b/src/nfa/lbr.h
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are met:
@@ -46,6 +46,7 @@ char nfaExecLbrDot_Q2(const struct NFA *n, struct mq *q, s64a end);
  char nfaExecLbrDot_QR(const struct NFA *n, struct mq *q, ReportID report);
  char nfaExecLbrDot_reportCurrent(const struct NFA *n, struct mq *q);
  char nfaExecLbrDot_inAccept(const struct NFA *n, ReportID report, struct mq *q);
+char nfaExecLbrDot_inAnyAccept(const struct NFA *n, struct mq *q);
  char nfaExecLbrDot_queueInitState(const struct NFA *n, struct mq *q);
  char nfaExecLbrDot_initCompressedState(const struct NFA *n, u64a offset,
                                         void *state, u8 key);
@@ -66,6 +67,7 @@ char nfaExecLbrVerm_QR(const struct NFA *n, struct mq *q, ReportID report);
  char nfaExecLbrVerm_reportCurrent(const struct NFA *n, struct mq *q);
  char nfaExecLbrVerm_inAccept(const struct NFA *n, ReportID report,
                               struct mq *q);
+char nfaExecLbrVerm_inAnyAccept(const struct NFA *n, struct mq *q);
  char nfaExecLbrVerm_queueInitState(const struct NFA *n, struct mq *q);
  char nfaExecLbrVerm_initCompressedState(const struct NFA *n, u64a offset,
                                          void *state, u8 key);
@@ -86,6 +88,7 @@ char nfaExecLbrNVerm_QR(const struct NFA *n, struct mq *q, ReportID report);
  char nfaExecLbrNVerm_reportCurrent(const struct NFA *n, struct mq *q);
  char nfaExecLbrNVerm_inAccept(const struct NFA *n, ReportID report,
                                struct mq *q);
+char nfaExecLbrNVerm_inAnyAccept(const struct NFA *n, struct mq *q);
  char nfaExecLbrNVerm_queueInitState(const struct NFA *n, struct mq *q);
  char nfaExecLbrNVerm_initCompressedState(const struct NFA *n, u64a offset,
                                           void *state, u8 key);
@@ -106,6 +109,7 @@ char nfaExecLbrShuf_QR(const struct NFA *n, struct mq *q, ReportID report);
  char nfaExecLbrShuf_reportCurrent(const struct NFA *n, struct mq *q);
  char nfaExecLbrShuf_inAccept(const struct NFA *n, ReportID report,
                               struct mq *q);
+char nfaExecLbrShuf_inAnyAccept(const struct NFA *n, struct mq *q);
  char nfaExecLbrShuf_queueInitState(const struct NFA *n, struct mq *q);
  char nfaExecLbrShuf_initCompressedState(const struct NFA *n, u64a offset,
                                          void *state, u8 key);
@@ -126,6 +130,7 @@ char nfaExecLbrTruf_QR(const struct NFA *n, struct mq *q, ReportID report);
  char nfaExecLbrTruf_reportCurrent(const struct NFA *n, struct mq *q);
  char nfaExecLbrTruf_inAccept(const struct NFA *n, ReportID report,
                               struct mq *q);
+char nfaExecLbrTruf_inAnyAccept(const struct NFA *n, struct mq *q);
  char nfaExecLbrTruf_queueInitState(const struct NFA *n, struct mq *q);
  char nfaExecLbrTruf_initCompressedState(const struct NFA *n, u64a offset,
                                          void *state, u8 key);
diff --git a/src/nfa/lbr_common_impl.h b/src/nfa/lbr_common_impl.h

index 917a8e9166251cf5ab06e7a3eb982125932420c0..4fb8f62a58cd0e613fcc5ca7a42316bad3133344 100644 (file)
--- a/src/nfa/lbr_common_impl.h
+++ b/src/nfa/lbr_common_impl.h
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are met:
@@ -94,6 +94,15 @@ char JOIN(ENGINE_EXEC_NAME, _inAccept)(const struct NFA *nfa,
      return lbrInAccept(l, lstate, q->streamState, offset, report);
  }
  
+char JOIN(ENGINE_EXEC_NAME, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
+    assert(nfa && q);
+    assert(isLbrType(nfa->type));
+    DEBUG_PRINTF("entry\n");
+    /* Reuse _inAccept, passing the report stored in lbr_common. */
+    const struct lbr_common *l = getImplNfa(nfa);
+    return JOIN(ENGINE_EXEC_NAME, _inAccept)(nfa, l->report, q);
+}
+
  char JOIN(ENGINE_EXEC_NAME, _queueInitState)(const struct NFA *nfa,
                                               struct mq *q) {
      assert(nfa && q);
diff --git a/src/nfa/limex.h b/src/nfa/limex.h

index 57ee46df3c0627c97c7f80f53259e4f15373aa34..3d4d258b6c03ed285eff77b0dfe6de99255e7e48 100644 (file)
--- a/src/nfa/limex.h
+++ b/src/nfa/limex.h
@@ -60,6 +60,7 @@ extern "C"
      char gf_name##_reportCurrent(const struct NFA *n, struct mq *q);           \
      char gf_name##_inAccept(const struct NFA *n, ReportID report,              \
                              struct mq *q);                                     \
+    char gf_name##_inAnyAccept(const struct NFA *n, struct mq *q);             \
      char gf_name##_queueInitState(const struct NFA *n, struct mq *q);          \
      char gf_name##_initCompressedState(const struct NFA *n, u64a offset,       \
                                         void *state, u8 key);                   \
diff --git a/src/nfa/limex_common_impl.h b/src/nfa/limex_common_impl.h

index 6e4b7718c0851dbb8cbcded806db2c1cd41655d5..68e0c0ade2c70ca27329d9a4a2718c67002b525c 100644 (file)
--- a/src/nfa/limex_common_impl.h
+++ b/src/nfa/limex_common_impl.h
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are met:
@@ -40,6 +40,7 @@
  #define TESTEOD_FN          JOIN(moNfaTestEod, SIZE)
  #define TESTEOD_REV_FN      JOIN(moNfaRevTestEod, SIZE)
  #define LIMEX_INACCEPT_FN   JOIN(limexInAccept, SIZE)
+#define LIMEX_INANYACCEPT_FN   JOIN(limexInAnyAccept, SIZE)
  #define EXPIRE_ESTATE_FN    JOIN(limexExpireExtendedState, SIZE)
  #define REPORTCURRENT_FN    JOIN(moNfaReportCurrent, SIZE)
  #define INITIAL_FN          JOIN(moNfaInitial, SIZE)
@@ -374,11 +375,32 @@ char LIMEX_INACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
      return 0;
  }
  
+static really_inline
+char LIMEX_INANYACCEPT_FN(const IMPL_NFA_T *limex, STATE_T state,
+                          union RepeatControl *repeat_ctrl, char *repeat_state,
+                          u64a offset) {
+    assert(limex);
+    /* Keep only the bits of 'state' that are also set in limex->accept. */
+    const STATE_T acceptMask = LOAD_STATE(&limex->accept);
+    STATE_T accstate = AND_STATE(state, acceptMask);
+
+    // Are we in an accept state?
+    if (ISZERO_STATE(accstate)) {
+        DEBUG_PRINTF("no accept states are on\n");
+        return 0;
+    }
+    /* NOTE(review): presumably clears accepts of unsatisfied repeats. */
+    SQUASH_UNTUG_BR_FN(limex, repeat_ctrl, repeat_state, offset, &accstate);
+    /* Result is whatever ISNONZERO_STATE reports for the remaining bits. */
+    return ISNONZERO_STATE(accstate);
+}
+
  #undef TESTEOD_FN
  #undef TESTEOD_REV_FN
  #undef REPORTCURRENT_FN
  #undef EXPIRE_ESTATE_FN
  #undef LIMEX_INACCEPT_FN
+#undef LIMEX_INANYACCEPT_FN
  #undef INITIAL_FN
  #undef TOP_FN
  #undef TOPN_FN
diff --git a/src/nfa/limex_compile.cpp b/src/nfa/limex_compile.cpp

index b8857922fdab9a9377a14166c0ed8c9b64f2d129..79e6db1c79ad6c0148710085516fa138554ee680 100644 (file)
--- a/src/nfa/limex_compile.cpp
+++ b/src/nfa/limex_compile.cpp
@@ -1008,7 +1008,8 @@ void findMaskedCompressionStates(const build_info &args,
      // Suffixes and outfixes can mask out leaf states, which should all be
      // accepts. Right now we can only do this when there is nothing in initDs,
      // as we switch that on unconditionally in the expand call.
-    if (generates_callbacks(h) && !hasInitDsStates(h, args.state_ids)) {
+    if (!inspects_states_for_accepts(h)
+        && !hasInitDsStates(h, args.state_ids)) {
          NFAStateSet nonleaf(args.num_states);
          for (const auto &e : edges_range(h)) {
              u32 from = args.state_ids.at(source(e, h));
diff --git a/src/nfa/limex_runtime_impl.h b/src/nfa/limex_runtime_impl.h

index 9924ef8c0a64bfea68b7f8961e23ae6ef995e8a8..19a5ebd3ee4c132958e9a5ac7316dd3ea4c24842 100644 (file)
--- a/src/nfa/limex_runtime_impl.h
+++ b/src/nfa/limex_runtime_impl.h
@@ -650,7 +650,27 @@ char JOIN(LIMEX_API_ROOT, _Q2)(const struct NFA *n, struct mq *q, s64a end) {
          ep = MIN(ep, end_abs);
          assert(ep >= sp);
  
-        assert(sp >= offset); // We no longer do history buffer scans here.
+        if (sp < offset) {
+            DEBUG_PRINTF("HISTORY BUFFER SCAN\n");
+            assert(offset - sp <= q->hlength);
+            u64a local_ep = MIN(offset, ep);
+            u64a final_look = 0;
+            /* we are starting inside the history buffer */
+            if (STREAMFIRST_FN(limex, q->history + q->hlength + sp - offset,
+                               local_ep - sp, &ctx, sp,
+                               &final_look) == MO_HALT_MATCHING) {
+                DEBUG_PRINTF("final_look:%llu sp:%llu end_abs:%llu "
+                             "offset:%llu\n", final_look, sp, end_abs, offset);
+                assert(q->cur);
+                q->cur--;
+                q->items[q->cur].type = MQE_START;
+                q->items[q->cur].location = sp + final_look - offset;
+                STORE_STATE(q->state, LOAD_STATE(&ctx.s));
+                return MO_MATCHES_PENDING;
+            }
+
+            sp = local_ep;
+        }
  
          if (sp >= ep) {
              goto scan_done;
@@ -868,6 +888,21 @@ char JOIN(LIMEX_API_ROOT, _inAccept)(const struct NFA *nfa,
                                       offset, report);
  }
  
+char JOIN(LIMEX_API_ROOT, _inAnyAccept)(const struct NFA *nfa, struct mq *q) {
+    assert(nfa && q);
+    assert(q->state && q->streamState);
+    /* Gather the state, repeat control and repeat state from the queue. */
+    const IMPL_NFA_T *limex = getImplNfa(nfa);
+    union RepeatControl *repeat_ctrl =
+        getRepeatControlBase(q->state, sizeof(STATE_T));
+    char *repeat_state = q->streamState + limex->stateSize;
+    STATE_T state = LOAD_STATE(q->state);
+    u64a offset = q->offset + q_last_loc(q) + 1;
+    /* Delegate to the limexInAnyAccept helper for this SIZE. */
+    return JOIN(limexInAnyAccept, SIZE)(limex, state, repeat_ctrl, repeat_state,
+                                        offset);
+}
+
  enum nfa_zombie_status JOIN(LIMEX_API_ROOT, _zombie_status)(
                                                           const struct NFA *nfa,
                                                           struct mq *q,
diff --git a/src/nfa/mcclellan.c b/src/nfa/mcclellan.c

index 314e88e750173bbfe578275057b5fa728cc11692..ac26c6a103f5382d632415208911e9e171e50c38 100644 (file)
--- a/src/nfa/mcclellan.c
+++ b/src/nfa/mcclellan.c
@@ -850,7 +850,7 @@ char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q) {
  }
  
  char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q) {
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
      NfaCallback cb = q->cb;
      void *ctxt = q->context;
      u16 s = *(u16 *)q->state;
@@ -905,7 +905,7 @@ char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
                                  struct mq *q) {
      assert(n && q);
  
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
      u8 s = *(u8 *)q->state;
      DEBUG_PRINTF("checking accepts for %hhu\n", s);
      if (s < m->accept_limit_8) {
@@ -915,25 +915,45 @@ char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
      return mcclellanHasAccept(m, get_aux(m, s), report);
  }
  
+char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q) {
+    assert(n && q);
+    /* The current DFA state is read as a single byte from q->state. */
+    const struct mcclellan *m = getImplNfa(n);
+    u8 s = *(u8 *)q->state;
+    DEBUG_PRINTF("checking accepts for %hhu\n", s);
+    assert(s < m->accept_limit_8 || get_aux(m, s)->accept);
+    /* States at or above accept_limit_8 count as accepts (see assert). */
+    return s >= m->accept_limit_8;
+}
  
  char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
                                   struct mq *q) {
      assert(n && q);
  
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
      u16 s = *(u16 *)q->state;
      DEBUG_PRINTF("checking accepts for %hu\n", s);
  
      return mcclellanHasAccept(m, get_aux(m, s), report);
  }
  
+char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q) {
+    assert(n && q);
+    /* The current DFA state is read as a u16 from q->state. */
+    const struct mcclellan *m = getImplNfa(n);
+    u16 s = *(u16 *)q->state;
+    DEBUG_PRINTF("checking accepts for %hu\n", s);
+    /* Normalise the aux 'accept' field to 0 or 1. */
+    return !!get_aux(m, s)->accept;
+}
+
  char nfaExecMcClellan8_Q2(const struct NFA *n, struct mq *q, s64a end) {
      u64a offset = q->offset;
      const u8 *buffer = q->buffer;
      NfaCallback cb = q->cb;
      void *context = q->context;
      assert(n->type == MCCLELLAN_NFA_8);
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
      const u8 *hend = q->history + q->hlength;
  
      return nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
@@ -947,7 +967,7 @@ char nfaExecMcClellan16_Q2(const struct NFA *n, struct mq *q, s64a end) {
      NfaCallback cb = q->cb;
      void *context = q->context;
      assert(n->type == MCCLELLAN_NFA_16);
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
      const u8 *hend = q->history + q->hlength;
  
      return nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
@@ -961,7 +981,7 @@ char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report) {
      NfaCallback cb = q->cb;
      void *context = q->context;
      assert(n->type == MCCLELLAN_NFA_8);
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
      const u8 *hend = q->history + q->hlength;
  
      char rv = nfaExecMcClellan8_Q2i(n, offset, buffer, hend, cb, context, q,
@@ -980,7 +1000,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) {
      NfaCallback cb = q->cb;
      void *context = q->context;
      assert(n->type == MCCLELLAN_NFA_16);
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(n);
+    const struct mcclellan *m = getImplNfa(n);
      const u8 *hend = q->history + q->hlength;
  
      char rv = nfaExecMcClellan16_Q2i(n, offset, buffer, hend, cb, context, q,
@@ -996,7 +1016,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report) {
  
  char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset,
                                             void *state, UNUSED u8 key) {
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
+    const struct mcclellan *m = getImplNfa(nfa);
      u8 s = offset ? m->start_floating : m->start_anchored;
      if (s) {
          *(u8 *)state = s;
@@ -1007,7 +1027,7 @@ char nfaExecMcClellan8_initCompressedState(const struct NFA *nfa, u64a offset,
  
  char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,
                                              void *state, UNUSED u8 key) {
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
+    const struct mcclellan *m = getImplNfa(nfa);
      u16 s = offset ? m->start_floating : m->start_anchored;
      if (s) {
          unaligned_store_u16(state, s);
@@ -1019,7 +1039,7 @@ char nfaExecMcClellan16_initCompressedState(const struct NFA *nfa, u64a offset,
  void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
                                    const u8 *buf, char top, size_t start_off,
                                    size_t len, NfaCallback cb, void *ctxt) {
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
+    const struct mcclellan *m = getImplNfa(nfa);
  
      u8 s = top ? m->start_anchored : *(u8 *)state;
  
@@ -1037,7 +1057,7 @@ void nfaExecMcClellan8_SimpStream(const struct NFA *nfa, char *state,
  void nfaExecMcClellan16_SimpStream(const struct NFA *nfa, char *state,
                                     const u8 *buf, char top, size_t start_off,
                                     size_t len, NfaCallback cb, void *ctxt) {
-    const struct mcclellan *m = (const struct mcclellan *)getImplNfa(nfa);
+    const struct mcclellan *m = getImplNfa(nfa);
  
      u16 s = top ? m->start_anchored : unaligned_load_u16(state);
  
diff --git a/src/nfa/mcclellan.h b/src/nfa/mcclellan.h

index 6b4ec2d55d7e43cc66078b088dd507fc0801144c..677265f5b3df6f2d77ca58c59a3c3c7f25ca00ec 100644 (file)
--- a/src/nfa/mcclellan.h
+++ b/src/nfa/mcclellan.h
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are met:
@@ -47,6 +47,7 @@ char nfaExecMcClellan8_QR(const struct NFA *n, struct mq *q, ReportID report);
  char nfaExecMcClellan8_reportCurrent(const struct NFA *n, struct mq *q);
  char nfaExecMcClellan8_inAccept(const struct NFA *n, ReportID report,
                                  struct mq *q);
+char nfaExecMcClellan8_inAnyAccept(const struct NFA *n, struct mq *q);
  char nfaExecMcClellan8_queueInitState(const struct NFA *n, struct mq *q);
  char nfaExecMcClellan8_initCompressedState(const struct NFA *n, u64a offset,
                                             void *state, u8 key);
@@ -70,6 +71,7 @@ char nfaExecMcClellan16_QR(const struct NFA *n, struct mq *q, ReportID report);
  char nfaExecMcClellan16_reportCurrent(const struct NFA *n, struct mq *q);
  char nfaExecMcClellan16_inAccept(const struct NFA *n, ReportID report,
                                   struct mq *q);
+char nfaExecMcClellan16_inAnyAccept(const struct NFA *n, struct mq *q);
  char nfaExecMcClellan16_queueInitState(const struct NFA *n, struct mq *q);
  char nfaExecMcClellan16_initCompressedState(const struct NFA *n, u64a offset,
                                              void *state, u8 key);
diff --git a/src/nfa/mcclellancompile_util.cpp b/src/nfa/mcclellancompile_util.cpp

index 234574d84d41010a405a6891dc8ff36f986e816a..2f1ffa022971dff9243ef97c64ca54dd61b28864 100644 (file)
--- a/src/nfa/mcclellancompile_util.cpp
+++ b/src/nfa/mcclellancompile_util.cpp
@@ -395,4 +395,36 @@ dstate_id_t get_sds_or_proxy(const raw_dfa &raw) {
      }
  }
  
+static
+bool can_die_early(const raw_dfa &raw, dstate_id_t s,
+                   map<dstate_id_t, u32> &visited, u32 age_limit) {
+    if (contains(visited, s) && visited[s] >= age_limit) {
+        /* we have already visited (or are in the process of visiting) here with
+         * a looser limit. */
+        return false;
+    }
+    visited[s] = age_limit;
+    /* Reaching DEAD_STATE within the limit is the success case. */
+    if (s == DEAD_STATE) {
+        return true;
+    }
+    /* Step budget exhausted without reaching DEAD_STATE. */
+    if (age_limit == 0) {
+        return false;
+    }
+    /* Recurse into every successor with one fewer step remaining. */
+    for (const auto &next : raw.states[s].next) {
+        if (can_die_early(raw, next, visited, age_limit - 1)) {
+            return true;
+        }
+    }
+    /* No successor reached DEAD_STATE within the remaining budget. */
+    return false;
+}
+
+bool can_die_early(const raw_dfa &raw, u32 age_limit) {
+    map<dstate_id_t, u32> visited; /* state -> largest age_limit tried */
+    return can_die_early(raw, raw.start_anchored, visited, age_limit);
+}
+
  } // namespace ue2
diff --git a/src/nfa/mcclellancompile_util.h b/src/nfa/mcclellancompile_util.h

index 7b6c033a9c09b1b2e24b5b7fefc1d9582c460bb6..3d3ee2e7bc53e3f072a3024bbc7b34d327357e76 100644 (file)
--- a/src/nfa/mcclellancompile_util.h
+++ b/src/nfa/mcclellancompile_util.h
@@ -57,6 +57,8 @@ size_t hash_dfa(const raw_dfa &rdfa);
  
  dstate_id_t get_sds_or_proxy(const raw_dfa &raw);
  
+bool can_die_early(const raw_dfa &raw, u32 age_limit);
+
  } // namespace ue2
  
  #endif
diff --git a/src/nfa/mpv.h b/src/nfa/mpv.h

index dc5dad6f90522341d407b4e9f6966b243a5e1484..a3f9071940c93bef8410bf83d627ba78b7b94f63 100644 (file)
--- a/src/nfa/mpv.h
+++ b/src/nfa/mpv.h
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are met:
@@ -36,7 +36,6 @@ struct NFA;
  
  char nfaExecMpv0_Q(const struct NFA *n, struct mq *q, s64a end);
  char nfaExecMpv0_reportCurrent(const struct NFA *n, struct mq *q);
-char nfaExecMpv0_inAccept(const struct NFA *n, ReportID report, struct mq *q);
  char nfaExecMpv0_queueInitState(const struct NFA *n, struct mq *q);
  char nfaExecMpv0_initCompressedState(const struct NFA *n, u64a offset,
                                       void *state, u8 key);
@@ -47,6 +46,7 @@ char nfaExecMpv0_expandState(const struct NFA *nfa, void *dest, const void *src,
  
  #define nfaExecMpv0_testEOD NFA_API_NO_IMPL
  #define nfaExecMpv0_inAccept NFA_API_NO_IMPL
+#define nfaExecMpv0_inAnyAccept NFA_API_NO_IMPL
  #define nfaExecMpv0_QR NFA_API_NO_IMPL
  #define nfaExecMpv0_Q2 NFA_API_NO_IMPL /* for non-chained suffixes. */
  #define nfaExecMpv0_B_Reverse NFA_API_NO_IMPL
diff --git a/src/nfa/nfa_api.h b/src/nfa/nfa_api.h

index 84f5c4a01ae46ed27d5cc53f0f8ee36e81872500..dad3894ad43df169e4d27f39bf9e4a03f6f39908 100644 (file)
--- a/src/nfa/nfa_api.h
+++ b/src/nfa/nfa_api.h
@@ -175,10 +175,16 @@ char nfaReportCurrentMatches(const struct NFA *nfa, struct mq *q);
   */
  char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q);
  
+/**
+ * Returns non-zero if the NFA is in any accept state regardless of report
+ * ID.
+ */
+char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q);
+
  /**
   * Process the queued commands on the given NFA up to end or the first match.
   *
- * Note: This version is meant for rose prefix NFAs:
+ * Note: This version is meant for rose prefix/infix NFAs:
   *  - never uses a callback
   *  - loading of state at a point in history is not special cased
   *
@@ -187,9 +193,9 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q);
   *        end with some variant of end. The location field of the events must
   *        be monotonically increasing. If not all the data was processed during
   *        the call, the queue is updated to reflect the remaining work.
- * @param report we are interested in, if set at the end of the scan returns
- *        @ref MO_MATCHES_PENDING. If no report is desired, MO_INVALID_IDX should
- *        be passed in.
+ * @param report we are interested in. If the given report will be raised at
+ *        the end location, the function returns @ref MO_MATCHES_PENDING. If no
+ *        match information is desired, MO_INVALID_IDX should be passed in.
   * @return @ref MO_ALIVE if the nfa is still active with no matches pending,
   *         and @ref MO_MATCHES_PENDING if there are matches pending, 0 if not
   *         alive
diff --git a/src/nfa/nfa_api_dispatch.c b/src/nfa/nfa_api_dispatch.c

index 95b1898e6c8d4445897f06001e4a82936fdadb57..9591cad528860661aebbcdd776723368296873cc 100644 (file)
--- a/src/nfa/nfa_api_dispatch.c
+++ b/src/nfa/nfa_api_dispatch.c
@@ -228,7 +228,6 @@ char nfaQueueExecToMatch(const struct NFA *nfa, struct mq *q, s64a end) {
  
      assert(q);
      assert(end >= 0);
-    assert(q->context);
      assert(q->state);
      assert(q->cur < q->end);
      assert(q->end <= MAX_MQE_LEN);
@@ -285,6 +284,11 @@ char nfaInAcceptState(const struct NFA *nfa, ReportID report, struct mq *q) {
      return 0;
  }
  
+char nfaInAnyAcceptState(const struct NFA *nfa, struct mq *q) {
+    DISPATCH_BY_NFA_TYPE(_inAnyAccept(nfa, q));
+    return 0; /* NOTE(review): presumably only reached if dispatch fails */
+}
+
  char nfaQueueExecRose(const struct NFA *nfa, struct mq *q, ReportID r) {
      DEBUG_PRINTF("nfa=%p\n", nfa);
  #ifdef DEBUG
diff --git a/src/nfa/nfa_kind.h b/src/nfa/nfa_kind.h

index 46d0bc4c121a18da8d9d20ccfcd35f32080d9ff8..adc7045f8b94fce65a3b0388648aa05957978fd3 100644 (file)
--- a/src/nfa/nfa_kind.h
+++ b/src/nfa/nfa_kind.h
@@ -47,6 +47,7 @@ enum nfa_kind {
      NFA_OUTFIX,  //!< "outfix" nfa not triggered by external events
      NFA_OUTFIX_RAW, //!< "outfix", but with unmanaged reports
      NFA_REV_PREFIX, //! reverse running prefixes (for som)
+    NFA_EAGER_PREFIX, //!< rose prefix that is also run up to matches
  };
  
  /** \brief True if this kind of engine is triggered by a top event. */
@@ -63,8 +64,10 @@ bool is_triggered(enum nfa_kind k) {
  }
  
  /**
- * \brief True if this kind of engine generates callback events when it
- * enters accept states.
+ * \brief True if this kind of engine actively checks for accept states,
+ * either to halt matching or to raise a callback. Only engines generated
+ * with this property should call nfaQueueExec() or
+ * nfaQueueExecToMatch().
   */
  inline
  bool generates_callbacks(enum nfa_kind k) {
@@ -73,6 +76,24 @@ bool generates_callbacks(enum nfa_kind k) {
      case NFA_OUTFIX:
      case NFA_OUTFIX_RAW:
      case NFA_REV_PREFIX:
+    case NFA_EAGER_PREFIX:
+        return true;
+    default:
+        return false;
+    }
+}
+
+/**
+ * \brief True if this kind of engine has its state inspected to see if it is in
+ * an accept state. Engines generated with this property will commonly call
+ * nfaQueueExecRose(), nfaInAcceptState(), and nfaInAnyAcceptState().
+ */
+inline
+bool inspects_states_for_accepts(enum nfa_kind k) {
+    switch (k) {
+    case NFA_PREFIX:
+    case NFA_INFIX:
+    case NFA_EAGER_PREFIX:
          return true;
      default:
          return false;
diff --git a/src/nfagraph/ng_execute.cpp b/src/nfagraph/ng_execute.cpp

index 92bef73796398f9e1f4735c0a008b1c13d693ff3..46307cd571ef705a325a1b81fc8db7522847b26b 100644 (file)
--- a/src/nfagraph/ng_execute.cpp
+++ b/src/nfagraph/ng_execute.cpp
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are met:
@@ -324,4 +324,49 @@ flat_set<NFAVertex> execute_graph(const NGHolder &running_g,
                           initial_states);
  }
  
+static
+bool can_die_early(const NGHolder &g, const vector<StateInfo> &info,
+                   const dynamic_bitset<> &s,
+                   map<dynamic_bitset<>, u32> &visited, u32 age_limit) {
+    if (contains(visited, s) && visited[s] >= age_limit) {
+        /* we have already visited (or are in the process of visiting) this
+         * state with a limit at least as loose. */
+        return false;
+    }
+    visited[s] = age_limit;
+
+    if (s.none()) {
+        DEBUG_PRINTF("dead\n");
+        return true;
+    }
+
+    if (age_limit == 0) {
+        return false;
+    }
+
+    dynamic_bitset<> all_succ(s.size());
+    step(g, info, s, &all_succ);
+    all_succ.reset(NODE_START_DOTSTAR);
+
+    for (u32 i = 0; i < N_CHARS; i++) {
+        dynamic_bitset<> next = all_succ;
+        filter_by_reach(info, &next, CharReach(i));
+        if (can_die_early(g, info, next, visited, age_limit - 1)) {
+            return true;
+        }
+    }
+
+    return false;
+}
+
+bool can_die_early(const NGHolder &g, u32 age_limit) {
+    if (proper_out_degree(g.startDs, g)) {
+        return false;
+    }
+    const vector<StateInfo> &info = makeInfoTable(g);
+    map<dynamic_bitset<>, u32> visited;
+    return can_die_early(g, info, makeStateBitset(g, {g.start}), visited,
+                         age_limit);
+}
+
  } // namespace ue2
diff --git a/src/nfagraph/ng_execute.h b/src/nfagraph/ng_execute.h

index e2c7c72d041a2580957eb2335b26a3c59a44279c..bdcfecfd42a35ff6822b8cb4dd4658a6a0ae6703 100644 (file)
--- a/src/nfagraph/ng_execute.h
+++ b/src/nfagraph/ng_execute.h
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are met:
@@ -64,6 +64,9 @@ flat_set<NFAVertex> execute_graph(const NGHolder &g, const NGHolder &input_dag,
                                    const flat_set<NFAVertex> &input_start_states,
                                    const flat_set<NFAVertex> &initial);
  
+/* returns true if it is possible for the nfa to die within age_limit bytes */
+bool can_die_early(const NGHolder &g, u32 age_limit);
+
  } // namespace ue2
  
  #endif
diff --git a/src/nfagraph/ng_holder.h b/src/nfagraph/ng_holder.h

index 3243f665c997efd19795ece57d1192ffcce6ca68..07f21d0fab01e0d4004eaf7378d0214843d0ca8b 100644 (file)
--- a/src/nfagraph/ng_holder.h
+++ b/src/nfagraph/ng_holder.h
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are met:
@@ -315,15 +315,26 @@ void remove_edges(const Container &c, NGHolder &h, bool renumber = true) {
      remove_edges(c.begin(), c.end(), h, renumber);
  }
  
-static UNUSED
+inline
  bool is_triggered(const NGHolder &g) {
      return is_triggered(g.kind);
  }
  
-static UNUSED
+inline
  bool generates_callbacks(const NGHolder &g) {
      return generates_callbacks(g.kind);
  }
+
+inline
+bool has_managed_reports(const NGHolder &g) {
+    return has_managed_reports(g.kind);
+}
+
+inline
+bool inspects_states_for_accepts(const NGHolder &g) {
+    return inspects_states_for_accepts(g.kind);
+}
+
  } // namespace ue2
  
  #endif
diff --git a/src/nfagraph/ng_limex.cpp b/src/nfagraph/ng_limex.cpp

index 713fe370b2227266a784ecd5a50402b712c32b65..af7779ba6224b26a8cdbf4cfe78f9ec0ae8fbadc 100644 (file)
--- a/src/nfagraph/ng_limex.cpp
+++ b/src/nfagraph/ng_limex.cpp
@@ -373,7 +373,7 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm,
               const map<u32, vector<vector<CharReach>>> &triggers,
               bool compress_state, bool do_accel, bool impl_test_only, u32 hint,
               const CompileContext &cc) {
-    if (!generates_callbacks(h_in)) {
+    if (!has_managed_reports(h_in)) {
          rm = nullptr;
      } else {
          assert(rm);
@@ -413,7 +413,7 @@ constructNFA(const NGHolder &h_in, const ReportManager *rm,
  
      set<NFAVertex> zombies = findZombies(*h, br_cyclic, state_ids, cc);
  
-    if (generates_callbacks(*h)) {
+    if (has_managed_reports(*h)) {
          assert(rm);
          remapReportsToPrograms(*h, *rm);
      }
@@ -508,7 +508,7 @@ u32 isImplementableNFA(const NGHolder &g, const ReportManager *rm,
          return true;
      }
  
-    if (!generates_callbacks(g)) {
+    if (!has_managed_reports(g)) {
          rm = nullptr;
      } else {
          assert(rm);
@@ -547,7 +547,7 @@ void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm
  
      removeRedundancy(g, som);
  
-    if (rm && generates_callbacks(g)) {
+    if (rm && has_managed_reports(g)) {
          pruneHighlanderDominated(g, *rm);
      }
  
@@ -560,7 +560,7 @@ void reduceImplementableGraph(NGHolder &g, som_type som, const ReportManager *rm
  
  u32 countAccelStates(const NGHolder &g, const ReportManager *rm,
                       const CompileContext &cc) {
-    if (!generates_callbacks(g)) {
+    if (!has_managed_reports(g)) {
          rm = nullptr;
      } else {
          assert(rm);
diff --git a/src/nfagraph/ng_mcclellan.cpp b/src/nfagraph/ng_mcclellan.cpp

index b1c6ff967bf67a3df5ba4c550001ddf44358303d..024cf2c182751191e26948ccd462de58ec8c7902 100644 (file)
--- a/src/nfagraph/ng_mcclellan.cpp
+++ b/src/nfagraph/ng_mcclellan.cpp
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are met:
@@ -531,9 +531,9 @@ unique_ptr<raw_dfa> buildMcClellan(const NGHolder &graph,
      DEBUG_PRINTF("attempting to build ?%d? mcclellan\n", (int)graph.kind);
      assert(allMatchStatesHaveReports(graph));
  
-    bool prunable = grey.highlanderPruneDFA && generates_callbacks(graph);
-    assert(rm || !generates_callbacks(graph));
-    if (!generates_callbacks(graph)) {
+    bool prunable = grey.highlanderPruneDFA && has_managed_reports(graph);
+    assert(rm || !has_managed_reports(graph));
+    if (!has_managed_reports(graph)) {
          rm = nullptr;
      }
  
diff --git a/src/nfagraph/ng_split.cpp b/src/nfagraph/ng_split.cpp

index 42157e1eb37e9d44bd86b0c2af5c673e091d3377..751501369a4d0001d3789c32f4d2340c4998e467 100644 (file)
--- a/src/nfagraph/ng_split.cpp
+++ b/src/nfagraph/ng_split.cpp
@@ -1,5 +1,5 @@
  /*
- * Copyright (c) 2015, Intel Corporation
+ * Copyright (c) 2015-2016, Intel Corporation
   *
   * Redistribution and use in source and binary forms, with or without
   * modification, are permitted provided that the following conditions are met:
@@ -112,6 +112,12 @@ void splitLHS(const NGHolder &base, const vector<NFAVertex> &pivots,
      case NFA_SUFFIX:
          lhs->kind = NFA_INFIX;
          break;
+    case NFA_EAGER_PREFIX:
+        /* Current code should not be assigning eager until well after all the
+         * splitting is done. */
+        assert(0);
+        lhs->kind = NFA_EAGER_PREFIX;
+        break;
      case NFA_REV_PREFIX:
      case NFA_OUTFIX_RAW:
          assert(0);
@@ -154,6 +160,12 @@ void splitRHS(const NGHolder &base, const vector<NFAVertex> &pivots,
      case NFA_OUTFIX:
          rhs->kind = NFA_SUFFIX;
          break;
+    case NFA_EAGER_PREFIX:
+        /* Current code should not be assigning eager until well after all the
+         * splitting is done. */
+        assert(0);
+        rhs->kind = NFA_INFIX;
+        break;
      case NFA_REV_PREFIX:
      case NFA_OUTFIX_RAW:
          assert(0);
diff --git a/src/rose/block.c b/src/rose/block.c

index 55323c2e57ce2a111428e5ead370476a347032b8..a40d229be9018df41d27ee5c1a505af97c29ca92 100644 (file)
--- a/src/rose/block.c
+++ b/src/rose/block.c
@@ -266,6 +266,86 @@ int roseBlockFloating(const struct RoseEngine *t, struct hs_scratch *scratch) {
      return can_stop_matching(scratch);
  }
  
+static rose_inline
+void runEagerPrefixesBlock(const struct RoseEngine *t,
+                           struct hs_scratch *scratch) {
+    if (!t->eagerIterOffset) {
+        return;
+    }
+
+    char *state = scratch->core_info.state;
+    u8 *ara = getActiveLeftArray(t, state); /* indexed by offsets into
+                                             * left_table */
+    const u32 arCount = t->activeLeftCount;
+    const u32 qCount = t->queueCount;
+    const struct LeftNfaInfo *left_table = getLeftTable(t);
+    const struct mmbit_sparse_iter *it = getByOffset(t, t->eagerIterOffset);
+
+    struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
+
+    u32 idx = 0;
+    u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, si_state);
+    for (; ri != MMB_INVALID;
+           ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, si_state)) {
+        const struct LeftNfaInfo *left = left_table + ri;
+        u32 qi = ri + t->leftfixBeginQueue;
+        DEBUG_PRINTF("leftfix %u/%u, maxLag=%u\n", ri, arCount, left->maxLag);
+
+        assert(!fatbit_isset(scratch->aqa, qCount, qi));
+        assert(left->eager);
+        assert(!left->infix);
+
+        struct mq *q = scratch->queues + qi;
+        const struct NFA *nfa = getNfaByQueue(t, qi);
+
+        if (scratch->core_info.len < nfa->minWidth) {
+            /* we know that there is not enough data for this to ever match, so
+             * we can immediately squash. */
+            mmbit_unset(ara, arCount, ri);
+            scratch->tctxt.groups &= left->squash_mask;
+        }
+
+        s64a loc = MIN(scratch->core_info.len, EAGER_STOP_OFFSET);
+
+        fatbit_set(scratch->aqa, qCount, qi);
+        initRoseQueue(t, qi, left, scratch);
+
+        pushQueueAt(q, 0, MQE_START, 0);
+        pushQueueAt(q, 1, MQE_TOP, 0);
+        pushQueueAt(q, 2, MQE_END, loc);
+        nfaQueueInitState(nfa, q);
+
+        char alive = nfaQueueExecToMatch(q->nfa, q, loc);
+
+        if (!alive) {
+            DEBUG_PRINTF("queue %u dead, squashing\n", qi);
+            mmbit_unset(ara, arCount, ri);
+            fatbit_unset(scratch->aqa, qCount, qi);
+            scratch->tctxt.groups &= left->squash_mask;
+        } else if (q->cur == q->end) {
+            assert(alive != MO_MATCHES_PENDING);
+            if (loc == (s64a)scratch->core_info.len) {
+                /* We know that the prefix does not match in the block so we
+                 * can squash the groups anyway even though it did not die */
+                /* TODO: if we knew the minimum lag the leftfix is checked at we
+                 * could make this check tighter */
+                DEBUG_PRINTF("queue %u has no match in block, squashing\n", qi);
+                mmbit_unset(ara, arCount, ri);
+                fatbit_unset(scratch->aqa, qCount, qi);
+                scratch->tctxt.groups &= left->squash_mask;
+            } else {
+                DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
+                q->cur = q->end = 0;
+                pushQueueAt(q, 0, MQE_START, loc);
+            }
+        } else {
+            assert(alive == MO_MATCHES_PENDING);
+            DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi);
+            q->end--; /* remove end item */
+        }
+    }
+}
+
  void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
      assert(t);
      assert(scratch);
@@ -314,6 +394,8 @@ void roseBlockExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
          hwlmExec(sbtable, scratch->core_info.buf, sblen, 0, roseCallback,
                   scratch, tctxt->groups);
      } else {
+        runEagerPrefixesBlock(t, scratch);
+
          if (roseBlockAnchored(t, scratch)) {
              return;
          }
diff --git a/src/rose/program_runtime.h b/src/rose/program_runtime.h

index 860f7599cce9e18d280ff18f56dc2c21becb3efd..3794ac3f3bf0cc90a4327159a9cdfd8005377a81 100644 (file)
--- a/src/rose/program_runtime.h
+++ b/src/rose/program_runtime.h
@@ -424,7 +424,7 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch,
      }
  
      s64a loc = (s64a)end - ci->buf_offset - leftfixLag;
-    assert(loc >= q_cur_loc(q));
+    assert(loc >= q_cur_loc(q) || left->eager);
      assert(leftfixReport != MO_INVALID_IDX);
  
      if (!is_infix && left->transient) {
@@ -471,7 +471,13 @@ char roseTestLeftfix(const struct RoseEngine *t, struct hs_scratch *scratch,
          DEBUG_PRINTF("checking for report %u\n", leftfixReport);
          DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv);
          return rv == MO_MATCHES_PENDING;
+    } else if (q_cur_loc(q) > loc) {
+        /* an eager leftfix may have already progressed past loc if there is no
+         * match at loc. */
+        assert(left->eager);
+        return 0;
      } else {
+        assert(q_cur_loc(q) == loc);
          DEBUG_PRINTF("checking for report %u\n", leftfixReport);
          char rv = nfaInAcceptState(q->nfa, leftfixReport, q);
          DEBUG_PRINTF("leftfix done %hhd\n", (signed char)rv);
diff --git a/src/rose/rose_build_add.cpp b/src/rose/rose_build_add.cpp

index ae1553614c4d4e30b881f59b6fcb93028c9474dd..fe2c259eaf5b9b4d2f4d1fd7ef2210fae3689b83 100644 (file)
--- a/src/rose/rose_build_add.cpp
+++ b/src/rose/rose_build_add.cpp
@@ -1038,6 +1038,7 @@ bool canImplementGraph(RoseBuildImpl *tbi, const RoseInGraph &in, NGHolder &h,
                  return false;
              }
              break;
+        case NFA_EAGER_PREFIX:
          case NFA_REV_PREFIX:
          case NFA_OUTFIX_RAW:
              DEBUG_PRINTF("kind %u\n", (u32)h.kind);
diff --git a/src/rose/rose_build_bytecode.cpp b/src/rose/rose_build_bytecode.cpp

index 3f36a05e1984163a56375640649c2d069597af16..3f56b1013bb7842d60f5418e41b569658abc8040 100644 (file)
--- a/src/rose/rose_build_bytecode.cpp
+++ b/src/rose/rose_build_bytecode.cpp
@@ -50,6 +50,7 @@
  #include "nfa/nfa_build_util.h"
  #include "nfa/nfa_internal.h"
  #include "nfa/shufticompile.h"
+#include "nfagraph/ng_execute.h"
  #include "nfagraph/ng_holder.h"
  #include "nfagraph/ng_lbr.h"
  #include "nfagraph/ng_limex.h"
@@ -1046,8 +1047,9 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left,
      // streaming mode.
      const bool compress_state = !is_transient;
  
-    assert(!left.graph()
-           || left.graph()->kind == (is_prefix ? NFA_PREFIX : NFA_INFIX));
+    assert(is_prefix || !left.graph() || left.graph()->kind == NFA_INFIX);
+    assert(!is_prefix || !left.graph() || left.graph()->kind == NFA_PREFIX
+           || left.graph()->kind == NFA_EAGER_PREFIX);
  
      // Holder should be implementable as an NFA at the very least.
      if (!left.dfa() && left.graph()) {
@@ -1089,7 +1091,9 @@ makeLeftNfa(const RoseBuildImpl &tbi, left_id &left,
  
      if (!n && left.graph()) {
          map<u32, vector<vector<CharReach>>> triggers;
-        findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
+        if (left.graph()->kind == NFA_INFIX) {
+            findTriggerSequences(tbi, infixTriggers.at(left), &triggers);
+        }
          n = constructNFA(*left.graph(), nullptr, fixed_depth_tops, triggers,
                           compress_state, cc);
      }
@@ -1125,17 +1129,309 @@ void setLeftNfaProperties(NFA &n, const left_id &left) {
      // graph.
  }
  
+static
+void appendTailToHolder(NGHolder &h, const flat_set<ReportID> &reports,
+                        const vector<NFAVertex> &starts,
+                        const vector<CharReach> &tail) {
+    assert(!tail.empty());
+    NFAVertex curr = add_vertex(h);
+    for (NFAVertex v : starts) {
+        assert(!edge(v, h.acceptEod, h).second);
+        assert(h[v].reports == reports);
+        h[v].reports.clear();
+        remove_edge(v, h.accept, h);
+        add_edge(v, curr, h);
+    }
+    auto it = tail.begin();
+    h[curr].char_reach = *it;
+    ++it;
+    while (it != tail.end()) {
+        NFAVertex old = curr;
+        curr = add_vertex(h);
+        add_edge(old, curr, h);
+        assert(!it->none());
+        h[curr].char_reach = *it;
+        ++it;
+    }
+
+    h[curr].reports = reports;
+    add_edge(curr, h.accept, h);
+}
+
+static
+void appendTailToHolder(NGHolder &h, const vector<CharReach> &tail) {
+    assert(in_degree(h.acceptEod, h) == 1);
+    assert(!tail.empty());
+
+    map<flat_set<ReportID>, vector<NFAVertex> > reporters;
+    for (auto v : inv_adjacent_vertices_range(h.accept, h)) {
+        reporters[h[v].reports].push_back(v);
+    }
+
+    for (const auto &e : reporters) {
+        appendTailToHolder(h, e.first, e.second, tail);
+    }
+
+    h.renumberEdges();
+}
+
+static
+u32 decreaseLag(const RoseBuildImpl &build, NGHolder &h,
+                const vector<RoseVertex> &succs) {
+    const RoseGraph &rg = build.g;
+    static const size_t MAX_RESTORE_LEN = 5;
+
+    vector<CharReach> restored(MAX_RESTORE_LEN);
+    for (RoseVertex v : succs) {
+        u32 lag = rg[v].left.lag;
+        for (u32 lit_id : rg[v].literals) {
+            u32 delay = build.literals.right.at(lit_id).delay;
+            const ue2_literal &literal = build.literals.right.at(lit_id).s;
+            assert(lag <= literal.length() + delay);
+            size_t base = literal.length() + delay - lag;
+            if (base >= literal.length()) {
+                return 0;
+            }
+            size_t len = literal.length() - base;
+            len = MIN(len, restored.size());
+            restored.resize(len);
+            auto lit_it = literal.begin() + base;
+            for (u32 i = 0; i < len; i++) {
+                assert(lit_it != literal.end());
+                restored[i] |= *lit_it;
+                ++lit_it;
+            }
+        }
+    }
+
+    assert(!restored.empty());
+
+    appendTailToHolder(h, restored);
+
+    return restored.size();
+}
+
+#define EAGER_DIE_BEFORE_LIMIT 10
+
+struct eager_info {
+    shared_ptr<NGHolder> new_graph;
+    u32 lag_adjust = 0;
+};
+
+static
+bool checkSuitableForEager(bool is_prefix, const left_id &left,
+                           const RoseBuildImpl &build,
+                           const vector<RoseVertex> &succs,
+                           rose_group squash_mask, rose_group initial_groups,
+                           eager_info &ei, const CompileContext &cc) {
+    DEBUG_PRINTF("checking prefix --> %016llx...\n", squash_mask);
+
+    const RoseGraph &rg = build.g;
+
+    if (!is_prefix) {
+        DEBUG_PRINTF("not prefix\n");
+        return false; /* only prefixes (for now...) */
+    }
+
+    if ((initial_groups & squash_mask) == initial_groups) {
+        DEBUG_PRINTF("no squash -- useless\n");
+        return false;
+    }
+
+    for (RoseVertex s : succs) {
+        if (build.isInETable(s)
+            || contains(rg[s].literals, build.eod_event_literal_id)) {
+            return false; /* Ignore EOD related prefixes */
+        }
+    }
+
+    if (left.dfa()) {
+        const raw_dfa &dfa = *left.dfa();
+        if (dfa.start_floating != DEAD_STATE) {
+            return false; /* not purely anchored */
+        }
+        if (!dfa.states[dfa.start_anchored].reports.empty()) {
+            return false; /* vacuous (todo: handle?) */
+        }
+
+        if (!can_die_early(dfa, EAGER_DIE_BEFORE_LIMIT)) {
+            return false;
+        }
+        ei.new_graph = rg[succs[0]].left.graph;
+    } else if (left.graph()) {
+        const NGHolder &g = *left.graph();
+        if (proper_out_degree(g.startDs, g)) {
+            return false; /* not purely anchored */
+        }
+        if (is_match_vertex(g.start, g)) {
+            return false; /* vacuous (todo: handle?) */
+        }
+
+        ei.new_graph = cloneHolder(*left.graph());
+        auto gg = ei.new_graph;
+        gg->kind = NFA_EAGER_PREFIX;
+
+        ei.lag_adjust = decreaseLag(build, *gg, succs);
+
+        if (!can_die_early(*gg, EAGER_DIE_BEFORE_LIMIT)) {
+            DEBUG_PRINTF("not eager as stuck alive\n");
+            return false;
+        }
+
+        /* We need to ensure that appending the literal tail has not left us
+         * with a graph that can no longer be built as an nfa. */
+        bool ok = isImplementableNFA(*gg, nullptr, cc);
+        if (!ok) {
+            return false;
+        }
+    } else {
+        DEBUG_PRINTF("unable to determine if good for eager running\n");
+        return false;
+    }
+
+    DEBUG_PRINTF("eager prefix\n");
+    return true;
+}
+
+static
+left_id updateLeftfixWithEager(RoseGraph &g, const eager_info &ei,
+                               const vector<RoseVertex> &succs) {
+    u32 lag_adjust = ei.lag_adjust;
+    auto gg = ei.new_graph;
+    for (RoseVertex v : succs) {
+        g[v].left.graph = gg;
+        assert(g[v].left.lag >= lag_adjust);
+        g[v].left.lag -= lag_adjust;
+        DEBUG_PRINTF("added %u literal chars back, new lag %u\n", lag_adjust,
+                     g[v].left.lag);
+    }
+    left_id leftfix = g[succs[0]].left;
+
+    if (leftfix.graph()) {
+        assert(leftfix.graph()->kind == NFA_PREFIX
+               || leftfix.graph()->kind == NFA_EAGER_PREFIX);
+        leftfix.graph()->kind = NFA_EAGER_PREFIX;
+    }
+    if (leftfix.dfa()) {
+        assert(leftfix.dfa()->kind == NFA_PREFIX);
+        leftfix.dfa()->kind = NFA_EAGER_PREFIX;
+    }
+
+    return leftfix;
+}
+
+static
+bool buildLeftfix(RoseBuildImpl &build, build_context &bc, bool prefix, u32 qi,
+                  const map<left_id, set<PredTopPair> > &infixTriggers,
+                  set<u32> *no_retrigger_queues, set<u32> *eager_queues,
+                  const map<left_id, eager_info> &eager,
+                  const vector<RoseVertex> &succs, left_id leftfix) {
+    RoseGraph &g = build.g;
+    const CompileContext &cc = build.cc;
+    const ReportManager &rm = build.rm;
+
+    bool is_transient = contains(build.transient, leftfix);
+    rose_group squash_mask = build.rose_squash_masks.at(leftfix);
+
+    DEBUG_PRINTF("making %sleftfix\n", is_transient ? "transient " : "");
+
+    if (contains(eager, leftfix)) {
+        eager_queues->insert(qi);
+        leftfix = updateLeftfixWithEager(g, eager.at(leftfix), succs);
+    }
+
+    aligned_unique_ptr<NFA> nfa;
+    // Need to build NFA, which is either predestined to be a Haig (in SOM mode)
+    // or could be all manner of things.
+    if (leftfix.haig()) {
+        nfa = goughCompile(*leftfix.haig(), build.ssm.somPrecision(), cc, rm);
+    }  else {
+        nfa = makeLeftNfa(build, leftfix, prefix, is_transient, infixTriggers,
+                          cc);
+    }
+
+    if (!nfa) {
+        assert(!"failed to build leftfix");
+        return false;
+    }
+
+    setLeftNfaProperties(*nfa, leftfix);
+
+    build.leftfix_queue_map.emplace(leftfix, qi);
+    nfa->queueIndex = qi;
+
+    if (!prefix && !leftfix.haig() && leftfix.graph()
+        && nfaStuckOn(*leftfix.graph())) {
+        DEBUG_PRINTF("%u sticks on\n", qi);
+        no_retrigger_queues->insert(qi);
+    }
+
+    DEBUG_PRINTF("built leftfix, qi=%u\n", qi);
+    add_nfa_to_blob(bc, *nfa);
+
+    // Leftfixes can have stop alphabets.
+    vector<u8> stop(N_CHARS, 0);
+    /* haigs track som information - need more care */
+    som_type som = leftfix.haig() ? SOM_LEFT : SOM_NONE;
+    if (leftfix.graph()) {
+        stop = findLeftOffsetStopAlphabet(*leftfix.graph(), som);
+    } else if (leftfix.castle()) {
+        stop = findLeftOffsetStopAlphabet(*leftfix.castle(), som);
+    }
+
+    // Infix NFAs can have bounds on their queue lengths.
+    u32 max_queuelen = UINT32_MAX;
+    if (!prefix) {
+        set<ue2_literal> lits;
+        for (RoseVertex v : succs) {
+            for (auto u : inv_adjacent_vertices_range(v, g)) {
+                for (u32 lit_id : g[u].literals) {
+                    lits.insert(build.literals.right.at(lit_id).s);
+                }
+            }
+        }
+        DEBUG_PRINTF("%zu literals\n", lits.size());
+        max_queuelen = findMaxInfixMatches(leftfix, lits);
+        if (max_queuelen < UINT32_MAX) {
+            max_queuelen++;
+        }
+    }
+
+    u32 max_width;
+    if (is_transient) {
+        depth d = findMaxWidth(leftfix);
+        assert(d.is_finite());
+        max_width = d;
+    } else {
+        max_width = 0;
+    }
+
+    u8 cm_count = 0;
+    CharReach cm_cr;
+    if (cc.grey.allowCountingMiracles) {
+        findCountingMiracleInfo(leftfix, stop, &cm_count, &cm_cr);
+    }
+
+    for (RoseVertex v : succs) {
+        bc.leftfix_info.emplace(v, left_build_info(qi, g[v].left.lag, max_width,
+                                                   squash_mask, stop,
+                                                   max_queuelen, cm_count,
+                                                   cm_cr));
+    }
+
+    return true;
+}
+
  static
  bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
                      QueueIndexFactory &qif, set<u32> *no_retrigger_queues,
-                    bool do_prefix) {
-    const RoseGraph &g = tbi.g;
+                    set<u32> *eager_queues, bool do_prefix) {
+    RoseGraph &g = tbi.g;
      const CompileContext &cc = tbi.cc;
-    const ReportManager &rm = tbi.rm;
-
-    ue2::unordered_map<left_id, u32> seen; // already built queue indices
  
      map<left_id, set<PredTopPair> > infixTriggers;
+    vector<left_id> order;
+    unordered_map<left_id, vector<RoseVertex> > succs;
      findInfixTriggers(tbi, &infixTriggers);
  
      for (auto v : vertices_range(g)) {
@@ -1143,6 +1439,7 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
              continue;
          }
  
+        assert(tbi.isNonRootSuccessor(v) != tbi.isRootSuccessor(v));
          bool is_prefix = tbi.isRootSuccessor(v);
  
          if (do_prefix != is_prefix) {
@@ -1156,8 +1453,6 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
          // our in-edges.
          assert(roseHasTops(g, v));
  
-        u32 qi; // queue index, set below.
-        u32 lag = g[v].left.lag;
          bool is_transient = contains(tbi.transient, leftfix);
  
          // Transient leftfixes can sometimes be implemented solely with
@@ -1173,95 +1468,42 @@ bool buildLeftfixes(RoseBuildImpl &tbi, build_context &bc,
              }
          }
  
-        if (contains(seen, leftfix)) {
-            // NFA already built.
-            qi = seen[leftfix];
-            assert(contains(bc.engineOffsets, qi));
-            DEBUG_PRINTF("sharing leftfix, qi=%u\n", qi);
-        } else {
-            DEBUG_PRINTF("making %sleftfix\n", is_transient ? "transient " : "");
-
-            aligned_unique_ptr<NFA> nfa;
-
-            // Need to build NFA, which is either predestined to be a Haig (in
-            // SOM mode) or could be all manner of things.
-            if (leftfix.haig()) {
-                nfa = goughCompile(*leftfix.haig(), tbi.ssm.somPrecision(), cc,
-                                   rm);
-            }  else {
-                assert(tbi.isNonRootSuccessor(v) != tbi.isRootSuccessor(v));
-                nfa = makeLeftNfa(tbi, leftfix, is_prefix, is_transient,
-                                  infixTriggers, cc);
-            }
-
-            if (!nfa) {
-                assert(!"failed to build leftfix");
-                return false;
-            }
+        if (!contains(succs, leftfix)) {
+            order.push_back(leftfix);
+        }
  
-            setLeftNfaProperties(*nfa, leftfix);
+        succs[leftfix].push_back(v);
+    }
  
-            qi = qif.get_queue();
-            tbi.leftfix_queue_map.emplace(leftfix, qi);
-            nfa->queueIndex = qi;
+    rose_group initial_groups = tbi.getInitialGroups();
+    rose_group combined_eager_squashed_mask = ~0ULL;
  
-            if (!is_prefix && !leftfix.haig() && leftfix.graph() &&
-                nfaStuckOn(*leftfix.graph())) {
-                DEBUG_PRINTF("%u sticks on\n", qi);
-                no_retrigger_queues->insert(qi);
-            }
+    map<left_id, eager_info> eager;
  
-            DEBUG_PRINTF("built leftfix, qi=%u\n", qi);
-            add_nfa_to_blob(bc, *nfa);
-            seen.emplace(leftfix, qi);
-        }
+    for (const left_id &leftfix : order) {
+        const auto &left_succs = succs[leftfix];
  
          rose_group squash_mask = tbi.rose_squash_masks.at(leftfix);
+        eager_info ei;
  
-        // Leftfixes can have stop alphabets.
-        vector<u8> stop(N_CHARS, 0);
-        /* haigs track som information - need more care */
-        som_type som = leftfix.haig() ? SOM_LEFT : SOM_NONE;
-        if (leftfix.graph()) {
-            stop = findLeftOffsetStopAlphabet(*leftfix.graph(), som);
-        } else if (leftfix.castle()) {
-            stop = findLeftOffsetStopAlphabet(*leftfix.castle(), som);
-        }
-
-        // Infix NFAs can have bounds on their queue lengths.
-        u32 max_queuelen = UINT32_MAX;
-        if (!is_prefix) {
-            set<ue2_literal> lits;
-            for (auto u : inv_adjacent_vertices_range(v, tbi.g)) {
-                for (u32 lit_id : tbi.g[u].literals) {
-                    lits.insert(tbi.literals.right.at(lit_id).s);
-                }
-            }
-            DEBUG_PRINTF("%zu literals\n", lits.size());
-            max_queuelen = findMaxInfixMatches(leftfix, lits);
-            if (max_queuelen < UINT32_MAX) {
-                max_queuelen++;
-            }
-        }
-
-        u32 max_width;
-        if (is_transient) {
-            depth d = findMaxWidth(leftfix);
-            assert(d.is_finite());
-            max_width = d;
-        } else {
-            max_width = 0;
+        if (checkSuitableForEager(do_prefix, leftfix, tbi, left_succs,
+                                  squash_mask, initial_groups, ei, cc)) {
+            eager[leftfix] = ei;
+            combined_eager_squashed_mask &= squash_mask;
+            DEBUG_PRINTF("combo %016llx...\n", combined_eager_squashed_mask);
          }
+    }
  
-        u8 cm_count = 0;
-        CharReach cm_cr;
-        if (cc.grey.allowCountingMiracles) {
-            findCountingMiracleInfo(leftfix, stop, &cm_count, &cm_cr);
-        }
+    if (do_prefix && combined_eager_squashed_mask & initial_groups) {
+        DEBUG_PRINTF("eager groups won't squash everyone - be lazy\n");
+        eager_queues->clear();
+        eager.clear();
+    }
  
-        bc.leftfix_info.emplace(
-            v, left_build_info(qi, lag, max_width, squash_mask, stop,
-                               max_queuelen, cm_count, cm_cr));
+    for (const left_id &leftfix : order) {
+        buildLeftfix(tbi, bc, do_prefix, qif.get_queue(), infixTriggers,
+                     no_retrigger_queues, eager_queues, eager, succs[leftfix],
+                     leftfix);
      }
  
      return true;
@@ -1613,9 +1855,11 @@ void buildCountingMiracles(RoseBuildImpl &build, build_context &bc) {
      }
  }
  
+/* Note: buildNfas may reduce the lag for vertices that have prefixes */
  static
  bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif,
-               set<u32> *no_retrigger_queues, u32 *leftfixBeginQueue) {
+               set<u32> *no_retrigger_queues, set<u32> *eager_queues,
+               u32 *leftfixBeginQueue) {
      assignSuffixQueues(tbi, bc);
  
      if (!buildSuffixes(tbi, bc, no_retrigger_queues)) {
@@ -1624,11 +1868,13 @@ bool buildNfas(RoseBuildImpl &tbi, build_context &bc, QueueIndexFactory &qif,
  
      *leftfixBeginQueue = qif.allocated_count();
  
-    if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, true)) {
+    if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues,
+                        true)) {
          return false;
      }
  
-    if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, false)) {
+    if (!buildLeftfixes(tbi, bc, qif, no_retrigger_queues, eager_queues,
+                        false)) {
          return false;
      }
  
@@ -1672,10 +1918,10 @@ static
  void findTransientQueues(const map<RoseVertex, left_build_info> &leftfix_info,
                           set<u32> *out) {
      DEBUG_PRINTF("curating transient queues\n");
-    for (const auto &rbi : leftfix_info | map_values) {
-        if (rbi.transient) {
-            DEBUG_PRINTF("q %u is transient\n", rbi.queue);
-            out->insert(rbi.queue);
+    for (const auto &build : leftfix_info | map_values) {
+        if (build.transient) {
+            DEBUG_PRINTF("q %u is transient\n", build.queue);
+            out->insert(build.queue);
          }
      }
  }
@@ -3301,9 +3547,9 @@ void assignStateIndices(const RoseBuildImpl &build, build_context &bc) {
  }
  
  static
-bool hasUsefulStops(const left_build_info &rbi) {
+bool hasUsefulStops(const left_build_info &build) {
      for (u32 i = 0; i < N_CHARS; i++) {
-        if (rbi.stopAlphabet[i]) {
+        if (build.stopAlphabet[i]) {
              return true;
          }
      }
@@ -3312,6 +3558,7 @@ bool hasUsefulStops(const left_build_info &rbi) {
  
  static
  void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc,
+                        const set<u32> &eager_queues,
                          u32 leftfixBeginQueue, u32 leftfixCount,
                          vector<LeftNfaInfo> &leftTable, u32 *laggedRoseCount,
                          size_t *history) {
@@ -3371,6 +3618,7 @@ void buildLeftInfoTable(const RoseBuildImpl &tbi, build_context &bc,
              DEBUG_PRINTF("mw = %u\n", lbi.transient);
              left.transient = verify_u8(lbi.transient);
              left.infix = tbi.isNonRootSuccessor(v);
+            left.eager = contains(eager_queues, lbi.queue);
  
              // A rose has a lagIndex if it's non-transient and we are
              // streaming.
@@ -4271,6 +4519,25 @@ void fillMatcherDistances(const RoseBuildImpl &build, RoseEngine *engine) {
      }
  }
  
+static /* sparse iter over eager queues, rebased to leftfixBeginQueue */
+u32 buildEagerQueueIter(const set<u32> &eager, u32 leftfixBeginQueue,
+                        u32 queue_count,
+                        build_context &bc) {
+    if (eager.empty()) {
+        return 0; /* no iterator is built when there are no eager queues */
+    }
+
+    vector<u32> vec; /* eager queue indices relative to leftfixBeginQueue */
+    for (u32 q : eager) {
+        assert(q >= leftfixBeginQueue); /* else the subtraction would wrap */
+        vec.push_back(q - leftfixBeginQueue);
+    }
+
+    vector<mmbit_sparse_iter> iter;
+    mmbBuildSparseIterator(iter, vec, queue_count - leftfixBeginQueue);
+    return addIteratorToTable(bc, iter); /* stored as eagerIterOffset */
+}
+
  aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
      DerivedBoundaryReports dboundary(boundary);
  
@@ -4305,7 +4572,10 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
      u32 outfixEndQueue = qif.allocated_count();
      u32 leftfixBeginQueue = outfixEndQueue;
  
-    if (!buildNfas(*this, bc, qif, &no_retrigger_queues,
+    set<u32> eager_queues;
+
+    /* Note: buildNfas may reduce the lag for vertices that have prefixes */
+    if (!buildNfas(*this, bc, qif, &no_retrigger_queues, &eager_queues,
                     &leftfixBeginQueue)) {
          return nullptr;
      }
@@ -4325,7 +4595,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
  
      u32 laggedRoseCount = 0;
      vector<LeftNfaInfo> leftInfoTable;
-    buildLeftInfoTable(*this, bc, leftfixBeginQueue,
+    buildLeftInfoTable(*this, bc, eager_queues, leftfixBeginQueue,
                         queue_count - leftfixBeginQueue, leftInfoTable,
                         &laggedRoseCount, &historyRequired);
  
@@ -4340,6 +4610,8 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
      buildActiveLeftIter(leftInfoTable, activeLeftIter);
  
      u32 lastByteOffset = buildLastByteIter(g, bc);
+    u32 eagerIterOffset = buildEagerQueueIter(eager_queues, leftfixBeginQueue,
+                                              queue_count, bc);
  
      // Enforce role table resource limit.
      if (num_vertices(g) > cc.grey.limitRoseRoleCount) {
@@ -4513,6 +4785,7 @@ aligned_unique_ptr<RoseEngine> RoseBuildImpl::buildFinalEngine(u32 minWidth) {
      engine->activeArrayCount = activeArrayCount;
      engine->activeLeftCount = activeLeftCount;
      engine->queueCount = queue_count;
+    engine->eagerIterOffset = eagerIterOffset;
      engine->handledKeyCount = bc.handledKeys.size();
  
      engine->group_weak_end = group_weak_end;
diff --git a/src/rose/rose_build_dump.cpp b/src/rose/rose_build_dump.cpp

index 46d1676de1ba5c43261a64e11d71907af5e2071f..2c3f326e441ee6fd1e12411dabbfca675bfb886d 100644 (file)
--- a/src/rose/rose_build_dump.cpp
+++ b/src/rose/rose_build_dump.cpp
@@ -76,6 +76,8 @@ string to_string(nfa_kind k) {
          return "REV_PREFIX";
      case NFA_OUTFIX_RAW:
          return "OUTFIX_RAW";
+    case NFA_EAGER_PREFIX:
+        return "EAGER_PREFIX";
      }
      assert(0);
      return "?";
diff --git a/src/rose/rose_build_impl.h b/src/rose/rose_build_impl.h

index 5f1871e44f8f304f470959915c0c4e14b1927925..71940e0733f20e5dcf5b84d2614a41a3ccf5b424 100644 (file)
--- a/src/rose/rose_build_impl.h
+++ b/src/rose/rose_build_impl.h
@@ -150,7 +150,7 @@ struct left_id {
          : g(in.graph.get()), c(in.castle.get()), d(in.dfa.get()),
            h(in.haig.get()), dfa_min_width(in.dfa_min_width),
            dfa_max_width(in.dfa_max_width) {
-        assert(!g || !generates_callbacks(*g));
+        assert(!g || !has_managed_reports(*g));
      }
      bool operator==(const left_id &b) const {
          bool rv = g == b.g && c == b.c && h == b.h && d == b.d;
diff --git a/src/rose/rose_dump.cpp b/src/rose/rose_dump.cpp

index 9f55dbf22a769a7fc0257095b1583a4648b66c73..1d63c71a8cd1c1440c8da95c090d71feb345ceb7 100644 (file)
--- a/src/rose/rose_dump.cpp
+++ b/src/rose/rose_dump.cpp
@@ -605,6 +605,9 @@ void dumpNfaNotes(ofstream &fout, const RoseEngine *t, const NFA *n) {
      }
  
      const LeftNfaInfo *left = getLeftInfoByQueue(t, qindex);
+    if (left->eager) {
+        fout << "eager ";
+    }
      if (left->transient) {
          fout << "transient " << (u32)left->transient << " ";
      }
@@ -1018,6 +1021,7 @@ void roseDumpStructRaw(const RoseEngine *t, FILE *f) {
      DUMP_U32(t, activeArrayCount);
      DUMP_U32(t, activeLeftCount);
      DUMP_U32(t, queueCount);
+    DUMP_U32(t, eagerIterOffset);
      DUMP_U32(t, handledKeyCount);
      DUMP_U32(t, leftOffset);
      DUMP_U32(t, roseCount);
diff --git a/src/rose/rose_internal.h b/src/rose/rose_internal.h

index 9dd173500ec05b2353b2e522d265e3a9833b9997..5b6a9dc6bf791edb88d5bea5b41877da320e20a8 100644 (file)
--- a/src/rose/rose_internal.h
+++ b/src/rose/rose_internal.h
@@ -144,6 +144,7 @@ struct LeftNfaInfo {
      u32 stopTable; // stop table index, or ROSE_OFFSET_INVALID
      u8 transient; /**< 0 if not transient, else max width of transient prefix */
      char infix; /* TODO: make flags */
+    char eager; /**< nfa should be run eagerly to first match or death */
      char eod_check; /**< nfa is used by the event eod literal */
      u32 countingMiracleOffset; /** if not 0, offset to RoseCountingMiracle. */
      rose_group squash_mask; /* & mask applied when rose nfa dies */
@@ -366,6 +367,9 @@ struct RoseEngine {
      u32 activeLeftCount; //number of nfas tracked in the active rose array
      u32 queueCount;      /**< number of nfa queues */
  
+    u32 eagerIterOffset; /**< offset to sparse iter for eager prefixes or 0 if
+                          * none */
+
      /** \brief Number of keys used by CHECK_SET_HANDLED instructions in role
       * programs. Used to size the handled_roles fatbit in scratch. */
      u32 handledKeyCount;
diff --git a/src/rose/runtime.h b/src/rose/runtime.h

index f7f6641d1521913616faf72ffdde768e39b58a8f..60c7d34b6482dc78dfb076bd73177cd8023ecacc 100644 (file)
--- a/src/rose/runtime.h
+++ b/src/rose/runtime.h
@@ -55,6 +55,11 @@
  
  #define rose_inline really_inline
  
+/* Maximum offset that we will eagerly run prefixes to. Beyond this point, eager
+ * prefixes are always run in exactly the same way as normal prefixes. */
+#define EAGER_STOP_OFFSET 64
+
+
  static really_inline
  const void *getByOffset(const struct RoseEngine *t, u32 offset) {
      assert(offset < t->size);
diff --git a/src/rose/stream.c b/src/rose/stream.c

index ffe965dd2ce06f4df02f7d8b7e2e8010845af7e0..181bfe659bdb8d7f2413f653e9ae8aa88bf65d3f 100644 (file)
--- a/src/rose/stream.c
+++ b/src/rose/stream.c
@@ -423,6 +423,92 @@ void do_rebuild(const struct RoseEngine *t, const struct HWLM *ftable,
      assert(!can_stop_matching(scratch));
  }
  
+static rose_inline /* run each eager prefix to first match or death */
+void runEagerPrefixesStream(const struct RoseEngine *t,
+                            struct hs_scratch *scratch) {
+    if (!t->eagerIterOffset
+        || scratch->core_info.buf_offset >= EAGER_STOP_OFFSET) {
+        return; /* no eager prefixes, or already past the eager window */
+    }
+
+    char *state = scratch->core_info.state;
+    u8 *ara = getActiveLeftArray(t, state); /* indexed by offsets into
+                                             * left_table */
+    const u32 arCount = t->activeLeftCount;
+    const u32 qCount = t->queueCount;
+    const struct LeftNfaInfo *left_table = getLeftTable(t);
+    const struct mmbit_sparse_iter *it = getByOffset(t, t->eagerIterOffset);
+
+    struct mmbit_sparse_state si_state[MAX_SPARSE_ITER_STATES];
+
+    u32 idx = 0;
+    u32 ri = mmbit_sparse_iter_begin(ara, arCount, &idx, it, si_state);
+    for (; ri != MMB_INVALID;
+           ri = mmbit_sparse_iter_next(ara, arCount, ri, &idx, it, si_state)) {
+        const struct LeftNfaInfo *left = left_table + ri;
+        u32 qi = ri + t->leftfixBeginQueue; /* left_table index -> queue index */
+        DEBUG_PRINTF("leftfix %u of %u, maxLag=%u\n", ri, arCount, left->maxLag);
+
+        assert(!fatbit_isset(scratch->aqa, qCount, qi));
+        assert(left->eager);
+        assert(!left->infix);
+
+        struct mq *q = scratch->queues + qi;
+        const struct NFA *nfa = getNfaByQueue(t, qi);
+        s64a loc = MIN(scratch->core_info.len,
+                       EAGER_STOP_OFFSET - scratch->core_info.buf_offset);
+
+        fatbit_set(scratch->aqa, qCount, qi);
+        initRoseQueue(t, qi, left, scratch);
+
+        if (scratch->core_info.buf_offset) { /* buffer starts past offset 0 */
+            s64a sp = left->transient ? -(s64a)scratch->core_info.hlen
+                                      : -(s64a)loadRoseDelay(t, state, left);
+            pushQueueAt(q, 0, MQE_START, sp);
+            if (scratch->core_info.buf_offset + sp > 0) {
+                loadStreamState(nfa, q, sp); /* start lies past offset 0 */
+                /* if the leftfix is currently in a match state, we cannot
+                 * advance it. */
+                if (nfaInAnyAcceptState(nfa, q)) {
+                    continue; /* queue keeps only its MQE_START */
+                }
+                pushQueueAt(q, 1, MQE_END, loc);
+            } else { /* start point is at or before offset 0 */
+                pushQueueAt(q, 1, MQE_TOP, sp);
+                pushQueueAt(q, 2, MQE_END, loc);
+                nfaQueueInitState(q->nfa, q);
+            }
+        } else { /* buffer starts at offset 0: fresh start */
+            pushQueueAt(q, 0, MQE_START, 0);
+            pushQueueAt(q, 1, MQE_TOP, 0);
+            pushQueueAt(q, 2, MQE_END, loc);
+            nfaQueueInitState(nfa, q);
+        }
+
+        char alive = nfaQueueExecToMatch(q->nfa, q, loc); /* 0 => nfa dead */
+
+        if (!alive) {
+            DEBUG_PRINTF("queue %u dead, squashing\n", qi);
+            mmbit_unset(ara, arCount, ri);
+            fatbit_unset(scratch->aqa, qCount, qi);
+            scratch->tctxt.groups &= left->squash_mask;
+        } else if (q->cur == q->end) { /* every queued event was consumed */
+            assert(alive != MO_MATCHES_PENDING);
+            /* unlike in block mode we cannot squash groups if there is no match
+             * in this block as we need the groups on for later stream writes */
+            /* TODO: investigate possibility of a method to suppress groups for
+             * a single stream block. */
+            DEBUG_PRINTF("queue %u finished, nfa lives\n", qi);
+            q->cur = q->end = 0;
+            pushQueueAt(q, 0, MQE_START, loc); /* restart queue at loc */
+        } else {
+            assert(alive == MO_MATCHES_PENDING);
+            DEBUG_PRINTF("queue %u unfinished, nfa lives\n", qi);
+            q->end--; /* remove end item */
+        }
+    }
+}
+
  void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
      DEBUG_PRINTF("OH HAI\n");
      assert(t);
@@ -472,6 +558,8 @@ void roseStreamExec(const struct RoseEngine *t, struct hs_scratch *scratch) {
          streamInitSufPQ(t, state, scratch);
      }
  
+    runEagerPrefixesStream(t, scratch);
+
      u32 alen = t->anchoredDistance > offset ?
          MIN(length + offset, t->anchoredDistance) - offset : 0;
author	Alex Coyte <a.coyte@intel.com>
	Thu, 23 Jun 2016 23:28:42 +0000 (09:28 +1000)
committer	Matthew Barr <matthew.barr@intel.com>
	Fri, 8 Jul 2016 01:01:34 +0000 (11:01 +1000)
src/nfa/castle.c		patch \| blob \| blame \| history
src/nfa/castle.h		patch \| blob \| blame \| history
src/nfa/gough.c		patch \| blob \| blame \| history
src/nfa/gough.h		patch \| blob \| blame \| history
src/nfa/lbr.h		patch \| blob \| blame \| history
src/nfa/lbr_common_impl.h		patch \| blob \| blame \| history
src/nfa/limex.h		patch \| blob \| blame \| history
src/nfa/limex_common_impl.h		patch \| blob \| blame \| history
src/nfa/limex_compile.cpp		patch \| blob \| blame \| history
src/nfa/limex_runtime_impl.h		patch \| blob \| blame \| history
src/nfa/mcclellan.c		patch \| blob \| blame \| history
src/nfa/mcclellan.h		patch \| blob \| blame \| history
src/nfa/mcclellancompile_util.cpp		patch \| blob \| blame \| history
src/nfa/mcclellancompile_util.h		patch \| blob \| blame \| history
src/nfa/mpv.h		patch \| blob \| blame \| history
src/nfa/nfa_api.h		patch \| blob \| blame \| history
src/nfa/nfa_api_dispatch.c		patch \| blob \| blame \| history
src/nfa/nfa_kind.h		patch \| blob \| blame \| history
src/nfagraph/ng_execute.cpp		patch \| blob \| blame \| history
src/nfagraph/ng_execute.h		patch \| blob \| blame \| history
src/nfagraph/ng_holder.h		patch \| blob \| blame \| history
src/nfagraph/ng_limex.cpp		patch \| blob \| blame \| history
src/nfagraph/ng_mcclellan.cpp		patch \| blob \| blame \| history
src/nfagraph/ng_split.cpp		patch \| blob \| blame \| history
src/rose/block.c		patch \| blob \| blame \| history
src/rose/program_runtime.h		patch \| blob \| blame \| history
src/rose/rose_build_add.cpp		patch \| blob \| blame \| history
src/rose/rose_build_bytecode.cpp		patch \| blob \| blame \| history
src/rose/rose_build_dump.cpp		patch \| blob \| blame \| history
src/rose/rose_build_impl.h		patch \| blob \| blame \| history
src/rose/rose_dump.cpp		patch \| blob \| blame \| history
src/rose/rose_internal.h		patch \| blob \| blame \| history
src/rose/runtime.h		patch \| blob \| blame \| history
src/rose/stream.c		patch \| blob \| blame \| history