flow: redesign of flow timeout handling
author    Victor Julien <victor@inliniac.net>
          Thu, 19 Dec 2019 16:26:45 +0000 (17:26 +0100)
committer Victor Julien <victor@inliniac.net>
          Thu, 6 Aug 2020 14:23:28 +0000 (16:23 +0200)
Goals:
- reduce locking
- take advantage of 'hot' caches
- better locality

Locking reduction

New flow spare pool. The global pool is implemented as a list of blocks,
where each block holds 100 spare flows. Worker threads fetch a block at
a time, storing the block in the local thread storage.
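
A minimal sketch of the idea (FlowSpareGetFromPool() appears in the diff
below; the block struct here is illustrative, not the exact
flow-spare-pool.c layout):

    /* one block of ~100 spare flows, kept on a global list */
    typedef struct FlowSparePool_ {
        FlowQueuePrivate queue;         /* unlocked list of spare flows */
        struct FlowSparePool_ *next;    /* next block in the global pool */
    } FlowSparePool;

    /* worker side: a single locked operation hands over a whole block,
     * which then lives in thread-local storage (fls->spare_queue) */
    FlowQueuePrivate FlowSpareGetFromPool(void);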

Flow Recycler now returns flows to the pool in blocks as well.

Flow Recycler fetches all flows to be processed in one step instead of
one at a time.
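
Sketched below, with FlowQueueExtractPrivate() assumed as the one-step
fetch helper (only FlowQueuePrivateGetFromTop() and
FlowQueueAppendPrivate() are visible in the diff):

    /* recycler side: one lock for the whole batch, not one per flow */
    FlowQueuePrivate list = FlowQueueExtractPrivate(&flow_recycle_q); /* assumed */
    Flow *f;
    while ((f = FlowQueuePrivateGetFromTop(&list)) != NULL) {
        Recycler(tv, output_thread_data, f); /* log, clear, return to pool */
    }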

Cache 'hot'ness

Worker threads now check the timeout of flows they evaluate during lookup.
The worker has to read the flow into cache anyway, so the added overhead
of checking the timeout value is minimal. When a flow is considered timed
out, one of two things happens (see the sketch after this list):

- if the flow is 'owned' by the thread it is handled locally. Handling means
  checking if the flow needs 'timeout' work.

- otherwise, the flow is added to a special 'evicted' list in the flow
  bucket where it will be picked up by the flow manager.
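
Condensed from the lookup path (MoveToWorkQueue()) in flow-hash.c in the
diff below:

    /* worker found flow 'f' timed out during hash lookup */
    if (f->proto != IPPROTO_TCP || FlowBelongsToUs(tv, f)) {
        /* 'owned': queue locally, this worker does the timeout work */
        FlowQueuePrivateAppendFlow(&fls->work_queue, f);
    } else {
        /* not ours: park on the bucket's evicted list for the
         * Flow Manager to collect */
        f->next = fb->evicted;
        fb->evicted = f;
        FLOWLOCK_UNLOCK(f);
    }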

Flow Manager timing

By default the flow manager now tries to do passes of the flow hash in
smaller steps, where the goal is to do a full pass in 8 x the lowest
timeout value it has to enforce. So if the lowest timeout value is 30s,
a full pass will take 4 minutes. The goal here is to reduce locking
overhead and not get in the way of the workers.
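
The computation, taken from FlowManager() in the diff below:

    const uint32_t min_timeout = FlowTimeoutsMin();
    const uint32_t pass_in_sec = min_timeout ? min_timeout * 8 : 60;
    /* e.g. min_timeout = 30s  ->  pass_in_sec = 240s = 4 minutes */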

In emergency mode each pass is full, and lower timeouts are used.

Timing of the flow manager also no longer relies on pthread condition
variables, as these generally wake up much sooner than the desired
timeout. Instead a simple (u)sleep loop is used.
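
A minimal sketch of such a loop (TimeForNextPass() is a hypothetical
predicate; the real loop in FlowManager() also tracks emergency mode and
counters):

    /* sleep in short slices so shutdown (THV_KILL) and emergency mode
     * are noticed quickly; no pthread_cond_timedwait involved */
    while (!TmThreadsCheckFlag(th_v, THV_KILL)) {
        if (TimeForNextPass(&ts)) /* hypothetical */
            break;
        usleep(250);
    }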

Both changes reduce the number of hash passes a lot.

Emergency behavior

In emergency mode there are a number of changes to the workers. In this
scenario the flow memcap is fully used up and it is unavoidable that some
flows won't be tracked.

1. flow spare pool fetches are reduced to at most once a second (see the
   sketch after this list). This avoids locking overhead in a situation
   where the chance of success is very low.

2. getting an active flow directly from the hash skips flows that have
   seen very recent activity, to avoid the scenario where all flows only
   reach the NEW state before getting reused. Instead, some flows are
   given a chance of completing.

3. TCP packets that are not SYN packets will not get a used flow, unless
   stream.midstream is enabled. The goal here is again to avoid evicting
   active flows unnecessarily.
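
The once-a-second throttle from item 1, condensed from FlowSpareSync()
in the diff below:

    if (emerg) {
        /* only sync with the global pool when the second has rolled
         * over since the last failed attempt */
        if ((uint32_t)p->ts.tv_sec > fls->emerg_spare_sync_stamp) {
            fls->spare_queue = FlowSpareGetFromPool();
            f = FlowQueuePrivateGetFromTop(&fls->spare_queue);
            if (f == NULL) {
                /* wait till the next full second before retrying */
                fls->emerg_spare_sync_stamp = (uint32_t)p->ts.tv_sec;
            }
        }
    }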

Better Locality

Flow Manager now injects flows into the worker threads, instead of
injecting one or two packets per flow. The advantage of this is that the
worker threads can get packets from their local packet pools, avoiding
the constant overhead of packets returning to 'foreign' pools.
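
The handoff is visible in ProcessAsideQueue() in the diff below;
condensed:

    /* flow manager side: a flow that still needs timeout work is
     * handed to a worker; ownership (and the flow lock) transfer
     * with the call, and the worker builds any packets it needs
     * from its own local packet pool */
    if (FlowForceReassemblyNeedReassembly(f) == 1) {
        FlowForceReassemblyForFlow(f);
        continue; /* flow now belongs to the worker */
    }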

Counters

A lot of flow counters have been added and some have been renamed.

Overall the worker threads increment 'flow.wrk.*' counters, while the flow
manager increments 'flow.mgr.*'.

Additionally, none of the counters are snapshots anymore; they all
increment over time. The flow.memuse and flow.spare counters are the
exceptions.

Misc

FlowQueue has been split into a FlowQueuePrivate (unlocked) and FlowQueue.
Flow no longer has 'prev' pointers and uses a unified 'next' pointer for
both hash and queue use.
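
A sketch of the split (field names are assumptions inferred from the
"{ NULL, NULL, 0 }" initializers in the diff below):

    /* unlocked list, used within a single thread */
    typedef struct FlowQueuePrivate_ {
        Flow *top;
        Flow *bot;
        uint32_t len;
    } FlowQueuePrivate;

    /* shared, locked variant wrapping the same list */
    typedef struct FlowQueue_ {
        FlowQueuePrivate priv;
        SCMutex m;
    } FlowQueue;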

29 files changed:
src/Makefile.am
src/decode.c
src/decode.h
src/flow-hash.c
src/flow-hash.h
src/flow-manager.c
src/flow-manager.h
src/flow-private.h
src/flow-queue.c
src/flow-queue.h
src/flow-spare-pool.c [new file with mode: 0644]
src/flow-spare-pool.h [new file with mode: 0644]
src/flow-timeout.c
src/flow-timeout.h
src/flow-util.c
src/flow-util.h
src/flow-worker.c
src/flow-worker.h
src/flow.c
src/flow.h
src/output-flow.c
src/output-json-flow.c
src/source-pcap-file.c
src/threadvars.h
src/tm-threads.c
src/tm-threads.h
src/util-atomic.h
src/util-logopenfile.c
src/util-unittest-helper.c

index df3f52d68c619a0547f325d019272b688f97ce21..ad3e6907d36614e8d50142d381f346f11c003730 100755 (executable)
@@ -333,6 +333,7 @@ flow-hash.c flow-hash.h \
 flow-manager.c flow-manager.h \
 flow-queue.c flow-queue.h \
 flow-storage.c flow-storage.h \
+flow-spare-pool.c flow-spare-pool.h \
 flow-timeout.c flow-timeout.h \
 flow-util.c flow-util.h \
 flow-var.c flow-var.h \
index 7083e62a182aff325d5b964447caf7c13ea44f17..47f210b24fe242281d7cae4cc48433b20f283b98 100644 (file)
@@ -402,13 +402,16 @@ void PacketDefragPktSetupParent(Packet *parent)
     DecodeSetNoPayloadInspectionFlag(parent);
 }
 
+/**
+ *  \note if p->flow is set, the flow is locked
+ */
 void PacketBypassCallback(Packet *p)
 {
 #ifdef CAPTURE_OFFLOAD
     /* Don't try to bypass if flow is already out or
      * if we have failed to do it once */
     if (p->flow) {
-        int state = SC_ATOMIC_GET(p->flow->flow_state);
+        int state = p->flow->flow_state;
         if ((state == FLOW_STATE_LOCAL_BYPASSED) ||
                 (state == FLOW_STATE_CAPTURE_BYPASSED)) {
             return;
@@ -431,7 +434,7 @@ void PacketBypassCallback(Packet *p)
     }
 #else /* CAPTURE_OFFLOAD */
     if (p->flow) {
-        int state = SC_ATOMIC_GET(p->flow->flow_state);
+        int state = p->flow->flow_state;
         if (state == FLOW_STATE_LOCAL_BYPASSED)
             return;
         FlowUpdateState(p->flow, FLOW_STATE_LOCAL_BYPASSED);
@@ -515,6 +518,17 @@ void DecodeRegisterPerfCounters(DecodeThreadVars *dtv, ThreadVars *tv)
     dtv->counter_flow_udp = StatsRegisterCounter("flow.udp", tv);
     dtv->counter_flow_icmp4 = StatsRegisterCounter("flow.icmpv4", tv);
     dtv->counter_flow_icmp6 = StatsRegisterCounter("flow.icmpv6", tv);
+    dtv->counter_flow_tcp_reuse = StatsRegisterCounter("flow.tcp_reuse", tv);
+    dtv->counter_flow_get_used = StatsRegisterCounter("flow.get_used", tv);
+    dtv->counter_flow_get_used_eval = StatsRegisterCounter("flow.get_used_eval", tv);
+    dtv->counter_flow_get_used_eval_reject = StatsRegisterCounter("flow.get_used_eval_reject", tv);
+    dtv->counter_flow_get_used_eval_busy = StatsRegisterCounter("flow.get_used_eval_busy", tv);
+    dtv->counter_flow_get_used_failed = StatsRegisterCounter("flow.get_used_failed", tv);
+
+    dtv->counter_flow_spare_sync_avg = StatsRegisterAvgCounter("flow.wrk.spare_sync_avg", tv);
+    dtv->counter_flow_spare_sync = StatsRegisterCounter("flow.wrk.spare_sync", tv);
+    dtv->counter_flow_spare_sync_incomplete = StatsRegisterCounter("flow.wrk.spare_sync_incomplete", tv);
+    dtv->counter_flow_spare_sync_empty = StatsRegisterCounter("flow.wrk.spare_sync_empty", tv);
 
     dtv->counter_defrag_ipv4_fragments =
         StatsRegisterCounter("defrag.ipv4.fragments", tv);
index ffeb57c845a60bec460fee6b282f3e0a38b8a973..0424b487b7db56690f630a9c325ec6c8ca50e030 100644 (file)
@@ -677,6 +677,17 @@ typedef struct DecodeThreadVars_
     uint16_t counter_flow_udp;
     uint16_t counter_flow_icmp4;
     uint16_t counter_flow_icmp6;
+    uint16_t counter_flow_tcp_reuse;
+    uint16_t counter_flow_get_used;
+    uint16_t counter_flow_get_used_eval;
+    uint16_t counter_flow_get_used_eval_reject;
+    uint16_t counter_flow_get_used_eval_busy;
+    uint16_t counter_flow_get_used_failed;
+
+    uint16_t counter_flow_spare_sync;
+    uint16_t counter_flow_spare_sync_empty;
+    uint16_t counter_flow_spare_sync_incomplete;
+    uint16_t counter_flow_spare_sync_avg;
 
     uint16_t counter_engine_events[DECODE_EVENT_MAX];
 
index 29baed2694d075bcf4ff36c22514dad950869e21..6397a0801880e65a853666be44f4c64bef2db4a7 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2013 Open Information Security Foundation
+/* Copyright (C) 2007-2020 Open Information Security Foundation
  *
  * You can copy, redistribute or modify this Program under the terms of
  * the GNU General Public License version 2 as published by the Free
@@ -36,6 +36,8 @@
 #include "flow-private.h"
 #include "flow-manager.h"
 #include "flow-storage.h"
+#include "flow-timeout.h"
+#include "flow-spare-pool.h"
 #include "app-layer-parser.h"
 
 #include "util-time.h"
 #include "conf.h"
 #include "output.h"
 #include "output-flow.h"
+#include "stream-tcp.h"
+
+extern TcpStreamCnf stream_config;
+
 
 FlowBucket *flow_hash;
 SC_ATOMIC_EXTERN(unsigned int, flow_prune_idx);
 SC_ATOMIC_EXTERN(unsigned int, flow_flags);
 
-static Flow *FlowGetUsedFlow(ThreadVars *tv, DecodeThreadVars *dtv);
+static Flow *FlowGetUsedFlow(ThreadVars *tv, DecodeThreadVars *dtv, const struct timeval *ts);
 
 /** \brief compare two raw ipv6 addrs
  *
@@ -407,8 +413,6 @@ void FlowSetupPacket(Packet *p)
     p->flow_hash = FlowGetHash(p);
 }
 
-int TcpSessionPacketSsnReuse(const Packet *p, const Flow *f, void *tcp_ssn);
-
 static inline int FlowCompare(Flow *f, const Packet *p)
 {
     if (p->proto == IPPROTO_ICMP) {
@@ -423,13 +427,26 @@ static inline int FlowCompare(Flow *f, const Packet *p)
  *
  *  We use this check to filter out flow creation based on:
  *  - ICMP error messages
+ *  - TCP flags (emergency mode only)
  *
  *  \param p packet
  *  \retval 1 true
  *  \retval 0 false
  */
-static inline int FlowCreateCheck(const Packet *p)
+static inline int FlowCreateCheck(const Packet *p, const bool emerg)
 {
+    /* if we're in emergency mode, don't try to create a flow for a TCP
+     * that is not a TCP SYN packet. */
+    if (emerg) {
+        if (PKT_IS_TCP(p)) {
+            if (p->tcph->th_flags == TH_SYN || stream_config.midstream == FALSE) {
+                ;
+            } else {
+                return 0;
+            }
+        }
+    }
+
     if (PKT_IS_ICMPV4(p)) {
         if (ICMPV4_IS_ERROR_MSG(p)) {
             return 0;
@@ -464,6 +481,50 @@ static inline void FlowUpdateCounter(ThreadVars *tv, DecodeThreadVars *dtv,
 #endif
 }
 
+/** \internal
+ *  \brief try to fetch a new set of flows from the master flow pool.
+ *
+ *  If in emergency mode, do this only once a second at max to avoid trying
+ *  to synchronise per packet in the worse case. */
+static inline Flow *FlowSpareSync(ThreadVars *tv, FlowLookupStruct *fls,
+        const Packet *p, const bool emerg)
+{
+    Flow *f = NULL;
+    bool spare_sync = false;
+    if (emerg) {
+        if ((uint32_t)p->ts.tv_sec > fls->emerg_spare_sync_stamp) {
+            fls->spare_queue = FlowSpareGetFromPool(); /* local empty, (re)populate and try again */
+            spare_sync = true;
+            f = FlowQueuePrivateGetFromTop(&fls->spare_queue);
+            if (f == NULL) {
+                /* wait till next full sec before retrying */
+                fls->emerg_spare_sync_stamp = (uint32_t)p->ts.tv_sec;
+            }
+        }
+    } else {
+        fls->spare_queue = FlowSpareGetFromPool(); /* local empty, (re)populate and try again */
+        f = FlowQueuePrivateGetFromTop(&fls->spare_queue);
+        spare_sync = true;
+    }
+#ifdef UNITTESTS
+    if (tv && fls->dtv) {
+#endif
+        if (spare_sync) {
+            if (f != NULL) {
+                StatsAddUI64(tv, fls->dtv->counter_flow_spare_sync_avg, fls->spare_queue.len+1);
+            } else if (f == NULL && fls->spare_queue.len == 0) {
+                StatsIncr(tv, fls->dtv->counter_flow_spare_sync_empty);
+            } else if (f != NULL && fls->spare_queue.len < 99) {
+                StatsIncr(tv, fls->dtv->counter_flow_spare_sync_incomplete);
+            }
+            StatsIncr(tv, fls->dtv->counter_flow_spare_sync);
+        }
+#ifdef UNITTESTS
+    }
+#endif
+    return f;
+}
+
 /**
  *  \brief Get a new flow
  *
@@ -471,59 +532,62 @@ static inline void FlowUpdateCounter(ThreadVars *tv, DecodeThreadVars *dtv,
  *  if the memcap is reached.
  *
  *  \param tv thread vars
- *  \param dtv decode thread vars (for flow log api thread data)
+ *  \param fls lookup support vars
  *
  *  \retval f *LOCKED* flow on succes, NULL on error.
  */
-static Flow *FlowGetNew(ThreadVars *tv, DecodeThreadVars *dtv, const Packet *p)
+static Flow *FlowGetNew(ThreadVars *tv, FlowLookupStruct *fls, const Packet *p)
 {
-    Flow *f = NULL;
+    const bool emerg = ((SC_ATOMIC_GET(flow_flags) & FLOW_EMERGENCY) != 0);
 
-    if (FlowCreateCheck(p) == 0) {
+    if (FlowCreateCheck(p, emerg) == 0) {
         return NULL;
     }
 
     /* get a flow from the spare queue */
-    f = FlowDequeue(&flow_spare_q);
+    Flow *f = FlowQueuePrivateGetFromTop(&fls->spare_queue);
+    if (f == NULL) {
+        f = FlowSpareSync(tv, fls, p, emerg);
+    }
     if (f == NULL) {
         /* If we reached the max memcap, we get a used flow */
         if (!(FLOW_CHECK_MEMCAP(sizeof(Flow) + FlowStorageSize()))) {
             /* declare state of emergency */
             if (!(SC_ATOMIC_GET(flow_flags) & FLOW_EMERGENCY)) {
                 SC_ATOMIC_OR(flow_flags, FLOW_EMERGENCY);
-
                 FlowTimeoutsEmergency();
-
-                /* under high load, waking up the flow mgr each time leads
-                 * to high cpu usage. Flows are not timed out much faster if
-                 * we check a 1000 times a second. */
-                FlowWakeupFlowManagerThread();
             }
 
-            f = FlowGetUsedFlow(tv, dtv);
+            f = FlowGetUsedFlow(tv, fls->dtv, &p->ts);
             if (f == NULL) {
-                /* max memcap reached, so increments the counter */
-                if (tv != NULL && dtv != NULL) {
-                    StatsIncr(tv, dtv->counter_flow_memcap);
-                }
-
-                /* very rare, but we can fail. Just giving up */
                 return NULL;
             }
-
-            /* freed a flow, but it's unlocked */
-        } else {
-            /* now see if we can alloc a new flow */
-            f = FlowAlloc();
-            if (f == NULL) {
-                if (tv != NULL && dtv != NULL) {
-                    StatsIncr(tv, dtv->counter_flow_memcap);
-                }
-                return NULL;
+#ifdef UNITTESTS
+            if (tv != NULL && fls->dtv != NULL) {
+#endif
+                StatsIncr(tv, fls->dtv->counter_flow_get_used);
+#ifdef UNITTESTS
             }
+#endif
+            /* flow is still locked from FlowGetUsedFlow() */
+            FlowUpdateCounter(tv, fls->dtv, p->proto);
+            return f;
+        }
 
-            /* flow is initialized but *unlocked* */
+        /* now see if we can alloc a new flow */
+        f = FlowAlloc();
+        if (f == NULL) {
+#ifdef UNITTESTS
+            if (tv != NULL && fls->dtv != NULL) {
+#endif
+                StatsIncr(tv, fls->dtv->counter_flow_memcap);
+#ifdef UNITTESTS
+            }
+#endif
+            return NULL;
         }
+
+        /* flow is initialized but *unlocked* */
     } else {
         /* flow has been recycled before it went into the spare queue */
 
@@ -531,16 +595,21 @@ static Flow *FlowGetNew(ThreadVars *tv, DecodeThreadVars *dtv, const Packet *p)
     }
 
     FLOWLOCK_WRLOCK(f);
-    FlowUpdateCounter(tv, dtv, p->proto);
+    FlowUpdateCounter(tv, fls->dtv, p->proto);
     return f;
 }
 
-static Flow *TcpReuseReplace(ThreadVars *tv, DecodeThreadVars *dtv,
+static Flow *TcpReuseReplace(ThreadVars *tv, FlowLookupStruct *fls,
                              FlowBucket *fb, Flow *old_f,
                              const uint32_t hash, const Packet *p)
 {
+    if (tv != NULL && fls->dtv != NULL) {
+        StatsIncr(tv, fls->dtv->counter_flow_tcp_reuse);
+    }
     /* tag flow as reused so future lookups won't find it */
     old_f->flags |= FLOW_TCP_REUSED;
+    /* time out immediately */
+    old_f->timeout_at = 0;
     /* get some settings that we move over to the new flow */
     FlowThreadId thread_id[2] = { old_f->thread_id[0], old_f->thread_id[1] };
 
@@ -548,7 +617,7 @@ static Flow *TcpReuseReplace(ThreadVars *tv, DecodeThreadVars *dtv,
     FLOWLOCK_UNLOCK(old_f);
 
     /* Get a new flow. It will be either a locked flow or NULL */
-    Flow *f = FlowGetNew(tv, dtv, p);
+    Flow *f = FlowGetNew(tv, fls, p);
     if (f == NULL) {
         return NULL;
     }
@@ -556,8 +625,7 @@ static Flow *TcpReuseReplace(ThreadVars *tv, DecodeThreadVars *dtv,
     /* flow is locked */
 
     /* put at the start of the list */
-    f->hnext = fb->head;
-    fb->head->hprev = f;
+    f->next = fb->head;
     fb->head = f;
 
     /* initialize and return */
@@ -571,6 +639,71 @@ static Flow *TcpReuseReplace(ThreadVars *tv, DecodeThreadVars *dtv,
     return f;
 }
 
+static inline bool FlowBelongsToUs(const ThreadVars *tv, const Flow *f)
+{
+#ifdef UNITTESTS
+    if (RunmodeIsUnittests()) {
+        return true;
+    }
+#endif
+    return f->thread_id[0] == tv->id;
+}
+
+static inline void MoveToWorkQueue(ThreadVars *tv, FlowLookupStruct *fls,
+        FlowBucket *fb, Flow *f, Flow *prev_f)
+{
+    /* remove from hash... */
+    if (prev_f) {
+        prev_f->next = f->next;
+    }
+    if (f == fb->head) {
+        fb->head = f->next;
+    }
+
+    if (f->proto != IPPROTO_TCP || FlowBelongsToUs(tv, f)) { // TODO thread_id[] direction
+        f->fb = NULL;
+        f->next = NULL;
+        FlowQueuePrivateAppendFlow(&fls->work_queue, f);
+    } else {
+        /* implied: TCP but our thread does not own it. So set it
+         * aside for the Flow Manager to pick it up. */
+        f->next = fb->evicted;
+        fb->evicted = f;
+        if (SC_ATOMIC_GET(f->fb->next_ts) != 0) {
+            SC_ATOMIC_SET(f->fb->next_ts, 0);
+        }
+        FLOWLOCK_UNLOCK(f);
+    }
+}
+
+static inline bool FlowIsTimedOut(const Flow *f, const uint32_t sec, const bool emerg)
+{
+    if (unlikely(f->timeout_at < sec)) {
+        return true;
+    } else if (unlikely(emerg)) {
+        extern FlowProtoTimeout flow_timeouts_delta[FLOW_PROTO_MAX];
+
+        int64_t timeout_at = f->timeout_at -
+            FlowGetFlowTimeoutDirect(flow_timeouts_delta, f->flow_state, f->protomap);
+        if ((int64_t)sec >= timeout_at)
+            return true;
+    }
+    return false;
+}
+
+static inline void FromHashLockBucket(FlowBucket *fb)
+{
+    FBLOCK_LOCK(fb);
+}
+static inline void FromHashLockTO(Flow *f)
+{
+    FLOWLOCK_WRLOCK(f);
+}
+static inline void FromHashLockCMP(Flow *f)
+{
+    FLOWLOCK_WRLOCK(f);
+}
+
 /** \brief Get Flow for packet
  *
  * Hash retrieval function for flows. Looks up the hash bucket containing the
@@ -578,7 +711,7 @@ static Flow *TcpReuseReplace(ThreadVars *tv, DecodeThreadVars *dtv,
  * the flow we need. If it isn't, walk the list until the right flow is found.
  *
  * If the flow is not found or the bucket was emtpy, a new flow is taken from
- * the queue. FlowDequeue() will alloc new flows as long as we stay within our
+ * the spare pool. The pool will alloc new flows as long as we stay within our
  * memcap limit.
  *
  * The p->flow pointer is updated to point to the flow.
@@ -588,20 +721,21 @@ static Flow *TcpReuseReplace(ThreadVars *tv, DecodeThreadVars *dtv,
  *
  *  \retval f *LOCKED* flow or NULL
  */
-Flow *FlowGetFlowFromHash(ThreadVars *tv, DecodeThreadVars *dtv, const Packet *p, Flow **dest)
+Flow *FlowGetFlowFromHash(ThreadVars *tv, FlowLookupStruct *fls,
+        const Packet *p, Flow **dest)
 {
     Flow *f = NULL;
 
     /* get our hash bucket and lock it */
     const uint32_t hash = p->flow_hash;
     FlowBucket *fb = &flow_hash[hash % flow_config.hash_size];
-    FBLOCK_LOCK(fb);
+    FromHashLockBucket(fb);
 
     SCLogDebug("fb %p fb->head %p", fb, fb->head);
 
     /* see if the bucket already has a flow */
     if (fb->head == NULL) {
-        f = FlowGetNew(tv, dtv, p);
+        f = FlowGetNew(tv, fls, p);
         if (f == NULL) {
             FBLOCK_UNLOCK(fb);
             return NULL;
@@ -609,7 +743,6 @@ Flow *FlowGetFlowFromHash(ThreadVars *tv, DecodeThreadVars *dtv, const Packet *p
 
         /* flow is locked */
         fb->head = f;
-        fb->tail = f;
 
         /* got one, now lock, initialize and return */
         FlowInit(f, p);
@@ -623,46 +756,55 @@ Flow *FlowGetFlowFromHash(ThreadVars *tv, DecodeThreadVars *dtv, const Packet *p
         return f;
     }
 
+    const bool emerg = (SC_ATOMIC_GET(flow_flags) & FLOW_EMERGENCY) != 0;
+    const uint32_t fb_nextts = !emerg ? SC_ATOMIC_GET(fb->next_ts) : 0;
     /* ok, we have a flow in the bucket. Let's find out if it is our flow */
-    Flow *pf = NULL; /* previous flow */
+    Flow *prev_f = NULL; /* previous flow */
     f = fb->head;
     do {
-        FLOWLOCK_WRLOCK(f);
-        if ((f->flags & (FLOW_TCP_REUSED|FLOW_TIMED_OUT)) == 0) {
-            uint32_t timeout = FlowGetFlowTimeout(f, SC_ATOMIC_GET(f->flow_state));
-            int32_t flow_times_out_at = (int32_t)(f->lastts.tv_sec + timeout);
-            /* do the timeout check */
-            if (flow_times_out_at >= p->ts.tv_sec) {
-                if (FlowCompare(f, p) != 0) {
-                    if (unlikely(TcpSessionPacketSsnReuse(p, f, f->protoctx) == 1)) {
-                        f = TcpReuseReplace(tv, dtv, fb, f, hash, p);
-                        if (f == NULL) {
-                            FBLOCK_UNLOCK(fb);
-                            return NULL;
-                        }
-                    }
-
-                    FlowReference(dest, f);
-
+        Flow *next_f = NULL;
+        const bool timedout =
+            (fb_nextts < (uint32_t)p->ts.tv_sec && FlowIsTimedOut(f, (uint32_t)p->ts.tv_sec, emerg));
+        if (timedout) {
+            FromHashLockTO(f);//FLOWLOCK_WRLOCK(f);
+            if (f->use_cnt == 0) {
+                next_f = f->next;
+                MoveToWorkQueue(tv, fls, fb, f, prev_f);
+                /* flow stays locked, ownership xfer'd to MoveToWorkQueue */
+                goto flow_removed;
+            }
+            FLOWLOCK_UNLOCK(f);
+        } else if (FlowCompare(f, p) != 0) {
+            FromHashLockCMP(f);//FLOWLOCK_WRLOCK(f);
+            /* found a matching flow that is not timed out */
+            if (unlikely(TcpSessionPacketSsnReuse(p, f, f->protoctx) == 1)) {
+                f = TcpReuseReplace(tv, fls, fb, f, hash, p);
+                if (f == NULL) {
                     FBLOCK_UNLOCK(fb);
-                    return f;
+                    return NULL;
                 }
-                f->flags |= FLOW_TIMED_OUT;
             }
+            FlowReference(dest, f);
+            FBLOCK_UNLOCK(fb);
+            return f; /* return w/o releasing flow lock */
         }
-        FLOWLOCK_UNLOCK(f);
-        if (f->hnext == NULL) {
-            pf = f;
-            f = pf->hnext = FlowGetNew(tv, dtv, p);
+        /* unless we removed 'f', prev_f needs to point to
+         * current 'f' when adding a new flow below. */
+        prev_f = f;
+        next_f = f->next;
+
+flow_removed:
+        if (next_f == NULL) {
+            f = FlowGetNew(tv, fls, p);
             if (f == NULL) {
                 FBLOCK_UNLOCK(fb);
                 return NULL;
             }
-            fb->tail = f;
 
             /* flow is locked */
 
-            f->hprev = pf;
+            f->next = fb->head;
+            fb->head = f;
 
             /* initialize and return */
             FlowInit(f, p);
@@ -673,8 +815,7 @@ Flow *FlowGetFlowFromHash(ThreadVars *tv, DecodeThreadVars *dtv, const Packet *p
             FBLOCK_UNLOCK(fb);
             return f;
         }
-        pf = f;
-        f = f->hnext;
+        f = next_f;
     } while (f != NULL);
 
     /* should be unreachable */
@@ -710,16 +851,12 @@ Flow *FlowGetFromFlowKey(FlowKey *key, struct timespec *ttime, const uint32_t ha
     if (f != NULL) {
         return f;
     }
-
-    /* No existing flow so let's get one new */
-    f = FlowDequeue(&flow_spare_q);
+    /* TODO use spare pool */
+    /* now see if we can alloc a new flow */
+    f = FlowAlloc();
     if (f == NULL) {
-        /* now see if we can alloc a new flow */
-        f = FlowAlloc();
-        if (f == NULL) {
-            SCLogDebug("Can't get a spare flow at start");
-            return NULL;
-        }
+        SCLogDebug("Can't get a spare flow at start");
+        return NULL;
     }
     f->proto = key->proto;
     f->vlan_id[0] = key->vlan_id[0];
@@ -751,17 +888,10 @@ Flow *FlowGetFromFlowKey(FlowKey *key, struct timespec *ttime, const uint32_t ha
     FlowBucket *fb = &flow_hash[hash % flow_config.hash_size];
     FBLOCK_LOCK(fb);
     f->fb = fb;
-    if (fb->head == NULL) {
-        fb->head = f;
-        fb->tail = f;
-    } else {
-        f->hprev = fb->tail;
-        f->hprev->hnext = f;
-        fb->tail = f;
-    }
+    f->next = fb->head;
+    fb->head = f;
     FLOWLOCK_WRLOCK(f);
     FBLOCK_UNLOCK(fb);
-
     return f;
 }
 
@@ -796,7 +926,7 @@ Flow *FlowGetExistingFlowFromHash(FlowKey *key, const uint32_t hash)
     /* see if this is the flow we are looking for */
     if (FlowCompareKey(f, key) == 0) {
         while (f) {
-            f = f->hnext;
+            f = f->next;
 
             if (f == NULL) {
                 FBLOCK_UNLOCK(fb);
@@ -822,6 +952,64 @@ Flow *FlowGetExistingFlowFromHash(FlowKey *key, const uint32_t hash)
 
 #define FLOW_GET_NEW_TRIES 5
 
+/* inline locking wrappers to make profiling easier */
+
+static inline int GetUsedTryLockBucket(FlowBucket *fb)
+{
+    int r = FBLOCK_TRYLOCK(fb);
+    return r;
+}
+static inline int GetUsedTryLockFlow(Flow *f)
+{
+    int r = FLOWLOCK_TRYWRLOCK(f);
+    return r;
+}
+static inline uint32_t GetUsedAtomicUpdate(const uint32_t val)
+{
+    uint32_t r =  SC_ATOMIC_ADD(flow_prune_idx, val);
+    return r;
+}
+
+/** \internal
+ *  \brief check if flow has just seen an update.
+ */
+static inline bool StillAlive(const Flow *f, const struct timeval *ts)
+{
+    switch (f->flow_state) {
+        case FLOW_STATE_NEW:
+            if (ts->tv_sec - f->lastts.tv_sec <= 1) {
+                return true;
+            }
+            break;
+        case FLOW_STATE_ESTABLISHED:
+            if (ts->tv_sec - f->lastts.tv_sec <= 5) {
+                return true;
+            }
+            break;
+        case FLOW_STATE_CLOSED:
+            if (ts->tv_sec - f->lastts.tv_sec <= 3) {
+                return true;
+            }
+            break;
+        default:
+            if (ts->tv_sec - f->lastts.tv_sec < 30) {
+                return true;
+            }
+            break;
+    }
+    return false;
+}
+
+#ifdef UNITTESTS
+    #define STATSADDUI64(cnt, value) \
+        if (tv && dtv) { \
+            StatsAddUI64(tv, dtv->cnt, (value)); \
+        }
+#else
+    #define STATSADDUI64(cnt, value) \
+        StatsAddUI64(tv, dtv->cnt, (value));
+#endif
+
 /** \internal
  *  \brief Get a flow from the hash directly.
  *
@@ -837,75 +1025,65 @@ Flow *FlowGetExistingFlowFromHash(FlowKey *key, const uint32_t hash)
  *
  *  \retval f flow or NULL
  */
-static Flow *FlowGetUsedFlow(ThreadVars *tv, DecodeThreadVars *dtv)
+static Flow *FlowGetUsedFlow(ThreadVars *tv, DecodeThreadVars *dtv, const struct timeval *ts)
 {
-    uint32_t idx = SC_ATOMIC_ADD(flow_prune_idx, FLOW_GET_NEW_TRIES) % flow_config.hash_size;
-    uint32_t cnt = flow_config.hash_size;
+    uint32_t idx = GetUsedAtomicUpdate(FLOW_GET_NEW_TRIES) % flow_config.hash_size;
     uint32_t tried = 0;
 
-    while (cnt--) {
-        if (tried++ > FLOW_GET_NEW_TRIES)
+    while (1) {
+        if (tried++ > FLOW_GET_NEW_TRIES) {
+            STATSADDUI64(counter_flow_get_used_eval, tried);
             break;
-
+        }
         if (++idx >= flow_config.hash_size)
             idx = 0;
 
         FlowBucket *fb = &flow_hash[idx];
 
-        if (FBLOCK_TRYLOCK(fb) != 0)
+        if (SC_ATOMIC_GET(fb->next_ts) == INT_MAX)
             continue;
 
-        Flow *f = fb->tail;
+        if (GetUsedTryLockBucket(fb) != 0) {
+            STATSADDUI64(counter_flow_get_used_eval_busy, 1);
+            continue;
+        }
+
+        Flow *f = fb->head;
         if (f == NULL) {
             FBLOCK_UNLOCK(fb);
             continue;
         }
 
-        if (FLOWLOCK_TRYWRLOCK(f) != 0) {
+        if (GetUsedTryLockFlow(f) != 0) {
+            STATSADDUI64(counter_flow_get_used_eval_busy, 1);
             FBLOCK_UNLOCK(fb);
             continue;
         }
 
         /** never prune a flow that is used by a packet or stream msg
          *  we are currently processing in one of the threads */
-        if (SC_ATOMIC_GET(f->use_cnt) > 0) {
+        if (f->use_cnt > 0) {
+            STATSADDUI64(counter_flow_get_used_eval_busy, 1);
+            FBLOCK_UNLOCK(fb);
+            FLOWLOCK_UNLOCK(f);
+            continue;
+        }
+
+        if (StillAlive(f, ts)) {
+            STATSADDUI64(counter_flow_get_used_eval_reject, 1);
             FBLOCK_UNLOCK(fb);
             FLOWLOCK_UNLOCK(f);
             continue;
         }
 
         /* remove from the hash */
-        if (f->hprev != NULL)
-            f->hprev->hnext = f->hnext;
-        if (f->hnext != NULL)
-            f->hnext->hprev = f->hprev;
-        if (fb->head == f)
-            fb->head = f->hnext;
-        if (fb->tail == f)
-            fb->tail = f->hprev;
-
-        f->hnext = NULL;
-        f->hprev = NULL;
+        fb->head = f->next;
+        f->next = NULL;
         f->fb = NULL;
-        SC_ATOMIC_SET(fb->next_ts, 0);
         FBLOCK_UNLOCK(fb);
 
-        int state = SC_ATOMIC_GET(f->flow_state);
-        if (state == FLOW_STATE_NEW)
-            f->flow_end_flags |= FLOW_END_FLAG_STATE_NEW;
-        else if (state == FLOW_STATE_ESTABLISHED)
-            f->flow_end_flags |= FLOW_END_FLAG_STATE_ESTABLISHED;
-        else if (state == FLOW_STATE_CLOSED)
-            f->flow_end_flags |= FLOW_END_FLAG_STATE_CLOSED;
-#ifdef CAPTURE_OFFLOAD
-        else if (state == FLOW_STATE_CAPTURE_BYPASSED)
-            f->flow_end_flags |= FLOW_END_FLAG_STATE_BYPASSED;
-#endif
-        else if (state == FLOW_STATE_LOCAL_BYPASSED)
-            f->flow_end_flags |= FLOW_END_FLAG_STATE_BYPASSED;
-
+        /* rest of the flags is updated on-demand in output */
         f->flow_end_flags |= FLOW_END_FLAG_FORCED;
-
         if (SC_ATOMIC_GET(flow_flags) & FLOW_EMERGENCY)
             f->flow_end_flags |= FLOW_END_FLAG_EMERGENCY;
 
@@ -915,9 +1093,12 @@ static Flow *FlowGetUsedFlow(ThreadVars *tv, DecodeThreadVars *dtv)
 
         FlowClearMemory(f, f->protomap);
 
-        FLOWLOCK_UNLOCK(f);
+        /* leave locked */
+
+        STATSADDUI64(counter_flow_get_used_eval, tried);
         return f;
     }
 
+    STATSADDUI64(counter_flow_get_used_failed, 1);
     return NULL;
 }
index 2b3c6e51cfaf9f4477456306fa5eccc606823832..e1026ffa310a440dbf74f77a6bef0aa53afe7b4f 100644 (file)
  * the same hashkey (the hash is a chained hash). When doing modifications
  * to the list, the entire bucket is locked. */
 typedef struct FlowBucket_ {
+    /** head of the list of active flows for this row. */
     Flow *head;
-    Flow *tail;
+    /** head of the list of evicted flows for this row. Waiting to be
+     *  collected by the Flow Manager. */
+    Flow *evicted;
 #ifdef FBLOCK_MUTEX
     SCMutex m;
 #elif defined FBLOCK_SPIN
@@ -74,11 +77,28 @@ typedef struct FlowBucket_ {
 
 /* prototypes */
 
-Flow *FlowGetFlowFromHash(ThreadVars *tv, DecodeThreadVars *dtv, const Packet *, Flow **);
+Flow *FlowGetFlowFromHash(ThreadVars *tv, FlowLookupStruct *tctx,
+        const Packet *, Flow **);
 
 Flow *FlowGetFromFlowKey(FlowKey *key, struct timespec *ttime, const uint32_t hash);
 Flow *FlowGetExistingFlowFromHash(FlowKey * key, uint32_t hash);
 uint32_t FlowKeyGetHash(FlowKey *flow_key);
 
+/** \note f->fb must be locked */
+static inline void RemoveFromHash(Flow *f, Flow *prev_f)
+{
+    FlowBucket *fb = f->fb;
+
+    /* remove from the hash */
+    if (prev_f != NULL) {
+        prev_f->next = f->next;
+    } else {
+        fb->head = f->next;
+    }
+
+    f->next = NULL;
+    f->fb = NULL;
+}
+
 #endif /* __FLOW_HASH_H__ */
 
index 21f3dcf9ca912d0fcbab316145ff5e9abd400397..905f57ed7d4b07649b7db57b0bbf6d2997bc034b 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2013 Open Information Security Foundation
+/* Copyright (C) 2007-2020 Open Information Security Foundation
  *
  * You can copy, redistribute or modify this Program under the terms of
  * the GNU General Public License version 2 as published by the Free
@@ -42,6 +42,7 @@
 #include "flow-timeout.h"
 #include "flow-manager.h"
 #include "flow-storage.h"
+#include "flow-spare-pool.h"
 
 #include "stream-tcp-private.h"
 #include "stream-tcp-reassemble.h"
@@ -83,15 +84,9 @@ SC_ATOMIC_DECLARE(uint32_t, flowmgr_cnt);
 static uint32_t flowrec_number = 1;
 /* atomic counter for flow recyclers, to assign instance id */
 SC_ATOMIC_DECLARE(uint32_t, flowrec_cnt);
-
+SC_ATOMIC_DECLARE(uint32_t, flowrec_busy);
 SC_ATOMIC_EXTERN(unsigned int, flow_flags);
 
-SCCtrlCondT flow_manager_ctrl_cond;
-SCCtrlMutex flow_manager_ctrl_mutex;
-
-SCCtrlCondT flow_recycler_ctrl_cond;
-SCCtrlMutex flow_recycler_ctrl_mutex;
-
 void FlowTimeoutsInit(void)
 {
     SC_ATOMIC_SET(flow_timeouts, flow_timeouts_normal);
@@ -115,13 +110,6 @@ typedef struct FlowTimeoutCounters_ {
     uint32_t est;
     uint32_t clo;
     uint32_t byp;
-    uint32_t tcp_reuse;
-
-    uint32_t flows_checked;
-    uint32_t flows_notimeout;
-    uint32_t flows_timeout;
-    uint32_t flows_timeout_inuse;
-    uint32_t flows_removed;
 
     uint32_t rows_checked;
     uint32_t rows_skipped;
@@ -129,6 +117,14 @@ typedef struct FlowTimeoutCounters_ {
     uint32_t rows_busy;
     uint32_t rows_maxlen;
 
+    uint32_t flows_checked;
+    uint32_t flows_notimeout;
+    uint32_t flows_timeout;
+    uint32_t flows_timeout_inuse;
+    uint32_t flows_removed;
+    uint32_t flows_aside;
+    uint32_t flows_aside_needs_work;
+
     uint32_t bypassed_count;
     uint64_t bypassed_pkts;
     uint64_t bypassed_bytes;
@@ -142,11 +138,6 @@ typedef struct FlowTimeoutCounters_ {
  */
 void FlowDisableFlowManagerThread(void)
 {
-    /* wake up threads */
-    uint32_t u;
-    for (u = 0; u < flowmgr_number; u++)
-        SCCtrlCondSignal(&flow_manager_ctrl_cond);
-
     SCMutexLock(&tv_root_lock);
     /* flow manager thread(s) is/are a part of mgmt threads */
     for (ThreadVars *tv = tv_root[TVT_MGMT]; tv != NULL; tv = tv->next) {
@@ -198,13 +189,13 @@ again:
  *  \retval 0 not timed out
  *  \retval 1 timed out
  */
-static int FlowManagerFlowTimeout(Flow *f, enum FlowState state, struct timeval *ts, int32_t *next_ts)
+static int FlowManagerFlowTimeout(Flow *f, struct timeval *ts, int32_t *next_ts, const bool emerg)
 {
-    /* set the timeout value according to the flow operating mode,
-     * flow's state and protocol.*/
-    uint32_t timeout = FlowGetFlowTimeout(f, state);
-
-    int32_t flow_times_out_at = (int32_t)(f->lastts.tv_sec + timeout);
+    int32_t flow_times_out_at = f->timeout_at;
+    if (emerg) {
+        extern FlowProtoTimeout flow_timeouts_delta[FLOW_PROTO_MAX];
+        flow_times_out_at -= FlowGetFlowTimeoutDirect(flow_timeouts_delta, f->flow_state, f->protomap);
+    }
     if (*next_ts == 0 || flow_times_out_at < *next_ts)
         *next_ts = flow_times_out_at;
 
@@ -220,7 +211,7 @@ static inline int FlowBypassedTimeout(Flow *f, struct timeval *ts,
                                       FlowTimeoutCounters *counters)
 {
 #ifdef CAPTURE_OFFLOAD
-    if (SC_ATOMIC_GET(f->flow_state) != FLOW_STATE_CAPTURE_BYPASSED) {
+    if (f->flow_state != FLOW_STATE_CAPTURE_BYPASSED) {
         return 1;
     }
 
@@ -272,12 +263,13 @@ static inline int FlowBypassedTimeout(Flow *f, struct timeval *ts,
  *  \retval 0 not timed out just yet
  *  \retval 1 fully timed out, lets kill it
  */
+#if 0
 static inline int FlowManagerFlowTimedOut(Flow *f, struct timeval *ts,
                                    FlowTimeoutCounters *counters)
 {
     /* never prune a flow that is used by a packet we
      * are currently processing in one of the threads */
-    if (SC_ATOMIC_GET(f->use_cnt) > 0) {
+    if (f->use_cnt > 0) {
         return 0;
     }
 
@@ -289,20 +281,76 @@ static inline int FlowManagerFlowTimedOut(Flow *f, struct timeval *ts,
 
     if (!(f->flags & FLOW_TIMEOUT_REASSEMBLY_DONE) &&
 #ifdef CAPTURE_OFFLOAD
-            SC_ATOMIC_GET(f->flow_state) != FLOW_STATE_CAPTURE_BYPASSED &&
+            f->flow_state != FLOW_STATE_CAPTURE_BYPASSED &&
 #endif
-            SC_ATOMIC_GET(f->flow_state) != FLOW_STATE_LOCAL_BYPASSED &&
+            f->flow_state != FLOW_STATE_LOCAL_BYPASSED &&
             FlowForceReassemblyNeedReassembly(f, &server, &client) == 1) {
         FlowForceReassemblyForFlow(f, server, client);
         return 0;
     }
 #ifdef DEBUG
     /* this should not be possible */
-    BUG_ON(SC_ATOMIC_GET(f->use_cnt) > 0);
+    BUG_ON(f->use_cnt > 0);
 #endif
 
     return 1;
 }
+#endif
+
+static inline int FMTryLockBucket(FlowBucket *fb)
+{
+    int r = FBLOCK_TRYLOCK(fb);
+    return r;
+}
+static inline void FMFlowLock(Flow *f)
+{
+    FLOWLOCK_WRLOCK(f);
+}
+
+typedef struct FlowManagerTimeoutThread {
+    /* used to temporarily store flows that have timed out and are
+     * removed from the hash */
+    FlowQueuePrivate aside_queue;
+} FlowManagerTimeoutThread;
+
+static uint32_t ProcessAsideQueue(FlowManagerTimeoutThread *td, FlowTimeoutCounters *counters)
+{
+    FlowQueuePrivate recycle = { NULL, NULL, 0 };
+    counters->flows_aside += td->aside_queue.len;
+
+    uint32_t cnt = 0;
+    Flow *f;
+    while ((f = FlowQueuePrivateGetFromTop(&td->aside_queue)) != NULL) {
+        /* flow is still locked */
+
+        if (f->proto == IPPROTO_TCP &&
+                !(f->flags & FLOW_TIMEOUT_REASSEMBLY_DONE) &&
+#ifdef CAPTURE_OFFLOAD
+                f->flow_state != FLOW_STATE_CAPTURE_BYPASSED &&
+#endif
+                f->flow_state != FLOW_STATE_LOCAL_BYPASSED &&
+                FlowForceReassemblyNeedReassembly(f) == 1)
+        {
+            FlowForceReassemblyForFlow(f); // TODO error check?
+            /* flow ownership is passed to the worker thread */
+
+            /* flow remains locked */
+            counters->flows_aside_needs_work++;
+            continue;
+        }
+        FLOWLOCK_UNLOCK(f);
+
+        FlowQueuePrivateAppendFlow(&recycle, f);
+        if (recycle.len == 100) {
+            FlowQueueAppendPrivate(&flow_recycle_q, &recycle);
+        }
+        cnt++;
+    }
+    if (recycle.len) {
+        FlowQueueAppendPrivate(&flow_recycle_q, &recycle);
+    }
+    return cnt;
+}
 
 /**
  *  \internal
@@ -313,14 +361,13 @@ static inline int FlowManagerFlowTimedOut(Flow *f, struct timeval *ts,
  *  \param ts timestamp
  *  \param emergency bool indicating emergency mode
  *  \param counters ptr to FlowTimeoutCounters structure
- *
- *  \retval cnt timed out flows
  */
-static uint32_t FlowManagerHashRowTimeout(Flow *f, struct timeval *ts,
+static void FlowManagerHashRowTimeout(FlowManagerTimeoutThread *td,
+        Flow *f, struct timeval *ts,
         int emergency, FlowTimeoutCounters *counters, int32_t *next_ts)
 {
-    uint32_t cnt = 0;
     uint32_t checked = 0;
+    Flow *prev_f = NULL;
 
     do {
         checked++;
@@ -330,170 +377,179 @@ static uint32_t FlowManagerHashRowTimeout(Flow *f, struct timeval *ts,
          * can't disappear) and flow_state is atomic. lastts can only
          * be modified when we have both the flow and hash row lock */
 
-        enum FlowState state = SC_ATOMIC_GET(f->flow_state);
-
         /* timeout logic goes here */
-        if (FlowManagerFlowTimeout(f, state, ts, next_ts) == 0) {
+        if (FlowManagerFlowTimeout(f, ts, next_ts, emergency) == 0) {
 
             counters->flows_notimeout++;
 
-            f = f->hprev;
+            prev_f = f;
+            f = f->next;
             continue;
         }
 
-        /* before grabbing the flow lock, make sure we have at least
-         * 3 packets in the pool */
-        PacketPoolWaitForN(3);
-
-        FLOWLOCK_WRLOCK(f);
+        FMFlowLock(f); //FLOWLOCK_WRLOCK(f);
 
-        Flow *next_flow = f->hprev;
+        Flow *next_flow = f->next;
 
         counters->flows_timeout++;
 
-        /* check if the flow is fully timed out and
-         * ready to be discarded. */
-        if (FlowManagerFlowTimedOut(f, ts, counters) == 1) {
-            /* remove from the hash */
-            if (f->hprev != NULL)
-                f->hprev->hnext = f->hnext;
-            if (f->hnext != NULL)
-                f->hnext->hprev = f->hprev;
-            if (f->fb->head == f)
-                f->fb->head = f->hnext;
-            if (f->fb->tail == f)
-                f->fb->tail = f->hprev;
-
-            f->hnext = NULL;
-            f->hprev = NULL;
-
-            if (f->flags & FLOW_TCP_REUSED)
-                counters->tcp_reuse++;
-
-            if (state == FLOW_STATE_NEW)
-                f->flow_end_flags |= FLOW_END_FLAG_STATE_NEW;
-            else if (state == FLOW_STATE_ESTABLISHED)
-                f->flow_end_flags |= FLOW_END_FLAG_STATE_ESTABLISHED;
-            else if (state == FLOW_STATE_CLOSED)
-                f->flow_end_flags |= FLOW_END_FLAG_STATE_CLOSED;
-            else if (state == FLOW_STATE_LOCAL_BYPASSED)
-                f->flow_end_flags |= FLOW_END_FLAG_STATE_BYPASSED;
-#ifdef CAPTURE_OFFLOAD
-            else if (state == FLOW_STATE_CAPTURE_BYPASSED)
-                f->flow_end_flags |= FLOW_END_FLAG_STATE_BYPASSED;
-#endif
-
-            if (emergency)
-                f->flow_end_flags |= FLOW_END_FLAG_EMERGENCY;
-            f->flow_end_flags |= FLOW_END_FLAG_TIMEOUT;
-
-            /* no one is referring to this flow, use_cnt 0, removed from hash
-             * so we can unlock it and pass it to the flow recycler */
+        /* never prune a flow that is used by a packet we
+         * are currently processing in one of the threads */
+        if (f->use_cnt > 0 || !FlowBypassedTimeout(f, ts, counters)) {
             FLOWLOCK_UNLOCK(f);
-            FlowEnqueue(&flow_recycle_q, f);
-
-            cnt++;
-
-            switch (state) {
-                case FLOW_STATE_NEW:
-                default:
-                    counters->new++;
-                    break;
-                case FLOW_STATE_ESTABLISHED:
-                    counters->est++;
-                    break;
-                case FLOW_STATE_CLOSED:
-                    counters->clo++;
-                    break;
-                case FLOW_STATE_LOCAL_BYPASSED:
-#ifdef CAPTURE_OFFLOAD
-                case FLOW_STATE_CAPTURE_BYPASSED:
-#endif
-                    counters->byp++;
-                    break;
-            }
-            counters->flows_removed++;
-        } else {
+            prev_f = f;
             counters->flows_timeout_inuse++;
-            FLOWLOCK_UNLOCK(f);
+            f = f->next;
+            continue;
         }
 
+        RemoveFromHash(f, prev_f);
+
+        FlowQueuePrivateAppendFlow(&td->aside_queue, f);
+        /* flow is still locked in the queue */
+
         f = next_flow;
     } while (f != NULL);
 
     counters->flows_checked += checked;
     if (checked > counters->rows_maxlen)
         counters->rows_maxlen = checked;
+}
 
-    return cnt;
+static void FlowManagerHashRowClearEvictedList(FlowManagerTimeoutThread *td,
+        Flow *f, struct timeval *ts, FlowTimeoutCounters *counters)
+{
+    do {
+        FLOWLOCK_WRLOCK(f);
+        Flow *next_flow = f->next;
+        f->next = NULL;
+        f->fb = NULL;
+
+        BUG_ON(f->use_cnt > 0 || !FlowBypassedTimeout(f, ts, counters));
+
+        FlowQueuePrivateAppendFlow(&td->aside_queue, f);
+        /* flow is still locked in the queue */
+
+        f = next_flow;
+    } while (f != NULL);
 }
 
 /**
  *  \brief time out flows from the hash
  *
  *  \param ts timestamp
- *  \param try_cnt number of flows to time out max (0 is unlimited)
  *  \param hash_min min hash index to consider
  *  \param hash_max max hash index to consider
  *  \param counters ptr to FlowTimeoutCounters structure
  *
  *  \retval cnt number of timed out flow
  */
-static uint32_t FlowTimeoutHash(struct timeval *ts, uint32_t try_cnt,
-        uint32_t hash_min, uint32_t hash_max,
+static uint32_t FlowTimeoutHash(FlowManagerTimeoutThread *td,
+        struct timeval *ts,
+        const uint32_t hash_min, const uint32_t hash_max,
         FlowTimeoutCounters *counters)
 {
-    uint32_t idx = 0;
     uint32_t cnt = 0;
-    int emergency = 0;
-
-    if (SC_ATOMIC_GET(flow_flags) & FLOW_EMERGENCY)
-        emergency = 1;
-
-    for (idx = hash_min; idx < hash_max; idx++) {
-        FlowBucket *fb = &flow_hash[idx];
-
-        counters->rows_checked++;
+    const int emergency = ((SC_ATOMIC_GET(flow_flags) & FLOW_EMERGENCY));
+    const uint32_t rows_checked = hash_max - hash_min;
+    uint32_t rows_skipped = 0;
+    uint32_t rows_busy = 0;
+    uint32_t rows_empty = 0;
+
+#if __WORDSIZE==64
+#define BITS 64
+#define TYPE uint64_t
+#else
+#define BITS 32
+#define TYPE uint32_t
+#endif
 
-        int32_t check_ts = SC_ATOMIC_GET(fb->next_ts);
-        if (check_ts > (int32_t)ts->tv_sec) {
-            counters->rows_skipped++;
-            continue;
+    for (uint32_t idx = hash_min; idx < hash_max; idx+=BITS) {
+        TYPE check_bits = 0;
+        const uint32_t check = MIN(BITS, (hash_max - idx));
+        for (uint32_t i = 0; i < check; i++) {
+            FlowBucket *fb = &flow_hash[idx+i];
+            check_bits |= (TYPE)(SC_ATOMIC_LOAD_EXPLICIT(fb->next_ts, SC_ATOMIC_MEMORY_ORDER_RELAXED) <= (int32_t)ts->tv_sec) << (TYPE)i;
         }
-
-        /* before grabbing the row lock, make sure we have at least
-         * 9 packets in the pool */
-        PacketPoolWaitForN(9);
-
-        if (FBLOCK_TRYLOCK(fb) != 0) {
-            counters->rows_busy++;
+        if (check_bits == 0)
             continue;
-        }
 
-        /* flow hash bucket is now locked */
-
-        if (fb->tail == NULL) {
-            SC_ATOMIC_SET(fb->next_ts, INT_MAX);
-            counters->rows_empty++;
-            goto next;
+        for (uint32_t i = 0; i < check; i++) {
+            FlowBucket *fb = &flow_hash[idx+i];
+            if ((check_bits & ((TYPE)1 << (TYPE)i)) != 0 && SC_ATOMIC_GET(fb->next_ts) <= (int32_t)ts->tv_sec) {
+                if (FMTryLockBucket(fb) == 0) {
+                    Flow *evicted = NULL;
+                    if (fb->evicted != NULL || fb->head != NULL) {
+                        /* if evicted is set, we only process that list right now.
+                         * Since its set we've had traffic that touched this row
+                         * very recently, and there is a good chance more of it will
+                         * come in in the near future. So unlock the row asap and leave
+                         * the possible eviction of flows to the packet lookup path. */
+                        if (fb->evicted != NULL) {
+                            /* transfer out of bucket so we can do additional work outside
+                             * of the bucket lock */
+                            evicted = fb->evicted;
+                            fb->evicted = NULL;
+                        } else if (fb->head != NULL) {
+                            int32_t next_ts = 0;
+                            FlowManagerHashRowTimeout(td,
+                                    fb->head, ts, emergency, counters, &next_ts);
+
+                            if (SC_ATOMIC_GET(fb->next_ts) != next_ts)
+                                SC_ATOMIC_SET(fb->next_ts, next_ts);
+                        }
+                        if (fb->evicted == NULL && fb->head == NULL) {
+                            SC_ATOMIC_SET(fb->next_ts, INT_MAX);
+                        } else if (fb->evicted != NULL && fb->head == NULL) {
+                            SC_ATOMIC_SET(fb->next_ts, 0);
+                        }
+                    } else {
+                        SC_ATOMIC_SET(fb->next_ts, INT_MAX);
+                        rows_empty++;
+                    }
+                    FBLOCK_UNLOCK(fb);
+                    /* processed evicted list */
+                    if (evicted) {
+                        FlowManagerHashRowClearEvictedList(td, evicted, ts, counters);
+                    }
+                } else {
+                    rows_busy++;
+                }
+            } else {
+                rows_skipped++;
+            }
         }
-
-        int32_t next_ts = 0;
-
-        /* we have a flow, or more than one */
-        cnt += FlowManagerHashRowTimeout(fb->tail, ts, emergency, counters, &next_ts);
-
-        if (SC_ATOMIC_GET(fb->next_ts) != next_ts) {
-            SC_ATOMIC_SET(fb->next_ts, next_ts);
+        if (td->aside_queue.len) {
+            cnt += ProcessAsideQueue(td, counters);
         }
+    }
 
-next:
-        FBLOCK_UNLOCK(fb);
+    counters->rows_checked += rows_checked;
+    counters->rows_skipped += rows_skipped;
+    counters->rows_busy += rows_busy;
+    counters->rows_empty += rows_empty;
 
-        if (try_cnt > 0 && cnt >= try_cnt)
-            break;
+    if (td->aside_queue.len) {
+        cnt += ProcessAsideQueue(td, counters);
     }
+    counters->flows_removed += cnt;
+    return cnt;
+}
 
+static uint32_t FlowTimeoutHashInChunks(FlowManagerTimeoutThread *td,
+        struct timeval *ts,
+        const uint32_t hash_min, const uint32_t hash_max,
+        FlowTimeoutCounters *counters, uint32_t iter, const uint32_t chunks)
+{
+    const uint32_t rows = hash_max - hash_min;
+    const uint32_t chunk_size = rows / chunks;
+
+    const uint32_t min = iter * chunk_size + hash_min;
+    uint32_t max = min + chunk_size;
+    if (iter + 1 == chunks) {
+        max = rows;
+    }
+    const uint32_t cnt = FlowTimeoutHash(td, ts, min, max, counters);
     return cnt;
 }
 
@@ -506,44 +562,30 @@ next:
  *
  *  \retval cnt removed out flows
  */
-static uint32_t FlowManagerHashRowCleanup(Flow *f)
+static uint32_t FlowManagerHashRowCleanup(Flow *f, FlowQueuePrivate *recycle_q, const int mode)
 {
     uint32_t cnt = 0;
 
     do {
         FLOWLOCK_WRLOCK(f);
 
-        Flow *next_flow = f->hprev;
-
-        int state = SC_ATOMIC_GET(f->flow_state);
+        Flow *next_flow = f->next;
 
         /* remove from the hash */
-        if (f->hprev != NULL)
-            f->hprev->hnext = f->hnext;
-        if (f->hnext != NULL)
-            f->hnext->hprev = f->hprev;
-        if (f->fb->head == f)
-            f->fb->head = f->hnext;
-        if (f->fb->tail == f)
-            f->fb->tail = f->hprev;
-
-        f->hnext = NULL;
-        f->hprev = NULL;
-
-        if (state == FLOW_STATE_NEW)
-            f->flow_end_flags |= FLOW_END_FLAG_STATE_NEW;
-        else if (state == FLOW_STATE_ESTABLISHED)
-            f->flow_end_flags |= FLOW_END_FLAG_STATE_ESTABLISHED;
-        else if (state == FLOW_STATE_CLOSED)
-            f->flow_end_flags |= FLOW_END_FLAG_STATE_CLOSED;
-
+        if (mode == 0) {
+            RemoveFromHash(f, NULL);
+        } else {
+            FlowBucket *fb = f->fb;
+            fb->evicted = f->next;
+            f->next = NULL;
+            f->fb = NULL;
+        }
         f->flow_end_flags |= FLOW_END_FLAG_SHUTDOWN;
 
         /* no one is referring to this flow, use_cnt 0, removed from hash
          * so we can unlock it and move it to the recycle queue. */
         FLOWLOCK_UNLOCK(f);
-
-        FlowEnqueue(&flow_recycle_q, f);
+        FlowQueuePrivateAppendFlow(recycle_q, f);
 
         cnt++;
 
@@ -560,6 +602,7 @@ static uint32_t FlowManagerHashRowCleanup(Flow *f)
  */
 static uint32_t FlowCleanupHash(void)
 {
+    FlowQueuePrivate local_queue = { NULL, NULL, 0 };
     uint32_t cnt = 0;
 
     for (uint32_t idx = 0; idx < flow_config.hash_size; idx++) {
@@ -567,24 +610,45 @@ static uint32_t FlowCleanupHash(void)
 
         FBLOCK_LOCK(fb);
 
-        if (fb->tail != NULL) {
+        if (fb->head != NULL) {
             /* we have a flow, or more than one */
-            cnt += FlowManagerHashRowCleanup(fb->tail);
+            cnt += FlowManagerHashRowCleanup(fb->head, &local_queue, 0);
+        }
+        if (fb->evicted != NULL) {
+            /* we have a flow, or more than one */
+            cnt += FlowManagerHashRowCleanup(fb->evicted, &local_queue, 1);
         }
 
         FBLOCK_UNLOCK(fb);
+        if (local_queue.len >= 25) {
+            FlowQueueAppendPrivate(&flow_recycle_q, &local_queue);
+        }
     }
+    FlowQueueAppendPrivate(&flow_recycle_q, &local_queue);
 
     return cnt;
 }
 
-extern int g_detect_disabled;
+static void Recycler(ThreadVars *tv, void *output_thread_data, Flow *f)
+{
+    FLOWLOCK_WRLOCK(f);
 
-typedef struct FlowManagerThreadData_ {
-    uint32_t instance;
-    uint32_t min;
-    uint32_t max;
+    (void)OutputFlowLog(tv, output_thread_data, f);
+
+    FlowClearMemory (f, f->protomap);
+    FLOWLOCK_UNLOCK(f);
+    FlowSparePoolReturnFlow(f);
+}
+
+typedef struct FlowQueueTimeoutCounters {
+    uint32_t flows_removed;
+    uint32_t flows_timeout;
+} FlowQueueTimeoutCounters;
 
+extern int g_detect_disabled;
+
+typedef struct FlowCounters_ {
+    uint16_t flow_mgr_full_pass;
     uint16_t flow_mgr_cnt_clo;
     uint16_t flow_mgr_cnt_new;
     uint16_t flow_mgr_cnt_est;
@@ -592,26 +656,55 @@ typedef struct FlowManagerThreadData_ {
     uint16_t flow_mgr_spare;
     uint16_t flow_emerg_mode_enter;
     uint16_t flow_emerg_mode_over;
-    uint16_t flow_tcp_reuse;
 
     uint16_t flow_mgr_flows_checked;
     uint16_t flow_mgr_flows_notimeout;
     uint16_t flow_mgr_flows_timeout;
     uint16_t flow_mgr_flows_timeout_inuse;
-    uint16_t flow_mgr_flows_removed;
+    uint16_t flow_mgr_flows_aside;
+    uint16_t flow_mgr_flows_aside_needs_work;
 
-    uint16_t flow_mgr_rows_checked;
-    uint16_t flow_mgr_rows_skipped;
-    uint16_t flow_mgr_rows_empty;
-    uint16_t flow_mgr_rows_busy;
     uint16_t flow_mgr_rows_maxlen;
 
     uint16_t flow_bypassed_cnt_clo;
     uint16_t flow_bypassed_pkts;
     uint16_t flow_bypassed_bytes;
+} FlowCounters;
+
+typedef struct FlowManagerThreadData_ {
+    uint32_t instance;
+    uint32_t min;
+    uint32_t max;
+
+    FlowCounters cnt;
 
+    FlowManagerTimeoutThread timeout;
 } FlowManagerThreadData;
 
+static void FlowCountersInit(ThreadVars *t, FlowCounters *fc)
+{
+    fc->flow_mgr_full_pass = StatsRegisterCounter("flow.mgr.full_hash_pass", t);
+    fc->flow_mgr_cnt_clo = StatsRegisterCounter("flow.mgr.closed_pruned", t);
+    fc->flow_mgr_cnt_new = StatsRegisterCounter("flow.mgr.new_pruned", t);
+    fc->flow_mgr_cnt_est = StatsRegisterCounter("flow.mgr.est_pruned", t);
+    fc->flow_mgr_cnt_byp = StatsRegisterCounter("flow.mgr.bypassed_pruned", t);
+    fc->flow_mgr_spare = StatsRegisterCounter("flow.spare", t);
+    fc->flow_emerg_mode_enter = StatsRegisterCounter("flow.emerg_mode_entered", t);
+    fc->flow_emerg_mode_over = StatsRegisterCounter("flow.emerg_mode_over", t);
+
+    fc->flow_mgr_rows_maxlen = StatsRegisterMaxCounter("flow.mgr.rows_maxlen", t);
+    fc->flow_mgr_flows_checked = StatsRegisterCounter("flow.mgr.flows_checked", t);
+    fc->flow_mgr_flows_notimeout = StatsRegisterCounter("flow.mgr.flows_notimeout", t);
+    fc->flow_mgr_flows_timeout = StatsRegisterCounter("flow.mgr.flows_timeout", t);
+    fc->flow_mgr_flows_timeout_inuse = StatsRegisterCounter("flow.mgr.flows_timeout_inuse", t);
+    fc->flow_mgr_flows_aside = StatsRegisterCounter("flow.mgr.flows_evicted", t);
+    fc->flow_mgr_flows_aside_needs_work = StatsRegisterCounter("flow.mgr.flows_evicted_needs_work", t);
+
+    fc->flow_bypassed_cnt_clo = StatsRegisterCounter("flow_bypassed.closed", t);
+    fc->flow_bypassed_pkts = StatsRegisterCounter("flow_bypassed.pkts", t);
+    fc->flow_bypassed_bytes = StatsRegisterCounter("flow_bypassed.bytes", t);
+}
+
 static TmEcode FlowManagerThreadInit(ThreadVars *t, const void *initdata, void **data)
 {
     FlowManagerThreadData *ftd = SCCalloc(1, sizeof(FlowManagerThreadData));
@@ -627,10 +720,10 @@ static TmEcode FlowManagerThreadInit(ThreadVars *t, const void *initdata, void *
     if (ftd->instance == 0)
         ftd->max = range;
     else if ((ftd->instance + 1) == flowmgr_number) {
-        ftd->min = (range * (ftd->instance - 1));
+        ftd->min = (range * ftd->instance) + 1;
         ftd->max = flow_config.hash_size;
     } else {
-        ftd->min = (range * (ftd->instance - 1));
+        ftd->min = (range * ftd->instance) + 1;
         ftd->max = (range * ftd->instance);
     }
     BUG_ON(ftd->min > flow_config.hash_size || ftd->max > flow_config.hash_size);
@@ -640,30 +733,7 @@ static TmEcode FlowManagerThreadInit(ThreadVars *t, const void *initdata, void *
     /* pass thread data back to caller */
     *data = ftd;
 
-    ftd->flow_mgr_cnt_clo = StatsRegisterCounter("flow_mgr.closed_pruned", t);
-    ftd->flow_mgr_cnt_new = StatsRegisterCounter("flow_mgr.new_pruned", t);
-    ftd->flow_mgr_cnt_est = StatsRegisterCounter("flow_mgr.est_pruned", t);
-    ftd->flow_mgr_cnt_byp = StatsRegisterCounter("flow_mgr.bypassed_pruned", t);
-    ftd->flow_mgr_spare = StatsRegisterCounter("flow.spare", t);
-    ftd->flow_emerg_mode_enter = StatsRegisterCounter("flow.emerg_mode_entered", t);
-    ftd->flow_emerg_mode_over = StatsRegisterCounter("flow.emerg_mode_over", t);
-    ftd->flow_tcp_reuse = StatsRegisterCounter("flow.tcp_reuse", t);
-
-    ftd->flow_mgr_flows_checked = StatsRegisterCounter("flow_mgr.flows_checked", t);
-    ftd->flow_mgr_flows_notimeout = StatsRegisterCounter("flow_mgr.flows_notimeout", t);
-    ftd->flow_mgr_flows_timeout = StatsRegisterCounter("flow_mgr.flows_timeout", t);
-    ftd->flow_mgr_flows_timeout_inuse = StatsRegisterCounter("flow_mgr.flows_timeout_inuse", t);
-    ftd->flow_mgr_flows_removed = StatsRegisterCounter("flow_mgr.flows_removed", t);
-
-    ftd->flow_mgr_rows_checked = StatsRegisterCounter("flow_mgr.rows_checked", t);
-    ftd->flow_mgr_rows_skipped = StatsRegisterCounter("flow_mgr.rows_skipped", t);
-    ftd->flow_mgr_rows_empty = StatsRegisterCounter("flow_mgr.rows_empty", t);
-    ftd->flow_mgr_rows_busy = StatsRegisterCounter("flow_mgr.rows_busy", t);
-    ftd->flow_mgr_rows_maxlen = StatsRegisterCounter("flow_mgr.rows_maxlen", t);
-
-    ftd->flow_bypassed_cnt_clo = StatsRegisterCounter("flow_bypassed.closed", t);
-    ftd->flow_bypassed_pkts = StatsRegisterCounter("flow_bypassed.pkts", t);
-    ftd->flow_bypassed_bytes = StatsRegisterCounter("flow_bypassed.bytes", t);
+    FlowCountersInit(t, &ftd->cnt);
 
     PacketPoolInit();
     return TM_ECODE_OK;
@@ -676,6 +746,25 @@ static TmEcode FlowManagerThreadDeinit(ThreadVars *t, void *data)
     return TM_ECODE_OK;
 }
 
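+/** \brief find the lowest configured flow timeout, used to size hash passes */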
+static uint32_t FlowTimeoutsMin(void)
+{
+    FlowProtoTimeoutPtr t = SC_ATOMIC_GET(flow_timeouts);
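+    /* (uint32_t)-1 wraps to UINT32_MAX, so any configured timeout is lower */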
+    uint32_t m = -1;
+    for (unsigned int i = 0; i < FLOW_PROTO_MAX; i++) {
+        m = MIN(m, t[i].new_timeout);
+        m = MIN(m, t[i].est_timeout);
+
+        if (i == FLOW_PROTO_TCP) {
+            m = MIN(m, t[i].closed_timeout);
+        }
+        if (i == FLOW_PROTO_TCP || i == FLOW_PROTO_UDP) {
+            m = MIN(m, t[i].bypassed_timeout);
+        }
+    }
+    return m;
+}
+
+//#define FM_PROFILE
 
 /** \brief Thread that manages the flow table and times out flows.
  *
@@ -688,12 +777,10 @@ static TmEcode FlowManager(ThreadVars *th_v, void *thread_data)
     FlowManagerThreadData *ftd = thread_data;
     struct timeval ts;
     uint32_t established_cnt = 0, new_cnt = 0, closing_cnt = 0;
-    int emerg = FALSE;
-    int prev_emerg = FALSE;
-    struct timespec cond_time;
-    int flow_update_delay_sec = FLOW_NORMAL_MODE_UPDATE_DELAY_SEC;
-    int flow_update_delay_nsec = FLOW_NORMAL_MODE_UPDATE_DELAY_NSEC;
+    bool emerg = false;
+    bool prev_emerg = false;
     uint32_t other_last_sec = 0; /**< last sec stamp when defrag etc ran */
+    uint32_t flow_last_sec = 0;
 /* VJ leaving disabled for now, as hosts are only used by tags and the numbers
  * are really low. Might confuse ppl
     uint16_t flow_mgr_host_prune = StatsRegisterCounter("hosts.pruned", th_v);
@@ -701,6 +788,13 @@ static TmEcode FlowManager(ThreadVars *th_v, void *thread_data)
     uint16_t flow_mgr_host_spare = StatsRegisterCounter("hosts.spare", th_v);
 */
     memset(&ts, 0, sizeof(ts));
+    uint32_t hash_passes = 0;
+    uint32_t hash_row_checks = 0;
+    uint32_t hash_passes_chunks = 0;
+    uint32_t hash_full_passes = 0;
+
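+    /* aim to spread a full hash pass over 8x the lowest timeout, e.g. a 30s
+     * minimum timeout gives a 240s pass; fall back to 60s if timeouts are 0 */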
+    const uint32_t min_timeout = FlowTimeoutsMin();
+    const uint32_t pass_in_sec = min_timeout ? min_timeout * 8 : 60;
 
     /* don't start our activities until time is setup */
     while (!TimeModeIsReady()) {
@@ -708,135 +802,249 @@ static TmEcode FlowManager(ThreadVars *th_v, void *thread_data)
             return TM_ECODE_OK;
     }
 
+    SCLogNotice("FM %s/%u starting. min_timeout %us. Full hash pass in %us",
+            th_v->name, ftd->instance, min_timeout, pass_in_sec);
+
+#ifdef FM_PROFILE
+    struct timeval endts;
+    struct timeval active;
+    struct timeval paused;
+    struct timeval sleeping;
+    memset(&endts, 0, sizeof(endts));
+    memset(&active, 0, sizeof(active));
+    memset(&paused, 0, sizeof(paused));
+    memset(&sleeping, 0, sizeof(sleeping));
+#endif
+
+    struct timeval startts;
+    memset(&startts, 0, sizeof(startts));
+    gettimeofday(&startts, NULL);
+
+    uint32_t hash_pass_iter = 0;
+    uint32_t emerg_over_cnt = 0;
+    uint64_t next_run_ms = 0;
+
     while (1)
     {
         if (TmThreadsCheckFlag(th_v, THV_PAUSE)) {
             TmThreadsSetFlag(th_v, THV_PAUSED);
+#ifdef FM_PROFILE
+            struct timeval pause_startts;
+            memset(&pause_startts, 0, sizeof(pause_startts));
+            gettimeofday(&pause_startts, NULL);
+#endif
             TmThreadTestThreadUnPaused(th_v);
+#ifdef FM_PROFILE
+            struct timeval pause_endts;
+            memset(&pause_endts, 0, sizeof(pause_endts));
+            gettimeofday(&pause_endts, NULL);
+            struct timeval pause_time;
+            memset(&pause_time, 0, sizeof(pause_time));
+            timersub(&pause_endts, &pause_startts, &pause_time);
+            timeradd(&paused, &pause_time, &paused);
+#endif
             TmThreadsUnsetFlag(th_v, THV_PAUSED);
         }
 
         if (SC_ATOMIC_GET(flow_flags) & FLOW_EMERGENCY) {
-            emerg = TRUE;
-
-            if (emerg == TRUE && prev_emerg == FALSE) {
-                prev_emerg = TRUE;
-
-                SCLogDebug("Flow emergency mode entered...");
-
-                StatsIncr(th_v, ftd->flow_emerg_mode_enter);
-            }
+            emerg = true;
         }
-
+#ifdef FM_PROFILE
+        struct timeval run_startts;
+        memset(&run_startts, 0, sizeof(run_startts));
+        gettimeofday(&run_startts, NULL);
+#endif
         /* Get the time */
         memset(&ts, 0, sizeof(ts));
         TimeGet(&ts);
         SCLogDebug("ts %" PRIdMAX "", (intmax_t)ts.tv_sec);
-
-        /* see if we still have enough spare flows */
-        if (ftd->instance == 0)
-            FlowUpdateSpareFlows();
-
-        /* try to time out flows */
-        FlowTimeoutCounters counters = { 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-        FlowTimeoutHash(&ts, 0 /* check all */, ftd->min, ftd->max, &counters);
-
-
-        if (ftd->instance == 0 &&
-                (other_last_sec == 0 || other_last_sec < (uint32_t)ts.tv_sec)) {
-            DefragTimeoutHash(&ts);
-            //uint32_t hosts_pruned =
-            HostTimeoutHash(&ts);
-            IPPairTimeoutHash(&ts);
-            other_last_sec = (uint32_t)ts.tv_sec;
+        const uint64_t ts_ms = ts.tv_sec * 1000 + ts.tv_usec / 1000;
+        const uint32_t rt = (uint32_t)ts.tv_sec;
+        const bool emerge_p = (emerg && !prev_emerg);
+        if (emerge_p) {
+            next_run_ms = 0;
+            prev_emerg = true;
+            SCLogNotice("Flow emergency mode entered...");
+            StatsIncr(th_v, ftd->cnt.flow_emerg_mode_enter);
         }
-/*
-        StatsAddUI64(th_v, flow_mgr_host_prune, (uint64_t)hosts_pruned);
-        uint32_t hosts_active = HostGetActiveCount();
-        StatsSetUI64(th_v, flow_mgr_host_active, (uint64_t)hosts_active);
-        uint32_t hosts_spare = HostGetSpareCount();
-        StatsSetUI64(th_v, flow_mgr_host_spare, (uint64_t)hosts_spare);
-*/
-        StatsAddUI64(th_v, ftd->flow_mgr_cnt_clo, (uint64_t)counters.clo);
-        StatsAddUI64(th_v, ftd->flow_mgr_cnt_new, (uint64_t)counters.new);
-        StatsAddUI64(th_v, ftd->flow_mgr_cnt_est, (uint64_t)counters.est);
-        StatsAddUI64(th_v, ftd->flow_mgr_cnt_byp, (uint64_t)counters.byp);
-        StatsAddUI64(th_v, ftd->flow_tcp_reuse, (uint64_t)counters.tcp_reuse);
-
-        StatsSetUI64(th_v, ftd->flow_mgr_flows_checked, (uint64_t)counters.flows_checked);
-        StatsSetUI64(th_v, ftd->flow_mgr_flows_notimeout, (uint64_t)counters.flows_notimeout);
-        StatsSetUI64(th_v, ftd->flow_mgr_flows_timeout, (uint64_t)counters.flows_timeout);
-        StatsSetUI64(th_v, ftd->flow_mgr_flows_removed, (uint64_t)counters.flows_removed);
-        StatsSetUI64(th_v, ftd->flow_mgr_flows_timeout_inuse, (uint64_t)counters.flows_timeout_inuse);
-
-        StatsSetUI64(th_v, ftd->flow_mgr_rows_checked, (uint64_t)counters.rows_checked);
-        StatsSetUI64(th_v, ftd->flow_mgr_rows_skipped, (uint64_t)counters.rows_skipped);
-        StatsSetUI64(th_v, ftd->flow_mgr_rows_maxlen, (uint64_t)counters.rows_maxlen);
-        StatsSetUI64(th_v, ftd->flow_mgr_rows_busy, (uint64_t)counters.rows_busy);
-        StatsSetUI64(th_v, ftd->flow_mgr_rows_empty, (uint64_t)counters.rows_empty);
-
-        StatsAddUI64(th_v, ftd->flow_bypassed_cnt_clo, (uint64_t)counters.bypassed_count);
-        StatsAddUI64(th_v, ftd->flow_bypassed_pkts, (uint64_t)counters.bypassed_pkts);
-        StatsAddUI64(th_v, ftd->flow_bypassed_bytes, (uint64_t)counters.bypassed_bytes);
-
-        uint32_t len = 0;
-        FQLOCK_LOCK(&flow_spare_q);
-        len = flow_spare_q.len;
-        FQLOCK_UNLOCK(&flow_spare_q);
-        StatsSetUI64(th_v, ftd->flow_mgr_spare, (uint64_t)len);
-
-        /* Don't fear, FlowManagerThread is here...
-         * clear emergency bit if we have at least xx flows pruned. */
-        if (emerg == TRUE) {
-            SCLogDebug("flow_sparse_q.len = %"PRIu32" prealloc: %"PRIu32
-                       "flow_spare_q status: %"PRIu32"%% flows at the queue",
-                       len, flow_config.prealloc, len * 100 / flow_config.prealloc);
+        if (ts_ms >= next_run_ms) {
+            if (ftd->instance == 0) {
+                const uint32_t sq_len = FlowSpareGetPoolSize();
+                const uint32_t spare_perc = sq_len * 100 / flow_config.prealloc;
+                /* see if we still have enough spare flows */
+                if (spare_perc < 90) {
+                    FlowSparePoolUpdate(sq_len);
+                }
+            }
+            const uint32_t secs_passed = rt - flow_last_sec;
+
+            /* try to time out flows */
+            FlowTimeoutCounters counters = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
+
+            if (emerg) {
+                /* in emergency mode, do a full pass of the hash table */
+                FlowTimeoutHash(&ftd->timeout, &ts, ftd->min, ftd->max, &counters);
+                hash_passes++;
+                hash_full_passes++;
+                hash_passes_chunks += 1;
+                hash_row_checks += counters.rows_checked;
+                StatsIncr(th_v, ftd->cnt.flow_mgr_full_pass);
+            } else {
+                /* non-emergency mode: scan part of the hash */
+                const uint32_t chunks = MIN(secs_passed, pass_in_sec);
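+                /* walk one chunk per elapsed second, capped at one full pass */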
+                for (uint32_t i = 0; i < chunks; i++) {
+                    FlowTimeoutHashInChunks(&ftd->timeout, &ts, ftd->min, ftd->max,
+                            &counters, hash_pass_iter, pass_in_sec);
+                    hash_pass_iter++;
+                    if (hash_pass_iter == pass_in_sec) {
+                        hash_pass_iter = 0;
+                        hash_full_passes++;
+                        StatsIncr(th_v, ftd->cnt.flow_mgr_full_pass);
+                    }
+                }
+                hash_passes++;
+                hash_row_checks += counters.rows_checked;
+                hash_passes_chunks += chunks;
+            }
+            flow_last_sec = rt;
+
+            /*
+               StatsAddUI64(th_v, flow_mgr_host_prune, (uint64_t)hosts_pruned);
+               uint32_t hosts_active = HostGetActiveCount();
+               StatsSetUI64(th_v, flow_mgr_host_active, (uint64_t)hosts_active);
+               uint32_t hosts_spare = HostGetSpareCount();
+               StatsSetUI64(th_v, flow_mgr_host_spare, (uint64_t)hosts_spare);
+             */
+            StatsAddUI64(th_v, ftd->cnt.flow_mgr_cnt_clo, (uint64_t)counters.clo);
+            StatsAddUI64(th_v, ftd->cnt.flow_mgr_cnt_new, (uint64_t)counters.new);
+            StatsAddUI64(th_v, ftd->cnt.flow_mgr_cnt_est, (uint64_t)counters.est);
+            StatsAddUI64(th_v, ftd->cnt.flow_mgr_cnt_byp, (uint64_t)counters.byp);
+
+            StatsAddUI64(th_v, ftd->cnt.flow_mgr_flows_checked, (uint64_t)counters.flows_checked);
+            StatsAddUI64(th_v, ftd->cnt.flow_mgr_flows_notimeout, (uint64_t)counters.flows_notimeout);
+
+            StatsAddUI64(th_v, ftd->cnt.flow_mgr_flows_timeout, (uint64_t)counters.flows_timeout);
+            //StatsAddUI64(th_v, ftd->cnt.flow_mgr_flows_removed, (uint64_t)counters.flows_removed);
+            StatsAddUI64(th_v, ftd->cnt.flow_mgr_flows_timeout_inuse, (uint64_t)counters.flows_timeout_inuse);
+            StatsAddUI64(th_v, ftd->cnt.flow_mgr_flows_aside, (uint64_t)counters.flows_aside);
+            StatsAddUI64(th_v, ftd->cnt.flow_mgr_flows_aside_needs_work, (uint64_t)counters.flows_aside_needs_work);
+
+            StatsAddUI64(th_v, ftd->cnt.flow_bypassed_cnt_clo, (uint64_t)counters.bypassed_count);
+            StatsAddUI64(th_v, ftd->cnt.flow_bypassed_pkts, (uint64_t)counters.bypassed_pkts);
+            StatsAddUI64(th_v, ftd->cnt.flow_bypassed_bytes, (uint64_t)counters.bypassed_bytes);
+
+            StatsSetUI64(th_v, ftd->cnt.flow_mgr_rows_maxlen, (uint64_t)counters.rows_maxlen);
+            // TODO AVG MAXLEN
+            // TODO LOOKUP STEPS MAXLEN and AVG LEN
+            /* Don't fear, FlowManagerThread is here...
+             * clear emergency bit if we have at least xx flows pruned. */
+            uint32_t len = FlowSpareGetPoolSize();
+            StatsSetUI64(th_v, ftd->cnt.flow_mgr_spare, (uint64_t)len);
+            if (emerg == true) {
+                SCLogDebug("flow_spare_q.len = %"PRIu32" prealloc: %"PRIu32
+                        " flow_spare_q status: %"PRIu32"%% flows at the queue",
+                        len, flow_config.prealloc, len * 100 / flow_config.prealloc);
+
             /* only if we have pruned this "emergency_recovery" percentage
              * of flows, we will unset the emergency bit */
             if (len * 100 / flow_config.prealloc > flow_config.emergency_recovery) {
-                SC_ATOMIC_AND(flow_flags, ~FLOW_EMERGENCY);
+                emerg_over_cnt++;
+            } else {
+                emerg_over_cnt = 0;
+            }
 
+            if (emerg_over_cnt >= 30) {
+                SC_ATOMIC_AND(flow_flags, ~FLOW_EMERGENCY);
                 FlowTimeoutsReset();
 
-                emerg = FALSE;
+                emerg = false;
                 prev_emerg = false;
-
-                flow_update_delay_sec = FLOW_NORMAL_MODE_UPDATE_DELAY_SEC;
-                flow_update_delay_nsec = FLOW_NORMAL_MODE_UPDATE_DELAY_NSEC;
-                SCLogInfo("Flow emergency mode over, back to normal... unsetting"
+                emerg_over_cnt = 0;
+                hash_pass_iter = 0;
+                SCLogNotice("Flow emergency mode over, back to normal... unsetting"
                           " FLOW_EMERGENCY bit (ts.tv_sec: %"PRIuMAX", "
                           "ts.tv_usec:%"PRIuMAX") flow_spare_q status(): %"PRIu32
                           "%% flows at the queue", (uintmax_t)ts.tv_sec,
                           (uintmax_t)ts.tv_usec, len * 100 / flow_config.prealloc);
 
-                StatsIncr(th_v, ftd->flow_emerg_mode_over);
-            } else {
-                flow_update_delay_sec = FLOW_EMERG_MODE_UPDATE_DELAY_SEC;
-                flow_update_delay_nsec = FLOW_EMERG_MODE_UPDATE_DELAY_NSEC;
+                StatsIncr(th_v, ftd->cnt.flow_emerg_mode_over);
             }
         }
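+        /* pace the next run: every 667ms normally, every 250ms in emergency */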
+        next_run_ms = ts_ms + 667;
+        if (emerg)
+            next_run_ms = ts_ms + 250;
+        }
+        if (flow_last_sec == 0) {
+            flow_last_sec = rt;
+        }
+
+        if (ftd->instance == 0 &&
+                (other_last_sec == 0 || other_last_sec < (uint32_t)ts.tv_sec)) {
+            DefragTimeoutHash(&ts);
+            //uint32_t hosts_pruned =
+            HostTimeoutHash(&ts);
+            IPPairTimeoutHash(&ts);
+            other_last_sec = (uint32_t)ts.tv_sec;
+        }
+
+
+#ifdef FM_PROFILE
+        struct timeval run_endts;
+        memset(&run_endts, 0, sizeof(run_endts));
+        gettimeofday(&run_endts, NULL);
+        struct timeval run_time;
+        memset(&run_time, 0, sizeof(run_time));
+        timersub(&run_endts, &run_startts, &run_time);
+        timeradd(&active, &run_time, &active);
+#endif
 
         if (TmThreadsCheckFlag(th_v, THV_KILL)) {
             StatsSyncCounters(th_v);
             break;
         }
 
-        cond_time.tv_sec = time(NULL) + flow_update_delay_sec;
-        cond_time.tv_nsec = flow_update_delay_nsec;
-        SCCtrlMutexLock(&flow_manager_ctrl_mutex);
-        SCCtrlCondTimedwait(&flow_manager_ctrl_cond, &flow_manager_ctrl_mutex,
-                            &cond_time);
-        SCCtrlMutexUnlock(&flow_manager_ctrl_mutex);
+#ifdef FM_PROFILE
+        struct timeval sleep_startts;
+        memset(&sleep_startts, 0, sizeof(sleep_startts));
+        gettimeofday(&sleep_startts, NULL);
+#endif
+        usleep(100);
+
+#ifdef FM_PROFILE
+        struct timeval sleep_endts;
+        memset(&sleep_endts, 0, sizeof(sleep_endts));
+        gettimeofday(&sleep_endts, NULL);
 
+        struct timeval sleep_time;
+        memset(&sleep_time, 0, sizeof(sleep_time));
+        timersub(&sleep_endts, &sleep_startts, &sleep_time);
+        timeradd(&sleeping, &sleep_time, &sleeping);
+#endif
         SCLogDebug("woke up... %s", SC_ATOMIC_GET(flow_flags) & FLOW_EMERGENCY ? "emergency":"");
 
         StatsSyncCountersIfSignalled(th_v);
     }
-
     SCLogPerf("%" PRIu32 " new flows, %" PRIu32 " established flows were "
               "timed out, %"PRIu32" flows in closed state", new_cnt,
               established_cnt, closing_cnt);
 
+#ifdef FM_PROFILE
+    SCLogNotice("hash passes %u avg chunks %u full %u rows %u (rows/s %u)",
+            hash_passes, hash_passes_chunks / (hash_passes ? hash_passes : 1),
+            hash_full_passes, hash_row_checks,
+            hash_row_checks / ((uint32_t)active.tv_sec?(uint32_t)active.tv_sec:1));
+
+    gettimeofday(&endts, NULL);
+    struct timeval total_run_time;
+    timersub(&endts, &startts, &total_run_time);
+
+    SCLogNotice("FM: active %u.%us out of %u.%us; sleeping %u.%us, paused %u.%us",
+            (uint32_t)active.tv_sec, (uint32_t)active.tv_usec,
+            (uint32_t)total_run_time.tv_sec, (uint32_t)total_run_time.tv_usec,
+            (uint32_t)sleeping.tv_sec, (uint32_t)sleeping.tv_usec,
+            (uint32_t)paused.tv_sec, (uint32_t)paused.tv_usec);
+#endif
     return TM_ECODE_OK;
 }
 
@@ -853,9 +1061,6 @@ void FlowManagerThreadSpawn()
     flowmgr_number = (uint32_t)setting;
 
     SCLogConfig("using %u flow manager threads", flowmgr_number);
-    SCCtrlCondInit(&flow_manager_ctrl_cond, NULL);
-    SCCtrlMutexInit(&flow_manager_ctrl_mutex, NULL);
-
     StatsRegisterGlobalCounter("flow.memuse", FlowGetMemuse);
 
     for (uint32_t u = 0; u < flowmgr_number; u++) {
@@ -885,7 +1090,6 @@ static TmEcode FlowRecyclerThreadInit(ThreadVars *t, const void *initdata, void
     FlowRecyclerThreadData *ftd = SCCalloc(1, sizeof(FlowRecyclerThreadData));
     if (ftd == NULL)
         return TM_ECODE_FAILED;
-
     if (OutputFlowLogThreadInit(t, NULL, &ftd->output_thread_data) != TM_ECODE_OK) {
         SCLogError(SC_ERR_THREAD_INIT, "initializing flow log API for thread failed");
         SCFree(ftd);
@@ -914,78 +1118,134 @@ static TmEcode FlowRecyclerThreadDeinit(ThreadVars *t, void *data)
 static TmEcode FlowRecycler(ThreadVars *th_v, void *thread_data)
 {
     struct timeval ts;
-    struct timespec cond_time;
-    int flow_update_delay_sec = FLOW_NORMAL_MODE_UPDATE_DELAY_SEC;
-    int flow_update_delay_nsec = FLOW_NORMAL_MODE_UPDATE_DELAY_NSEC;
     uint64_t recycled_cnt = 0;
     FlowRecyclerThreadData *ftd = (FlowRecyclerThreadData *)thread_data;
     BUG_ON(ftd == NULL);
 
     memset(&ts, 0, sizeof(ts));
+    uint32_t fr_passes = 0;
+
+#ifdef FM_PROFILE
+    struct timeval endts;
+    struct timeval active;
+    struct timeval paused;
+    struct timeval sleeping;
+    memset(&endts, 0, sizeof(endts));
+    memset(&active, 0, sizeof(active));
+    memset(&paused, 0, sizeof(paused));
+    memset(&sleeping, 0, sizeof(sleeping));
+#endif
+    struct timeval startts;
+    memset(&startts, 0, sizeof(startts));
+    gettimeofday(&startts, NULL);
 
     while (1)
     {
         if (TmThreadsCheckFlag(th_v, THV_PAUSE)) {
             TmThreadsSetFlag(th_v, THV_PAUSED);
+#ifdef FM_PROFILE
+            struct timeval pause_startts;
+            memset(&pause_startts, 0, sizeof(pause_startts));
+            gettimeofday(&pause_startts, NULL);
+#endif
             TmThreadTestThreadUnPaused(th_v);
+
+#ifdef FM_PROFILE
+            struct timeval pause_endts;
+            memset(&pause_endts, 0, sizeof(pause_endts));
+            gettimeofday(&pause_endts, NULL);
+
+            struct timeval pause_time;
+            memset(&pause_time, 0, sizeof(pause_time));
+            timersub(&pause_endts, &pause_startts, &pause_time);
+            timeradd(&paused, &pause_time, &paused);
+#endif
             TmThreadsUnsetFlag(th_v, THV_PAUSED);
         }
+        fr_passes++;
+#ifdef FM_PROFILE
+        struct timeval run_startts;
+        memset(&run_startts, 0, sizeof(run_startts));
+        gettimeofday(&run_startts, NULL);
+#endif
+        SC_ATOMIC_ADD(flowrec_busy,1);
+        FlowQueuePrivate list = FlowQueueExtractPrivate(&flow_recycle_q);
+
+        const int bail = (TmThreadsCheckFlag(th_v, THV_KILL));
 
         /* Get the time */
         memset(&ts, 0, sizeof(ts));
         TimeGet(&ts);
         SCLogDebug("ts %" PRIdMAX "", (intmax_t)ts.tv_sec);
 
-        uint32_t len = 0;
-        FQLOCK_LOCK(&flow_recycle_q);
-        len = flow_recycle_q.len;
-        FQLOCK_UNLOCK(&flow_recycle_q);
-
-        /* Loop through the queue and clean up all flows in it */
-        if (len) {
-            Flow *f;
-
-            while ((f = FlowDequeue(&flow_recycle_q)) != NULL) {
-                FLOWLOCK_WRLOCK(f);
-
-                (void)OutputFlowLog(th_v, ftd->output_thread_data, f);
-
-                FlowClearMemory (f, f->protomap);
-                FLOWLOCK_UNLOCK(f);
-                FlowMoveToSpare(f);
-                recycled_cnt++;
-            }
+        Flow *f;
+        while ((f = FlowQueuePrivateGetFromTop(&list)) != NULL) {
+            Recycler(th_v, ftd->output_thread_data, f);
+            recycled_cnt++;
         }
+        SC_ATOMIC_SUB(flowrec_busy,1);
 
-        SCLogDebug("%u flows to recycle", len);
+#ifdef FM_PROFILE
+        struct timeval run_endts;
+        memset(&run_endts, 0, sizeof(run_endts));
+        gettimeofday(&run_endts, NULL);
 
-        if (TmThreadsCheckFlag(th_v, THV_KILL)) {
-            StatsSyncCounters(th_v);
+        struct timeval run_time;
+        memset(&run_time, 0, sizeof(run_time));
+        timersub(&run_endts, &run_startts, &run_time);
+        timeradd(&active, &run_time, &active);
+#endif
+
+        if (bail) {
             break;
         }
 
-        cond_time.tv_sec = time(NULL) + flow_update_delay_sec;
-        cond_time.tv_nsec = flow_update_delay_nsec;
-        SCCtrlMutexLock(&flow_recycler_ctrl_mutex);
-        SCCtrlCondTimedwait(&flow_recycler_ctrl_cond,
-                &flow_recycler_ctrl_mutex, &cond_time);
-        SCCtrlMutexUnlock(&flow_recycler_ctrl_mutex);
+#ifdef FM_PROFILE
+        struct timeval sleep_startts;
+        memset(&sleep_startts, 0, sizeof(sleep_startts));
+        gettimeofday(&sleep_startts, NULL);
+#endif
+        usleep(100);
+#ifdef FM_PROFILE
+        struct timeval sleep_endts;
+        memset(&sleep_endts, 0, sizeof(sleep_endts));
+        gettimeofday(&sleep_endts, NULL);
+        struct timeval sleep_time;
+        memset(&sleep_time, 0, sizeof(sleep_time));
+        timersub(&sleep_endts, &sleep_startts, &sleep_time);
+        timeradd(&sleeping, &sleep_time, &sleeping);
+#endif
 
         SCLogDebug("woke up...");
 
         StatsSyncCountersIfSignalled(th_v);
     }
-
+    StatsSyncCounters(th_v);
+#ifdef FM_PROFILE
+    gettimeofday(&endts, NULL);
+    struct timeval total_run_time;
+    timersub(&endts, &startts, &total_run_time);
+    SCLogNotice("FR: active %u.%us out of %u.%us; sleeping %u.%us, paused %u.%us",
+            (uint32_t)active.tv_sec, (uint32_t)active.tv_usec,
+            (uint32_t)total_run_time.tv_sec, (uint32_t)total_run_time.tv_usec,
+            (uint32_t)sleeping.tv_sec, (uint32_t)sleeping.tv_usec,
+            (uint32_t)paused.tv_sec, (uint32_t)paused.tv_usec);
+
+    SCLogNotice("FR passes %u passes/s %u", fr_passes,
+            (uint32_t)fr_passes/((uint32_t)active.tv_sec?(uint32_t)active.tv_sec:1));
+#endif
     SCLogPerf("%"PRIu64" flows processed", recycled_cnt);
-
     return TM_ECODE_OK;
 }
 
-static int FlowRecyclerReadyToShutdown(void)
+static bool FlowRecyclerReadyToShutdown(void)
 {
+    if (SC_ATOMIC_GET(flowrec_busy) != 0) {
+        return false;
+    }
     uint32_t len = 0;
     FQLOCK_LOCK(&flow_recycle_q);
-    len = flow_recycle_q.len;
+    len = flow_recycle_q.qlen;
     FQLOCK_UNLOCK(&flow_recycle_q);
 
     return ((len == 0));
@@ -1005,9 +1265,6 @@ void FlowRecyclerThreadSpawn()
 
     SCLogConfig("using %u flow recycler threads", flowrec_number);
 
-    SCCtrlCondInit(&flow_recycler_ctrl_cond, NULL);
-    SCCtrlMutexInit(&flow_recycler_ctrl_mutex, NULL);
-
     for (uint32_t u = 0; u < flowrec_number; u++) {
         char name[TM_THREAD_NAME_MAX];
         snprintf(name, sizeof(name), "%s#%02u", thread_name_flow_rec, u+1);
@@ -1038,18 +1295,13 @@ void FlowDisableFlowRecyclerThread(void)
     int cnt = 0;
 
     /* move all flows still in the hash to the recycler queue */
-    FlowCleanupHash();
+    uint32_t flows = FlowCleanupHash();
+    SCLogNotice("flows to process: %u", flows);
 
     /* make sure all flows are processed */
     do {
-        SCCtrlCondSignal(&flow_recycler_ctrl_cond);
         usleep(10);
-    } while (FlowRecyclerReadyToShutdown() == 0);
-
-    /* wake up threads */
-    for (uint32_t u = 0; u < flowrec_number; u++) {
-        SCCtrlCondSignal(&flow_recycler_ctrl_cond);
-    }
+    } while (FlowRecyclerReadyToShutdown() == false);
 
     SCMutexLock(&tv_root_lock);
     /* flow recycler thread(s) is/are a part of mgmt threads */
@@ -1119,290 +1371,5 @@ void TmModuleFlowRecyclerRegister (void)
     SCLogDebug("%s registered", tmm_modules[TMM_FLOWRECYCLER].name);
 
     SC_ATOMIC_INIT(flowrec_cnt);
-}
-
-#ifdef UNITTESTS
-
-/**
- *  \test   Test the timing out of a flow with a fresh TcpSession
- *          (just initialized, no data segments) in normal mode.
- *
- *  \retval On success it returns 1 and on failure 0.
- */
-
-static int FlowMgrTest01 (void)
-{
-    TcpSession ssn;
-    Flow f;
-    FlowBucket fb;
-    struct timeval ts;
-
-    FlowQueueInit(&flow_spare_q);
-
-    memset(&ssn, 0, sizeof(TcpSession));
-    memset(&f, 0, sizeof(Flow));
-    memset(&ts, 0, sizeof(ts));
-    memset(&fb, 0, sizeof(FlowBucket));
-
-    FBLOCK_INIT(&fb);
-
-    FLOW_INITIALIZE(&f);
-    f.flags |= FLOW_TIMEOUT_REASSEMBLY_DONE;
-
-    TimeGet(&ts);
-    f.lastts.tv_sec = ts.tv_sec - 5000;
-    f.protoctx = &ssn;
-    f.fb = &fb;
-
-    f.proto = IPPROTO_TCP;
-
-    int32_t next_ts = 0;
-    int state = SC_ATOMIC_GET(f.flow_state);
-    FlowTimeoutCounters counters = { 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-    if (FlowManagerFlowTimeout(&f, state, &ts, &next_ts) != 1 && FlowManagerFlowTimedOut(&f, &ts, &counters) != 1) {
-        FBLOCK_DESTROY(&fb);
-        FLOW_DESTROY(&f);
-        FlowQueueDestroy(&flow_spare_q);
-        return 0;
-    }
-
-    FBLOCK_DESTROY(&fb);
-    FLOW_DESTROY(&f);
-
-    FlowQueueDestroy(&flow_spare_q);
-    return 1;
-}
-
-/**
- *  \test   Test the timing out of a flow with a TcpSession
- *          (with data segments) in normal mode.
- *
- *  \retval On success it returns 1 and on failure 0.
- */
-
-static int FlowMgrTest02 (void)
-{
-    TcpSession ssn;
-    Flow f;
-    FlowBucket fb;
-    struct timeval ts;
-    TcpSegment seg;
-    TcpStream client;
-
-    FlowQueueInit(&flow_spare_q);
-
-    memset(&ssn, 0, sizeof(TcpSession));
-    memset(&f, 0, sizeof(Flow));
-    memset(&fb, 0, sizeof(FlowBucket));
-    memset(&ts, 0, sizeof(ts));
-    memset(&seg, 0, sizeof(TcpSegment));
-    memset(&client, 0, sizeof(TcpStream));
-
-    FBLOCK_INIT(&fb);
-    FLOW_INITIALIZE(&f);
-    f.flags |= FLOW_TIMEOUT_REASSEMBLY_DONE;
-
-    TimeGet(&ts);
-    TCP_SEG_LEN(&seg) = 3;
-    TCPSEG_RB_INSERT(&client.seg_tree, &seg);
-    ssn.client = client;
-    ssn.server = client;
-    ssn.state = TCP_ESTABLISHED;
-    f.lastts.tv_sec = ts.tv_sec - 5000;
-    f.protoctx = &ssn;
-    f.fb = &fb;
-    f.proto = IPPROTO_TCP;
-
-    int32_t next_ts = 0;
-    int state = SC_ATOMIC_GET(f.flow_state);
-    FlowTimeoutCounters counters = { 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-    if (FlowManagerFlowTimeout(&f, state, &ts, &next_ts) != 1 && FlowManagerFlowTimedOut(&f, &ts, &counters) != 1) {
-        FBLOCK_DESTROY(&fb);
-        FLOW_DESTROY(&f);
-        FlowQueueDestroy(&flow_spare_q);
-        return 0;
-    }
-    FBLOCK_DESTROY(&fb);
-    FLOW_DESTROY(&f);
-    FlowQueueDestroy(&flow_spare_q);
-    return 1;
-
-}
-
-/**
- *  \test   Test the timing out of a flow with a fresh TcpSession
- *          (just initialized, no data segments) in emergency mode.
- *
- *  \retval On success it returns 1 and on failure 0.
- */
-
-static int FlowMgrTest03 (void)
-{
-    TcpSession ssn;
-    Flow f;
-    FlowBucket fb;
-    struct timeval ts;
-
-    FlowQueueInit(&flow_spare_q);
-
-    memset(&ssn, 0, sizeof(TcpSession));
-    memset(&f, 0, sizeof(Flow));
-    memset(&ts, 0, sizeof(ts));
-    memset(&fb, 0, sizeof(FlowBucket));
-
-    FBLOCK_INIT(&fb);
-    FLOW_INITIALIZE(&f);
-    f.flags |= FLOW_TIMEOUT_REASSEMBLY_DONE;
-
-    TimeGet(&ts);
-    ssn.state = TCP_SYN_SENT;
-    f.lastts.tv_sec = ts.tv_sec - 300;
-    f.protoctx = &ssn;
-    f.fb = &fb;
-    f.proto = IPPROTO_TCP;
-    f.flags |= FLOW_EMERGENCY;
-
-    int next_ts = 0;
-    int state = SC_ATOMIC_GET(f.flow_state);
-    FlowTimeoutCounters counters = { 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-    if (FlowManagerFlowTimeout(&f, state, &ts, &next_ts) != 1 && FlowManagerFlowTimedOut(&f, &ts, &counters) != 1) {
-        FBLOCK_DESTROY(&fb);
-        FLOW_DESTROY(&f);
-        FlowQueueDestroy(&flow_spare_q);
-        return 0;
-    }
-
-    FBLOCK_DESTROY(&fb);
-    FLOW_DESTROY(&f);
-    FlowQueueDestroy(&flow_spare_q);
-    return 1;
-}
-
-/**
- *  \test   Test the timing out of a flow with a TcpSession
- *          (with data segments) in emergency mode.
- *
- *  \retval On success it returns 1 and on failure 0.
- */
-
-static int FlowMgrTest04 (void)
-{
-
-    TcpSession ssn;
-    Flow f;
-    FlowBucket fb;
-    struct timeval ts;
-    TcpSegment seg;
-    TcpStream client;
-
-    FlowQueueInit(&flow_spare_q);
-
-    memset(&ssn, 0, sizeof(TcpSession));
-    memset(&f, 0, sizeof(Flow));
-    memset(&fb, 0, sizeof(FlowBucket));
-    memset(&ts, 0, sizeof(ts));
-    memset(&seg, 0, sizeof(TcpSegment));
-    memset(&client, 0, sizeof(TcpStream));
-
-    FBLOCK_INIT(&fb);
-    FLOW_INITIALIZE(&f);
-    f.flags |= FLOW_TIMEOUT_REASSEMBLY_DONE;
-
-    TimeGet(&ts);
-    TCP_SEG_LEN(&seg) = 3;
-    TCPSEG_RB_INSERT(&client.seg_tree, &seg);
-    ssn.client = client;
-    ssn.server = client;
-    ssn.state = TCP_ESTABLISHED;
-    f.lastts.tv_sec = ts.tv_sec - 5000;
-    f.protoctx = &ssn;
-    f.fb = &fb;
-    f.proto = IPPROTO_TCP;
-    f.flags |= FLOW_EMERGENCY;
-
-    int next_ts = 0;
-    int state = SC_ATOMIC_GET(f.flow_state);
-    FlowTimeoutCounters counters = { 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-    if (FlowManagerFlowTimeout(&f, state, &ts, &next_ts) != 1 && FlowManagerFlowTimedOut(&f, &ts, &counters) != 1) {
-        FBLOCK_DESTROY(&fb);
-        FLOW_DESTROY(&f);
-        FlowQueueDestroy(&flow_spare_q);
-        return 0;
-    }
-
-    FBLOCK_DESTROY(&fb);
-    FLOW_DESTROY(&f);
-    FlowQueueDestroy(&flow_spare_q);
-    return 1;
-}
-
-/**
- *  \test   Test flow allocations when it reach memcap
- *
- *
- *  \retval On success it returns 1 and on failure 0.
- */
-
-static int FlowMgrTest05 (void)
-{
-    int result = 0;
-
-    FlowInitConfig(FLOW_QUIET);
-    FlowConfig backup;
-    memcpy(&backup, &flow_config, sizeof(FlowConfig));
-
-    uint32_t ini = 0;
-    uint32_t end = flow_spare_q.len;
-    SC_ATOMIC_SET(flow_config.memcap, 10000);
-    flow_config.prealloc = 100;
-
-    /* Let's get the flow_spare_q empty */
-    UTHBuildPacketOfFlows(ini, end, 0);
-
-    /* And now let's try to reach the memcap val */
-    while (FLOW_CHECK_MEMCAP(sizeof(Flow))) {
-        ini = end + 1;
-        end = end + 2;
-        UTHBuildPacketOfFlows(ini, end, 0);
-    }
-
-    /* should time out normal */
-    TimeSetIncrementTime(2000);
-    ini = end + 1;
-    end = end + 2;;
-    UTHBuildPacketOfFlows(ini, end, 0);
-
-    struct timeval ts;
-    TimeGet(&ts);
-    /* try to time out flows */
-    FlowTimeoutCounters counters = { 0, 0, 0, 0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0};
-    FlowTimeoutHash(&ts, 0 /* check all */, 0, flow_config.hash_size, &counters);
-
-    if (flow_recycle_q.len > 0) {
-        result = 1;
-    }
-
-    memcpy(&flow_config, &backup, sizeof(FlowConfig));
-    FlowShutdown();
-    return result;
-}
-#endif /* UNITTESTS */
-
-/**
- *  \brief   Function to register the Flow Unitests.
- */
-void FlowMgrRegisterTests (void)
-{
-#ifdef UNITTESTS
-    UtRegisterTest("FlowMgrTest01 -- Timeout a flow having fresh TcpSession",
-                   FlowMgrTest01);
-    UtRegisterTest("FlowMgrTest02 -- Timeout a flow having TcpSession with segments",
-                   FlowMgrTest02);
-    UtRegisterTest("FlowMgrTest03 -- Timeout a flow in emergency having fresh TcpSession",
-                   FlowMgrTest03);
-    UtRegisterTest("FlowMgrTest04 -- Timeout a flow in emergency having TcpSession with segments",
-                   FlowMgrTest04);
-    UtRegisterTest("FlowMgrTest05 -- Test flow Allocations when it reach memcap",
-                   FlowMgrTest05);
-#endif /* UNITTESTS */
+    SC_ATOMIC_INIT(flowrec_busy);
 }
index 8ce882c59126fe1b9f7cb98574babf3fcdff6e2e..a157215d92e33d32f88998f9398bc6923d7e66e1 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2013 Open Information Security Foundation
+/* Copyright (C) 2007-2020 Open Information Security Foundation
  *
  * You can copy, redistribute or modify this Program under the terms of
  * the GNU General Public License version 2 as published by the Free
 #define FlowTimeoutsReset() FlowTimeoutsInit()
 void FlowTimeoutsInit(void);
 void FlowTimeoutsEmergency(void);
-
-/** flow manager scheduling condition */
-extern SCCtrlCondT flow_manager_ctrl_cond;
-extern SCCtrlMutex flow_manager_ctrl_mutex;
-#define FlowWakeupFlowManagerThread() SCCtrlCondSignal(&flow_manager_ctrl_cond)
-
 void FlowManagerThreadSpawn(void);
 void FlowDisableFlowManagerThread(void);
-void FlowMgrRegisterTests (void);
-
-/** flow recycler scheduling condition */
-extern SCCtrlCondT flow_recycler_ctrl_cond;
-extern SCCtrlMutex flow_recycler_ctrl_mutex;
-#define FlowWakeupFlowRecyclerThread() \
-    SCCtrlCondSignal(&flow_recycler_ctrl_cond)
-
 void FlowRecyclerThreadSpawn(void);
 void FlowDisableFlowRecyclerThread(void);
-
 void TmModuleFlowManagerRegister (void);
 void TmModuleFlowRecyclerRegister (void);
 
index dbdf46624e49b72dc68291c15b59c702b78fa03e..2357156c481b017f273bbea1d09e897bb78da453 100644 (file)
@@ -89,7 +89,7 @@ extern FlowProtoTimeout flow_timeouts_emerg[FLOW_PROTO_MAX];
 extern FlowProtoFreeFunc flow_freefuncs[FLOW_PROTO_MAX];
 
 /** spare/unused/prealloced flows live here */
-extern FlowQueue flow_spare_q;
+//extern FlowQueue flow_spare_q;
 
 /** queue to pass flows to cleanup/log thread(s) */
 extern FlowQueue flow_recycle_q;
@@ -101,7 +101,35 @@ extern FlowConfig flow_config;
 SC_ATOMIC_EXTERN(uint64_t, flow_memuse);
 
 typedef FlowProtoTimeout *FlowProtoTimeoutPtr;
-SC_ATOMIC_DECLARE(FlowProtoTimeoutPtr, flow_timeouts);
+SC_ATOMIC_EXTERN(FlowProtoTimeoutPtr, flow_timeouts);
+
+static inline uint32_t FlowGetFlowTimeoutDirect(
+        const FlowProtoTimeoutPtr flow_timeouts,
+        const enum FlowState state, const uint8_t protomap)
+{
+    uint32_t timeout;
+    switch (state) {
+        default:
+        case FLOW_STATE_NEW:
+            timeout = flow_timeouts[protomap].new_timeout;
+            break;
+        case FLOW_STATE_ESTABLISHED:
+            timeout = flow_timeouts[protomap].est_timeout;
+            break;
+        case FLOW_STATE_CLOSED:
+            timeout = flow_timeouts[protomap].closed_timeout;
+            break;
+#ifdef CAPTURE_OFFLOAD
+        case FLOW_STATE_CAPTURE_BYPASSED:
+            timeout = FLOW_BYPASSED_TIMEOUT;
+            break;
+#endif
+        case FLOW_STATE_LOCAL_BYPASSED:
+            timeout = flow_timeouts[protomap].bypassed_timeout;
+            break;
+    }
+    return timeout;
+}
 
 /** \internal
  *  \brief get timeout for flow
@@ -113,9 +141,24 @@ SC_ATOMIC_DECLARE(FlowProtoTimeoutPtr, flow_timeouts);
  */
 static inline uint32_t FlowGetFlowTimeout(const Flow *f, enum FlowState state)
 {
-    uint32_t timeout;
     FlowProtoTimeoutPtr flow_timeouts = SC_ATOMIC_GET(flow_timeouts);
-    switch(state) {
+    return FlowGetFlowTimeoutDirect(flow_timeouts, state, f->protomap);
+}
+
+/** \internal
+ *  \brief get timeout policy for flow
+ *  \note does not take emergency mode into account. Always
+ *        returns the 'normal' policy.
+ *
+ *  \param f flow
+ *
+ *  \retval timeout timeout in seconds
+ */
+static inline uint32_t FlowGetTimeoutPolicy(const Flow *f)
+{
+    uint32_t timeout;
+    FlowProtoTimeoutPtr flow_timeouts = flow_timeouts_normal;
+    switch (f->flow_state) {
         default:
         case FLOW_STATE_NEW:
             timeout = flow_timeouts[f->protomap].new_timeout;
@@ -137,7 +180,4 @@ static inline uint32_t FlowGetFlowTimeout(const Flow *f, enum FlowState state)
     }
     return timeout;
 }
-
-
 #endif /* __FLOW_PRIVATE_H__ */
-
index b6a1138ddc724c975e4a7535c782afd101a39f67..530cab3db78138ce9bb94299a566689cd4a879f7 100644 (file)
@@ -1,4 +1,4 @@
-/* Copyright (C) 2007-2013 Open Information Security Foundation
+/* Copyright (C) 2007-2020 Open Information Security Foundation
  *
  * You can copy, redistribute or modify this Program under the terms of
  * the GNU General Public License version 2 as published by the Free
@@ -63,6 +63,108 @@ void FlowQueueDestroy (FlowQueue *q)
     FQLOCK_DESTROY(q);
 }
 
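+/** \brief append a flow at the tail of an unlocked, thread-local queue */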
+void FlowQueuePrivateAppendFlow(FlowQueuePrivate *fqc, Flow *f)
+{
+    if (fqc->top == NULL) {
+        fqc->top = fqc->bot = f;
+        fqc->len = 1;
+    } else {
+        fqc->bot->next = f;
+        fqc->bot = f;
+        fqc->len++;
+    }
+    f->next = NULL;
+}
+
+void FlowQueuePrivatePrependFlow(FlowQueuePrivate *fqc, Flow *f)
+{
+    f->next = fqc->top;
+    fqc->top = f;
+    if (f->next == NULL) {
+        fqc->bot = f;
+    }
+    fqc->len++;
+}
+
+void FlowQueuePrivateAppendPrivate(FlowQueuePrivate *dest, FlowQueuePrivate *src)
+{
+    if (src->top == NULL)
+        return;
+
+    if (dest->bot == NULL) {
+        dest->top = src->top;
+        dest->bot = src->bot;
+        dest->len = src->len;
+    } else {
+        dest->bot->next = src->top;
+        dest->bot = src->bot;
+        dest->len += src->len;
+    }
+    src->top = src->bot = NULL;
+    src->len = 0;
+}
+
+static inline void FlowQueueAtomicSetNonEmpty(FlowQueue *fq)
+{
+    if (SC_ATOMIC_GET(fq->non_empty) == false) {
+        SC_ATOMIC_SET(fq->non_empty, true);
+    }
+}
+static inline void FlowQueueAtomicSetEmpty(FlowQueue *fq)
+{
+    if (SC_ATOMIC_GET(fq->non_empty) == true) {
+        SC_ATOMIC_SET(fq->non_empty, false);
+    }
+}
+
+void FlowQueueAppendPrivate(FlowQueue *fq, FlowQueuePrivate *fqc)
+{
+    if (fqc->top == NULL)
+        return;
+
+    FQLOCK_LOCK(fq);
+    if (fq->qbot == NULL) {
+        fq->qtop = fqc->top;
+        fq->qbot = fqc->bot;
+        fq->qlen = fqc->len;
+    } else {
+        fq->qbot->next = fqc->top;
+        fq->qbot = fqc->bot;
+        fq->qlen += fqc->len;
+    }
+    FlowQueueAtomicSetNonEmpty(fq);
+    FQLOCK_UNLOCK(fq);
+    fqc->top = fqc->bot = NULL;
+    fqc->len = 0;
+}
+
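+/** \brief detach the queue's entire contents under lock, leaving it empty */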
+FlowQueuePrivate FlowQueueExtractPrivate(FlowQueue *fq)
+{
+    FQLOCK_LOCK(fq);
+    FlowQueuePrivate fqc = fq->priv;
+    fq->qtop = fq->qbot = NULL;
+    fq->qlen = 0;
+    FlowQueueAtomicSetEmpty(fq);
+    FQLOCK_UNLOCK(fq);
+    return fqc;
+}
+
+Flow *FlowQueuePrivateGetFromTop(FlowQueuePrivate *fqc)
+{
+    Flow *f = fqc->top;
+    if (f == NULL) {
+        return NULL;
+    }
+
+    fqc->top = f->next;
+    f->next = NULL;
+    fqc->len--;
+    if (fqc->top == NULL) {
+        fqc->bot = NULL;
+    }
+    return f;
+}
+
 /**
  *  \brief add a flow to a queue
  *
@@ -74,24 +176,9 @@ void FlowEnqueue (FlowQueue *q, Flow *f)
 #ifdef DEBUG
     BUG_ON(q == NULL || f == NULL);
 #endif
-
     FQLOCK_LOCK(q);
-
-    /* more flows in queue */
-    if (q->top != NULL) {
-        f->lnext = q->top;
-        q->top->lprev = f;
-        q->top = f;
-    /* only flow */
-    } else {
-        q->top = f;
-        q->bot = f;
-    }
-    q->len++;
-#ifdef DBG_PERF
-    if (q->len > q->dbg_maxlen)
-        q->dbg_maxlen = q->len;
-#endif /* DBG_PERF */
+    FlowQueuePrivateAppendFlow(&q->priv, f);
+    FlowQueueAtomicSetNonEmpty(q);
     FQLOCK_UNLOCK(q);
 }
 
@@ -105,64 +192,9 @@ void FlowEnqueue (FlowQueue *q, Flow *f)
 Flow *FlowDequeue (FlowQueue *q)
 {
     FQLOCK_LOCK(q);
-
-    Flow *f = q->bot;
-    if (f == NULL) {
-        FQLOCK_UNLOCK(q);
-        return NULL;
-    }
-
-    /* more packets in queue */
-    if (q->bot->lprev != NULL) {
-        q->bot = q->bot->lprev;
-        q->bot->lnext = NULL;
-    /* just the one we remove, so now empty */
-    } else {
-        q->top = NULL;
-        q->bot = NULL;
-    }
-
-#ifdef DEBUG
-    BUG_ON(q->len == 0);
-#endif
-    if (q->len > 0)
-        q->len--;
-
-    f->lnext = NULL;
-    f->lprev = NULL;
-
+    Flow *f = FlowQueuePrivateGetFromTop(&q->priv);
+    if (f == NULL)
+        FlowQueueAtomicSetEmpty(q);
     FQLOCK_UNLOCK(q);
     return f;
 }
-
-/**
- *  \brief Transfer a flow from a queue to the spare queue
- *
- *  \param f the flow to be transfered
- *  \param q the source queue, where the flow will be removed. This queue is locked.
- *
- *  \note spare queue needs locking
- */
-void FlowMoveToSpare(Flow *f)
-{
-    /* now put it in spare */
-    FQLOCK_LOCK(&flow_spare_q);
-
-    /* add to new queue (append) */
-    f->lprev = flow_spare_q.bot;
-    if (f->lprev != NULL)
-        f->lprev->lnext = f;
-    f->lnext = NULL;
-    flow_spare_q.bot = f;
-    if (flow_spare_q.top == NULL)
-        flow_spare_q.top = f;
-
-    flow_spare_q.len++;
-#ifdef DBG_PERF
-    if (flow_spare_q.len > flow_spare_q.dbg_maxlen)
-        flow_spare_q.dbg_maxlen = flow_spare_q.len;
-#endif /* DBG_PERF */
-
-    FQLOCK_UNLOCK(&flow_spare_q);
-}
-
index fd7a3b0ea8267fe04b94973f26419e9c9b1ce3c0..8eadf1cab74a31ff5fb30e22691cacf48915fddf 100644 (file)
     #endif
 #endif
 
-/* Define a queue for storing flows */
-typedef struct FlowQueue_
+typedef struct FlowQueuePrivate_
 {
     Flow *top;
     Flow *bot;
     uint32_t len;
-#ifdef DBG_PERF
-    uint32_t dbg_maxlen;
-#endif /* DBG_PERF */
+} FlowQueuePrivate;
+
+/* Define a queue for storing flows */
+typedef struct FlowQueue_
+{
+    FlowQueuePrivate priv;
+    SC_ATOMIC_DECLARE(bool,non_empty);
 #ifdef FQLOCK_MUTEX
     SCMutex m;
 #elif defined FQLOCK_SPIN
@@ -54,6 +57,9 @@ typedef struct FlowQueue_
     #error Enable FQLOCK_SPIN or FQLOCK_MUTEX
 #endif
 } FlowQueue;
+#define qtop priv.top
+#define qbot priv.bot
+#define qlen priv.len
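+/* aliases so locked-queue code can address the embedded private queue members */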
 
 #ifdef FQLOCK_SPIN
     #define FQLOCK_INIT(q) SCSpinInit(&(q)->s, 0)
@@ -78,8 +84,16 @@ void FlowQueueDestroy (FlowQueue *);
 
 void FlowEnqueue (FlowQueue *, Flow *);
 Flow *FlowDequeue (FlowQueue *);
+void FlowQueueRemove(FlowQueue *fq, Flow *f);
+void FlowQueueRemoveLock(FlowQueue *fq, Flow *f);
+
+void FlowQueuePrivateAppendFlow(FlowQueuePrivate *fqc, Flow *f);
+void FlowQueuePrivatePrependFlow(FlowQueuePrivate *fqc, Flow *f);
 
-void FlowMoveToSpare(Flow *);
+void FlowQueueAppendPrivate(FlowQueue *fq, FlowQueuePrivate *fqp);
+void FlowQueuePrivateAppendPrivate(FlowQueuePrivate *dest, FlowQueuePrivate *src);
+FlowQueuePrivate FlowQueueExtractPrivate(FlowQueue *fq);
+Flow *FlowQueuePrivateGetFromTop(FlowQueuePrivate *fqp);
 
 #endif /* __FLOW_QUEUE_H__ */
 
diff --git a/src/flow-spare-pool.c b/src/flow-spare-pool.c
new file mode 100644 (file)
index 0000000..4f72706
--- /dev/null
@@ -0,0 +1,260 @@
+/* Copyright (C) 2007-2020 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/**
+ * \file
+ *
+ * \author Victor Julien <victor@inliniac.net>
+ *
+ * Flow spare pool functions
+ */
+
+#include "suricata-common.h"
+#include "threads.h"
+#include "debug.h"
+#include "flow-private.h"
+#include "flow-queue.h"
+#include "flow-util.h"
+#include "flow-spare-pool.h"
+#include "util-error.h"
+#include "util-debug.h"
+#include "util-print.h"
+#include "util-validate.h"
+
+typedef struct FlowSparePool {
+    FlowQueuePrivate queue;
+    struct FlowSparePool *next;
+} FlowSparePool;
+
+static uint32_t flow_spare_pool_flow_cnt = 0;
+static uint32_t flow_spare_pool_block_size = 100;
+static FlowSparePool *flow_spare_pool = NULL;
+static SCMutex flow_spare_pool_m = SCMUTEX_INITIALIZER;
+
+uint32_t FlowSpareGetPoolSize(void)
+{
+    uint32_t size;
+    SCMutexLock(&flow_spare_pool_m);
+    size = flow_spare_pool_flow_cnt;
+    SCMutexUnlock(&flow_spare_pool_m);
+    return size;
+}
+
+static FlowSparePool *FlowSpareGetPool(void)
+{
+    FlowSparePool *p = SCCalloc(1, sizeof(*p));
+    if (p == NULL)
+        return NULL;
+    return p;
+}
+
+static bool FlowSparePoolUpdateBlock(FlowSparePool *p)
+{
+    DEBUG_VALIDATE_BUG_ON(p == NULL);
+
+    for (uint32_t i = p->queue.len; i < flow_spare_pool_block_size; i++)
+    {
+        Flow *f = FlowAlloc();
+        if (f == NULL)
+            return false;
+        FlowQueuePrivateAppendFlow(&p->queue, f);
+    }
+    return true;
+}
+
+#ifdef FSP_VALIDATE
+static void Validate(FlowSparePool *top, const uint32_t target)
+{
+    if (top == NULL) {
+        assert(target == 0);
+        return;
+    }
+
+    assert(top->queue.len >= 1);
+    //if (top->next != NULL)
+    //    assert(top->next->queue.len == flow_spare_pool_block_size);
+
+    uint32_t cnt = 0;
+    for (FlowSparePool *p = top; p != NULL; p = p->next)
+    {
+        assert(p->queue.len);
+        cnt += p->queue.len;
+    }
+    assert(cnt == target);
+}
+#endif
+
+void FlowSparePoolReturnFlow(Flow *f)
+{
+    SCMutexLock(&flow_spare_pool_m);
+    if (flow_spare_pool == NULL) {
+        flow_spare_pool = FlowSpareGetPool();
+    }
+    DEBUG_VALIDATE_BUG_ON(flow_spare_pool == NULL);
+
+    /* if the top is full, get a new block */
+    if (flow_spare_pool->queue.len >= flow_spare_pool_block_size) {
+        FlowSparePool *p = FlowSpareGetPool();
+        DEBUG_VALIDATE_BUG_ON(p == NULL);
+        p->next = flow_spare_pool;
+        flow_spare_pool = p;
+    }
+    /* add to the (possibly new) top */
+    FlowQueuePrivateAppendFlow(&flow_spare_pool->queue, f);
+    flow_spare_pool_flow_cnt++;
+
+    SCMutexUnlock(&flow_spare_pool_m);
+}
+
+void FlowSparePoolReturnFlows(FlowQueuePrivate *fqp)
+{
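+    /* intentionally empty in this commit: bulk returns are not wired up yet */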
+
+}
+
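+/** \brief hand a whole block of spare flows to the caller; an empty queue
+ *         signals that the pool is exhausted */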
+FlowQueuePrivate FlowSpareGetFromPool(void)
+{
+    SCMutexLock(&flow_spare_pool_m);
+    if (flow_spare_pool == NULL || flow_spare_pool_flow_cnt == 0) {
+        SCMutexUnlock(&flow_spare_pool_m);
+        FlowQueuePrivate empty = { NULL, NULL, 0 };
+        return empty;
+    }
+
+    /* top is full or it's the only block we have */
+    if (flow_spare_pool->queue.len >= flow_spare_pool_block_size || flow_spare_pool->next == NULL) {
+        FlowSparePool *p = flow_spare_pool;
+        flow_spare_pool = p->next;
+        DEBUG_VALIDATE_BUG_ON(flow_spare_pool_flow_cnt < p->queue.len);
+        flow_spare_pool_flow_cnt -= p->queue.len;
+#ifdef FSP_VALIDATE
+        Validate(flow_spare_pool, flow_spare_pool_flow_cnt);
+#endif
+        SCMutexUnlock(&flow_spare_pool_m);
+
+        FlowQueuePrivate ret = p->queue;
+        SCFree(p);
+        return ret;
+    /* next should always be full if it exists */
+    } else if (flow_spare_pool->next != NULL) {
+        FlowSparePool *p = flow_spare_pool->next;
+        flow_spare_pool->next = p->next;
+        DEBUG_VALIDATE_BUG_ON(flow_spare_pool_flow_cnt < p->queue.len);
+        flow_spare_pool_flow_cnt -= p->queue.len;
+#ifdef FSP_VALIDATE
+        Validate(flow_spare_pool, flow_spare_pool_flow_cnt);
+#endif
+        SCMutexUnlock(&flow_spare_pool_m);
+
+        FlowQueuePrivate ret = p->queue;
+        SCFree(p);
+        return ret;
+    }
+
+    SCMutexUnlock(&flow_spare_pool_m);
+    FlowQueuePrivate empty = { NULL, NULL, 0 };
+    return empty;
+}
+
+void FlowSparePoolUpdate(uint32_t size)
+{
+    const int64_t todo = (int64_t)flow_config.prealloc - (int64_t)size;
+    if (todo < 0) {
+        // remove
+    } else if (todo > 0) {
+        FlowSparePool *head = NULL, *tail = NULL;
+
+        uint32_t blocks = ((uint32_t)todo / flow_spare_pool_block_size) + 1;
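+        /* round up so a trailing partial block is still allocated */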
+
+        uint32_t flow_cnt = 0;
+        for (uint32_t cnt = 0; cnt < blocks; cnt++) {
+            FlowSparePool *p = FlowSpareGetPool();
+            if (p == NULL) {
+                break;
+            }
+            const bool ok = FlowSparePoolUpdateBlock(p);
+            if (p->queue.len == 0) {
+                SCFree(p);
+                break;
+            }
+            flow_cnt += p->queue.len;
+
+            /* prepend to list */
+            p->next = head;
+            head = p;
+            if (tail == NULL)
+                tail = p;
+            if (!ok)
+                break;
+        }
+        if (head) {
+            SCMutexLock(&flow_spare_pool_m);
+            if (flow_spare_pool == NULL) {
+                flow_spare_pool = head;
+            } else if (tail != NULL) {
+                /* since these are 'full' buckets we don't put them
+                 * at the top but right after as the top is likely not
+                 * full. */
+                tail->next = flow_spare_pool->next;
+                flow_spare_pool->next = head;
+            }
+
+            flow_spare_pool_flow_cnt += flow_cnt;
+#ifdef FSP_VALIDATE
+            Validate(flow_spare_pool, flow_spare_pool_flow_cnt);
+#endif
+            SCMutexUnlock(&flow_spare_pool_m);
+        }
+    }
+}
+
+void FlowSparePoolInit(void)
+{
+    SCMutexLock(&flow_spare_pool_m);
+    for (uint32_t cnt = 0; cnt < flow_config.prealloc; ) {
+        FlowSparePool *p = FlowSpareGetPool();
+        if (p == NULL) {
+            FatalError(SC_ERR_FLOW_INIT, "failed to initialize flow pool");
+        }
+        FlowSparePoolUpdateBlock(p);
+        cnt += p->queue.len;
+
+        /* prepend to list */
+        p->next = flow_spare_pool;
+        flow_spare_pool = p;
+        flow_spare_pool_flow_cnt = cnt;
+    }
+    SCMutexUnlock(&flow_spare_pool_m);
+}
+
+void FlowSparePoolDestroy(void)
+{
+    SCMutexLock(&flow_spare_pool_m);
+    for (FlowSparePool *p = flow_spare_pool; p != NULL; ) {
+        uint32_t cnt = 0;
+        Flow *f;
+        while ((f = FlowQueuePrivateGetFromTop(&p->queue))) {
+            FlowFree(f);
+            cnt++;
+        }
+        flow_spare_pool_flow_cnt -= cnt;
+        FlowSparePool *next = p->next;
+        SCFree(p);
+        p = next;
+    }
+    flow_spare_pool = NULL;
+    SCMutexUnlock(&flow_spare_pool_m);
+}
diff --git a/src/flow-spare-pool.h b/src/flow-spare-pool.h
new file mode 100644 (file)
index 0000000..cf9a4bd
--- /dev/null
@@ -0,0 +1,41 @@
+/* Copyright (C) 2007-2020 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/**
+ * \file
+ *
+ * \author Victor Julien <victor@inliniac.net>
+ */
+
+#ifndef __FLOW_SPARE_POOL_H__
+#define __FLOW_SPARE_POOL_H__
+
+#include "suricata-common.h"
+#include "flow.h"
+
+void FlowSparePoolInit(void);
+void FlowSparePoolDestroy(void);
+void FlowSparePoolUpdate(uint32_t size);
+
+uint32_t FlowSpareGetPoolSize(void);
+
+FlowQueuePrivate FlowSpareGetFromPool(void);
+
+void FlowSparePoolReturnFlow(Flow *f);
+void FlowSparePoolReturnFlows(FlowQueuePrivate *fqp);
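+
+/* Worker-side usage sketch (the actual call sites are in the flow-hash.c
+ * changes of this commit):
+ *
+ *   FlowQueuePrivate spare = FlowSpareGetFromPool();
+ *   Flow *f = FlowQueuePrivateGetFromTop(&spare);
+ *
+ * A NULL result means the pool was empty and the caller must allocate a new
+ * flow or fall back to emergency handling.
+ */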
+
+#endif /* __FLOW_SPARE_POOL_H__ */
index 8659a7402fcb8bf7c58b615c38520e3f1d14aa91..e48e7051220d30446c144104fb77bcf9745f1985 100644 (file)
@@ -265,7 +265,10 @@ error:
     return NULL;
 }
 
-static inline Packet *FlowForceReassemblyPseudoPacketGet(int direction,
+Packet *FlowForceReassemblyPseudoPacketGet(int direction,
+                                                         Flow *f,
+                                                         TcpSession *ssn);
+Packet *FlowForceReassemblyPseudoPacketGet(int direction,
                                                          Flow *f,
                                                          TcpSession *ssn)
 {
@@ -284,29 +287,27 @@ static inline Packet *FlowForceReassemblyPseudoPacketGet(int direction,
  *  \brief Check if a flow needs forced reassembly, or any other processing
  *
  *  \param f *LOCKED* flow
- *  \param server ptr to int that should be set to 1 or 2 if we return 1
- *  \param client ptr to int that should be set to 1 or 2 if we return 1
  *
  *  \retval 0 no
  *  \retval 1 yes
  */
-int FlowForceReassemblyNeedReassembly(Flow *f, int *server, int *client)
+int FlowForceReassemblyNeedReassembly(Flow *f)
 {
     if (f == NULL || f->protoctx == NULL) {
-        *server = *client = STREAM_HAS_UNPROCESSED_SEGMENTS_NONE;
         SCReturnInt(0);
     }
 
     TcpSession *ssn = (TcpSession *)f->protoctx;
-    *client = StreamNeedsReassembly(ssn, STREAM_TOSERVER);
-    *server = StreamNeedsReassembly(ssn, STREAM_TOCLIENT);
+    int client = StreamNeedsReassembly(ssn, STREAM_TOSERVER);
+    int server = StreamNeedsReassembly(ssn, STREAM_TOCLIENT);
 
     /* if state is not fully closed we assume that we haven't fully
      * inspected the app layer state yet */
     if (ssn->state >= TCP_ESTABLISHED && ssn->state != TCP_CLOSED)
     {
-        *client = STREAM_HAS_UNPROCESSED_SEGMENTS_NEED_ONLY_DETECTION;
-        *server = STREAM_HAS_UNPROCESSED_SEGMENTS_NEED_ONLY_DETECTION;
+        client = STREAM_HAS_UNPROCESSED_SEGMENTS_NEED_ONLY_DETECTION;
+        server = STREAM_HAS_UNPROCESSED_SEGMENTS_NEED_ONLY_DETECTION;
     }
 
     /* if app layer still needs some love, push through */
@@ -315,20 +316,22 @@ int FlowForceReassemblyNeedReassembly(Flow *f, int *server, int *client)
 
         if (AppLayerParserGetTransactionActive(f, f->alparser, STREAM_TOCLIENT) < total_txs)
         {
-            *server = STREAM_HAS_UNPROCESSED_SEGMENTS_NEED_ONLY_DETECTION;
+            server = STREAM_HAS_UNPROCESSED_SEGMENTS_NEED_ONLY_DETECTION;
         }
         if (AppLayerParserGetTransactionActive(f, f->alparser, STREAM_TOSERVER) < total_txs)
         {
-            *client = STREAM_HAS_UNPROCESSED_SEGMENTS_NEED_ONLY_DETECTION;
+            client = STREAM_HAS_UNPROCESSED_SEGMENTS_NEED_ONLY_DETECTION;
         }
     }
 
     /* nothing to do */
-    if (*client == STREAM_HAS_UNPROCESSED_SEGMENTS_NONE &&
-        *server == STREAM_HAS_UNPROCESSED_SEGMENTS_NONE) {
+    if (client == STREAM_HAS_UNPROCESSED_SEGMENTS_NONE &&
+        server == STREAM_HAS_UNPROCESSED_SEGMENTS_NONE) {
         SCReturnInt(0);
     }
 
+    f->ffr_ts = client;
+    f->ffr_tc = server;
     SCReturnInt(1);
 }
 
@@ -339,81 +342,13 @@ int FlowForceReassemblyNeedReassembly(Flow *f, int *server, int *client)
  *        The function requires flow to be locked beforehand.
  *
  * \param f Pointer to the flow.
- * \param server action required for server: 1 or 2
- * \param client action required for client: 1 or 2
  *
  * \retval 0 This flow doesn't need any reassembly processing; 1 otherwise.
  */
-int FlowForceReassemblyForFlow(Flow *f, int server, int client)
+int FlowForceReassemblyForFlow(Flow *f)
 {
-    Packet *p1 = NULL, *p2 = NULL;
-
-    /* looks like we have no flows in this queue */
-    if (f == NULL || f->protoctx == NULL) {
-        return 0;
-    }
-
-    /* Get the tcp session for the flow */
-    TcpSession *ssn = (TcpSession *)f->protoctx;
-
-    /* The packets we use are based on what segments in what direction are
-     * unprocessed.
-     * p1 if we have client segments for reassembly purpose only.  If we
-     * have no server segments p2 can be a toserver packet with dummy
-     * seq/ack, and if we have server segments p2 has to carry out reassembly
-     * for server segment as well, in which case we will also need a p3 in the
-     * toclient which is now dummy since all we need it for is detection */
-
-    /* insert a pseudo packet in the toserver direction */
-    if (client == STREAM_HAS_UNPROCESSED_SEGMENTS_NEED_ONLY_DETECTION) {
-        p1 = FlowForceReassemblyPseudoPacketGet(0, f, ssn);
-        if (p1 == NULL) {
-            goto done;
-        }
-        PKT_SET_SRC(p1, PKT_SRC_FFR);
-
-        if (server == STREAM_HAS_UNPROCESSED_SEGMENTS_NEED_ONLY_DETECTION) {
-            p2 = FlowForceReassemblyPseudoPacketGet(1, f, ssn);
-            if (p2 == NULL) {
-                FlowDeReference(&p1->flow);
-                TmqhOutputPacketpool(NULL, p1);
-                goto done;
-            }
-            PKT_SET_SRC(p2, PKT_SRC_FFR);
-            p2->flowflags |= FLOW_PKT_LAST_PSEUDO;
-        } else {
-            p1->flowflags |= FLOW_PKT_LAST_PSEUDO;
-        }
-    } else {
-        if (server == STREAM_HAS_UNPROCESSED_SEGMENTS_NEED_ONLY_DETECTION) {
-            p1 = FlowForceReassemblyPseudoPacketGet(1, f, ssn);
-            if (p1 == NULL) {
-                goto done;
-            }
-            PKT_SET_SRC(p1, PKT_SRC_FFR);
-            p1->flowflags |= FLOW_PKT_LAST_PSEUDO;
-        } else {
-            /* impossible */
-            BUG_ON(1);
-        }
-    }
-
-    /* inject the packet(s) into the appropriate thread */
-    int thread_id = (int)f->thread_id[0];
-    Packet *packets[3] = { p1, p2 ? p2 : NULL, NULL }; /**< null terminated array of packets */
-    if (unlikely(!(TmThreadsInjectPacketsById(packets, thread_id)))) {
-        FlowDeReference(&p1->flow);
-        TmqhOutputPacketpool(NULL, p1);
-        if (p2) {
-            FlowDeReference(&p2->flow);
-            TmqhOutputPacketpool(NULL, p2);
-        }
-    }
-
-    /* done, in case of error (no packet) we still tag flow as complete
-     * as we're probably resource stress if we couldn't get packets */
-done:
-    f->flags |= FLOW_TIMEOUT_REASSEMBLY_DONE;
+    const int thread_id = (int)f->thread_id[0];
+    TmThreadsInjectFlowById(f, thread_id);
     return 1;
 }
 
@@ -440,11 +375,12 @@ static inline void FlowForceReassemblyForHash(void)
         PacketPoolWaitForN(9);
         FBLOCK_LOCK(fb);
 
-        /* get the topmost flow from the QUEUE */
         Flow *f = fb->head;
+        Flow *prev_f = NULL;
 
         /* we need to loop through all the flows in the queue */
         while (f != NULL) {
+            Flow *next_f = f->next;
             PacketPoolWaitForN(3);
 
             FLOWLOCK_WRLOCK(f);
@@ -454,20 +390,26 @@ static inline void FlowForceReassemblyForHash(void)
             /* \todo Also skip flows that shouldn't be inspected */
             if (ssn == NULL) {
                 FLOWLOCK_UNLOCK(f);
-                f = f->hnext;
+                prev_f = f;
+                f = next_f;
                 continue;
             }
 
-            int client_ok = 0;
-            int server_ok = 0;
-            if (FlowForceReassemblyNeedReassembly(f, &server_ok, &client_ok) == 1) {
-                FlowForceReassemblyForFlow(f, server_ok, client_ok);
+            /* in case of additional work, we pull the flow out of the
+             * hash and xfer ownership to the injected packet(s) */
+            if (FlowForceReassemblyNeedReassembly(f) == 1) {
+                RemoveFromHash(f, prev_f);
+                f->flow_end_flags |= FLOW_END_FLAG_SHUTDOWN;
+                FlowForceReassemblyForFlow(f);
+                f = next_f;
+                continue;
             }
 
             FLOWLOCK_UNLOCK(f);
 
             /* next flow in the queue */
-            f = f->hnext;
+            prev_f = f;
+            f = f->next;
         }
         FBLOCK_UNLOCK(fb);
     }
index 50e007ae066d6c95d3c46827047c7b38e6bf8ff2..1430119caec3648f17b92c1d1b473d32932708d6 100644
@@ -24,8 +24,8 @@
 #ifndef __FLOW_TIMEOUT_H__
 #define __FLOW_TIMEOUT_H__
 
-int FlowForceReassemblyForFlow(Flow *f, int server, int client);
-int FlowForceReassemblyNeedReassembly(Flow *f, int *server, int *client);
+int FlowForceReassemblyForFlow(Flow *f);
+int FlowForceReassemblyNeedReassembly(Flow *f);
 void FlowForceReassembly(void);
 void FlowForceReassemblySetup(int detect_disabled);
 
index f4dad5dd80aa055b344cfe56b4eeb36da83222e5..b16f7c26e400692495dfceb99612598aa182ac4d 100644
@@ -196,6 +196,9 @@ void FlowInit(Flow *f, const Packet *p)
     COPY_TIMESTAMP(&p->ts, &f->startts);
 
     f->protomap = FlowGetProtoMapping(f->proto);
+    f->timeout_policy = FlowGetTimeoutPolicy(f);
+    const uint32_t timeout_at = (uint32_t)f->startts.tv_sec + f->timeout_policy;
+    f->timeout_at = timeout_at;
 
     if (MacSetFlowStorageEnabled()) {
         MacSet *ms = FlowGetStorageById(f, MacSetGetFlowStorageID());
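
With timeout_at precomputed at flow setup, the timeout test a worker performs during lookup reduces to one integer compare against the packet timestamp it already holds. A sketch (assumption, not taken from this diff; emergency mode, which the real check also has to consider, is left out):

    static inline bool FlowIsTimedOutSketch(const Flow *f, const struct timeval *now)
    {
        /* timeout_at = lastts.tv_sec + timeout_policy, kept up to date
         * on every packet, so no timeout table lookup is needed here */
        return (uint32_t)now->tv_sec >= f->timeout_at;
    }
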
index ee952944762424298199523e8d2e3313f64d4c04..ff7a7bce5324b7d76cf0f1ebddfba2fca858aa0a 100644
         (f)->dp = 0; \
         (f)->proto = 0; \
         (f)->livedev = NULL; \
+        (f)->timeout_at = 0; \
+        (f)->timeout_policy = 0; \
         (f)->vlan_idx = 0; \
-        SC_ATOMIC_INIT((f)->flow_state); \
-        SC_ATOMIC_INIT((f)->use_cnt); \
+        (f)->next = NULL; \
+        (f)->flow_state = 0; \
+        (f)->use_cnt = 0; \
         (f)->tenant_id = 0; \
         (f)->parent_id = 0; \
         (f)->probing_parser_toserver_alproto_masks = 0; \
         (f)->sgh_toserver = NULL; \
         (f)->sgh_toclient = NULL; \
         (f)->flowvar = NULL; \
-        (f)->hnext = NULL; \
-        (f)->hprev = NULL; \
-        (f)->lnext = NULL; \
-        (f)->lprev = NULL; \
         RESET_COUNTERS((f)); \
     } while (0)
 
 /** \brief macro to recycle a flow before it goes into the spare queue for reuse.
  *
- *  Note that the lnext, lprev, hnext, hprev fields are untouched, those are
+ *  Note that the lnext, lprev, hnext fields are untouched, those are
  *  managed by the queueing code. Same goes for fb (FlowBucket ptr) field.
  */
 #define FLOW_RECYCLE(f) do { \
         (f)->proto = 0; \
         (f)->livedev = NULL; \
         (f)->vlan_idx = 0; \
-        SC_ATOMIC_RESET((f)->flow_state); \
-        SC_ATOMIC_RESET((f)->use_cnt); \
+        (f)->ffr = 0; \
+        (f)->next = NULL; \
+        (f)->timeout_at = 0; \
+        (f)->timeout_policy = 0; \
+        (f)->flow_state = 0; \
+        (f)->use_cnt = 0; \
         (f)->tenant_id = 0; \
         (f)->parent_id = 0; \
         (f)->probing_parser_toserver_alproto_masks = 0; \
index 83a40134c967065c0d3bc1340d98df4454918b87..69dbb6ac575f2cf865cbfdca0cdedf5112f275fc 100644
@@ -1,4 +1,4 @@
-/* Copyright (C) 2016 Open Information Security Foundation
+/* Copyright (C) 2016-2020 Open Information Security Foundation
  *
  * You can copy, redistribute or modify this Program under the terms of
  * the GNU General Public License version 2 as published by the Free
 #include "util-validate.h"
 
 #include "flow-util.h"
+#include "flow-manager.h"
+#include "flow-timeout.h"
+#include "flow-spare-pool.h"
 
 typedef DetectEngineThreadCtx *DetectEngineThreadCtxPtr;
 
+typedef struct FlowTimeoutCounters {
+    uint32_t flows_aside_needs_work;
+    uint32_t flows_aside_pkt_inject;
+} FlowTimeoutCounters;
+
 typedef struct FlowWorkerThreadData_ {
     DecodeThreadVars *dtv;
 
@@ -59,6 +67,7 @@ typedef struct FlowWorkerThreadData_ {
     SC_ATOMIC_DECLARE(DetectEngineThreadCtxPtr, detect_thread);
 
     void *output_thread; /* Output thread data. */
+    void *output_thread_flow; /* Output thread data. */
 
     uint16_t local_bypass_pkts;
     uint16_t local_bypass_bytes;
@@ -66,9 +75,140 @@ typedef struct FlowWorkerThreadData_ {
     uint16_t both_bypass_bytes;
 
     PacketQueueNoLock pq;
+    FlowLookupStruct fls;
+
+    struct {
+        uint16_t flows_injected;
+        uint16_t flows_removed;
+        uint16_t flows_aside_needs_work;
+        uint16_t flows_aside_pkt_inject;
+    } cnt;
 
 } FlowWorkerThreadData;
 
+static void FlowWorkerFlowTimeout(ThreadVars *tv, Packet *p, FlowWorkerThreadData *fw, void *detect_thread);
+Packet *FlowForceReassemblyPseudoPacketGet(int direction, Flow *f, TcpSession *ssn);
+
+/**
+ * \internal
+ * \brief Forces reassembly for flow if it needs it.
+ *
+ *        The function requires flow to be locked beforehand.
+ *
+ * \param f Pointer to the flow.
+ *
+ * \retval cnt number of packets injected
+ */
+static int FlowFinish(ThreadVars *tv, Flow *f, FlowWorkerThreadData *fw, void *detect_thread)
+{
+    Packet *p1 = NULL, *p2 = NULL;
+    const int server = f->ffr_tc;
+    const int client = f->ffr_ts;
+
+    /* Get the tcp session for the flow */
+    TcpSession *ssn = (TcpSession *)f->protoctx;
+
+    /* The packets we use are based on what segments in what direction are
+     * unprocessed.
+     * p1 if we have client segments for reassembly purpose only.  If we
+     * have no server segments p2 can be a toserver packet with dummy
+     * seq/ack, and if we have server segments p2 has to carry out reassembly
+     * for server segment as well, in which case we will also need a p3 in the
+     * toclient which is now dummy since all we need it for is detection */
+
+    /* insert a pseudo packet in the toserver direction */
+    if (client == STREAM_HAS_UNPROCESSED_SEGMENTS_NEED_ONLY_DETECTION) {
+        p1 = FlowForceReassemblyPseudoPacketGet(0, f, ssn);
+        if (p1 == NULL) {
+            return 0;
+        }
+        PKT_SET_SRC(p1, PKT_SRC_FFR);
+
+        if (server == STREAM_HAS_UNPROCESSED_SEGMENTS_NEED_ONLY_DETECTION) {
+            p2 = FlowForceReassemblyPseudoPacketGet(1, f, ssn);
+            if (p2 == NULL) {
+                FlowDeReference(&p1->flow);
+                TmqhOutputPacketpool(NULL, p1);
+                return 0;
+            }
+            PKT_SET_SRC(p2, PKT_SRC_FFR);
+            p2->flowflags |= FLOW_PKT_LAST_PSEUDO;
+        } else {
+            p1->flowflags |= FLOW_PKT_LAST_PSEUDO;
+        }
+    } else {
+        if (server == STREAM_HAS_UNPROCESSED_SEGMENTS_NEED_ONLY_DETECTION) {
+            p1 = FlowForceReassemblyPseudoPacketGet(1, f, ssn);
+            if (p1 == NULL) {
+                return 0;
+            }
+            PKT_SET_SRC(p1, PKT_SRC_FFR);
+            p1->flowflags |= FLOW_PKT_LAST_PSEUDO;
+        } else {
+            /* impossible */
+            BUG_ON(1);
+        }
+    }
+    f->flags |= FLOW_TIMEOUT_REASSEMBLY_DONE;
+
+    FlowWorkerFlowTimeout(tv, p1, fw, detect_thread);
+    PacketPoolReturnPacket(p1);
+    if (p2) {
+        FlowWorkerFlowTimeout(tv, p2, fw, detect_thread);
+        PacketPoolReturnPacket(p2);
+        return 2;
+    }
+    return 1;
+}
+
+static void CheckWorkQueue(ThreadVars *tv, FlowWorkerThreadData *fw,
+        void *detect_thread, // TODO proper type?
+        FlowTimeoutCounters *counters,
+        FlowQueuePrivate *fq)
+{
+    Flow *f;
+    while ((f = FlowQueuePrivateGetFromTop(fq)) != NULL) {
+        f->flow_end_flags |= FLOW_END_FLAG_TIMEOUT; //TODO emerg
+
+        const FlowStateType state = f->flow_state;
+        if (f->proto == IPPROTO_TCP) {
+            if (!(f->flags & FLOW_TIMEOUT_REASSEMBLY_DONE) &&
+#ifdef CAPTURE_OFFLOAD
+                    state != FLOW_STATE_CAPTURE_BYPASSED &&
+#endif
+                    state != FLOW_STATE_LOCAL_BYPASSED &&
+                    FlowForceReassemblyNeedReassembly(f) == 1 &&
+                    f->ffr != 0)
+            {
+                int cnt = FlowFinish(tv, f, fw, detect_thread);
+                counters->flows_aside_pkt_inject += cnt;
+                counters->flows_aside_needs_work++;
+            }
+        }
+#if 0
+// 20200501 this *is* possible if the flow timeout handling triggers a proto upgrade (e.g. http->https)
+#ifdef DEBUG
+        /* this should not be possible */
+        BUG_ON(f->use_cnt > 0);
+#endif
+#endif
+        /* no one is referring to this flow, use_cnt 0, removed from hash
+         * so we can unlock it and pass it to the flow recycler */
+
+        if (fw->output_thread_flow != NULL)
+            (void)OutputFlowLog(tv, fw->output_thread_flow, f);
+
+        FlowClearMemory (f, f->protomap);
+        FLOWLOCK_UNLOCK(f);
+        if (fw->fls.spare_queue.len >= 200) { // TODO match to API? 200 = 2 * block size
+            FlowSparePoolReturnFlow(f);
+        } else {
+            FlowQueuePrivatePrependFlow(&fw->fls.spare_queue, f);
+        }
+// TODO 20200503 we can get here with use_cnt > 0. How does it work wrt timeout? Should we not queue it? But what then?
+    }
+}
+
 /** \brief handle flow for packet
  *
  *  Handle flow creation/lookup
@@ -77,7 +217,7 @@ static inline TmEcode FlowUpdate(ThreadVars *tv, FlowWorkerThreadData *fw, Packe
 {
     FlowHandlePacketUpdate(p->flow, p, tv, fw->dtv);
 
-    int state = SC_ATOMIC_GET(p->flow->flow_state);
+    int state = p->flow->flow_state;
     switch (state) {
 #ifdef CAPTURE_OFFLOAD
         case FLOW_STATE_CAPTURE_BYPASSED:
@@ -110,7 +250,12 @@ static TmEcode FlowWorkerThreadInit(ThreadVars *tv, const void *initdata, void *
     fw->both_bypass_pkts = StatsRegisterCounter("flow_bypassed.local_capture_pkts", tv);
     fw->both_bypass_bytes = StatsRegisterCounter("flow_bypassed.local_capture_bytes", tv);
 
-    fw->dtv = DecodeThreadVarsAlloc(tv);
+    fw->cnt.flows_aside_needs_work = StatsRegisterCounter("flow.wrk.flows_evicted_needs_work", tv);
+    fw->cnt.flows_aside_pkt_inject = StatsRegisterCounter("flow.wrk.flows_evicted_pkt_inject", tv);
+    fw->cnt.flows_removed = StatsRegisterCounter("flow.wrk.flows_evicted", tv);
+    fw->cnt.flows_injected = StatsRegisterCounter("flow.wrk.flows_injected", tv);
+
+    fw->fls.dtv = fw->dtv = DecodeThreadVarsAlloc(tv);
     if (fw->dtv == NULL) {
         FlowWorkerThreadDeinit(tv, fw);
         return TM_ECODE_FAILED;
@@ -137,13 +282,17 @@ static TmEcode FlowWorkerThreadInit(ThreadVars *tv, const void *initdata, void *
         FlowWorkerThreadDeinit(tv, fw);
         return TM_ECODE_FAILED;
     }
+    if (OutputFlowLogThreadInit(tv, NULL, &fw->output_thread_flow) != TM_ECODE_OK) {
+        SCLogError(SC_ERR_THREAD_INIT, "initializing flow log API for thread failed");
+        FlowWorkerThreadDeinit(tv, fw);
+        return TM_ECODE_FAILED;
+    }
 
     DecodeRegisterPerfCounters(fw->dtv, tv);
     AppLayerRegisterThreadCounters(tv);
 
     /* setup pq for stream end pkts */
     memset(&fw->pq, 0, sizeof(PacketQueueNoLock));
-
     *data = fw;
     return TM_ECODE_OK;
 }
@@ -166,14 +315,36 @@ static TmEcode FlowWorkerThreadDeinit(ThreadVars *tv, void *data)
 
     /* Free output. */
     OutputLoggerThreadDeinit(tv, fw->output_thread);
+    OutputFlowLogThreadDeinit(tv, fw->output_thread_flow);
 
     /* free pq */
     BUG_ON(fw->pq.len);
 
+    Flow *f;
+    while ((f = FlowQueuePrivateGetFromTop(&fw->fls.spare_queue)) != NULL) {
+        FlowFree(f);
+    }
+
     SCFree(fw);
     return TM_ECODE_OK;
 }
 
+TmEcode Detect(ThreadVars *tv, Packet *p, void *data);
+TmEcode StreamTcp (ThreadVars *, Packet *, void *, PacketQueueNoLock *pq);
+
+static inline void UpdateCounters(ThreadVars *tv,
+        FlowWorkerThreadData *fw, const FlowTimeoutCounters *counters)
+{
+    if (counters->flows_aside_needs_work) {
+        StatsAddUI64(tv, fw->cnt.flows_aside_needs_work,
+                (uint64_t)counters->flows_aside_needs_work);
+    }
+    if (counters->flows_aside_pkt_inject) {
+        StatsAddUI64(tv, fw->cnt.flows_aside_pkt_inject,
+                (uint64_t)counters->flows_aside_pkt_inject);
+    }
+}
+
 static void FlowPruneFiles(Packet *p)
 {
     if (p->flow && p->flow->alstate) {
@@ -186,11 +357,124 @@ static void FlowPruneFiles(Packet *p)
     }
 }
 
+static inline void FlowWorkerStreamTCPUpdate(ThreadVars *tv, FlowWorkerThreadData *fw,
+        Packet *p, void *detect_thread)
+{
+    FLOWWORKER_PROFILING_START(p, PROFILE_FLOWWORKER_STREAM);
+    StreamTcp(tv, p, fw->stream_thread, &fw->pq);
+    FLOWWORKER_PROFILING_END(p, PROFILE_FLOWWORKER_STREAM);
+
+    if (FlowChangeProto(p->flow)) {
+        StreamTcpDetectLogFlush(tv, fw->stream_thread, p->flow, p, &fw->pq);
+        AppLayerParserStateSetFlag(p->flow->alparser, APP_LAYER_PARSER_EOF_TS);
+        AppLayerParserStateSetFlag(p->flow->alparser, APP_LAYER_PARSER_EOF_TC);
+    }
+
+    /* Packets here can safely access p->flow as it's locked */
+    SCLogDebug("packet %"PRIu64": extra packets %u", p->pcap_cnt, fw->pq.len);
+    Packet *x;
+    while ((x = PacketDequeueNoLock(&fw->pq))) {
+        SCLogDebug("packet %"PRIu64" extra packet %p", p->pcap_cnt, x);
+
+        if (detect_thread != NULL) {
+            FLOWWORKER_PROFILING_START(x, PROFILE_FLOWWORKER_DETECT);
+            Detect(tv, x, detect_thread);
+            FLOWWORKER_PROFILING_END(x, PROFILE_FLOWWORKER_DETECT);
+        }
+
+        OutputLoggerLog(tv, x, fw->output_thread);
+
+        /* put these packets in the preq queue so that they are
+         * handled by the other thread modules before packet 'p'. */
+        PacketEnqueueNoLock(&tv->decode_pq, x);
+    }
+}
+
+static void FlowWorkerFlowTimeout(ThreadVars *tv, Packet *p, FlowWorkerThreadData *fw,
+        void *detect_thread)
+{
+    SCLogDebug("packet %"PRIu64" is TCP. Direction %s", p->pcap_cnt, PKT_IS_TOSERVER(p) ? "TOSERVER" : "TOCLIENT");
+    DEBUG_VALIDATE_BUG_ON(!(p->flow && PKT_IS_TCP(p)));
+    DEBUG_ASSERT_FLOW_LOCKED(p->flow);
+
+    /* handle TCP and app layer */
+    FlowWorkerStreamTCPUpdate(tv, fw, p, detect_thread);
+
+    PacketUpdateEngineEventCounters(tv, fw->dtv, p);
+
+    /* handle Detect */
+    SCLogDebug("packet %"PRIu64" calling Detect", p->pcap_cnt);
+    if (detect_thread != NULL) {
+        FLOWWORKER_PROFILING_START(p, PROFILE_FLOWWORKER_DETECT);
+        Detect(tv, p, detect_thread);
+        FLOWWORKER_PROFILING_END(p, PROFILE_FLOWWORKER_DETECT);
+    }
+
+    // Outputs.
+    OutputLoggerLog(tv, p, fw->output_thread);
+
+    /* Prune any stored files. */
+    FlowPruneFiles(p);
+
+    /*  Release tcp segments. Done here after alerting can use them. */
+    FLOWWORKER_PROFILING_START(p, PROFILE_FLOWWORKER_TCPPRUNE);
+    StreamTcpPruneSession(p->flow, p->flowflags & FLOW_PKT_TOSERVER ?
+            STREAM_TOSERVER : STREAM_TOCLIENT);
+    FLOWWORKER_PROFILING_END(p, PROFILE_FLOWWORKER_TCPPRUNE);
+
+    /* run tx cleanup last */
+    AppLayerParserTransactionsCleanup(p->flow);
+
+    FlowDeReference(&p->flow);
+    /* flow is unlocked later in FlowFinish() */
+}
+
+/** \internal
+ *  \brief process flows injected into our queue by other threads
+ */
+static inline void FlowWorkerProcessInjectedFlows(ThreadVars *tv,
+        FlowWorkerThreadData *fw, Packet *p, void *detect_thread)
+{
+    /* take injected flows and append to our work queue */
+    FLOWWORKER_PROFILING_START(p, PROFILE_FLOWWORKER_FLOW_INJECTED);
+    FlowQueuePrivate injected = { NULL, NULL, 0 };
+    if (SC_ATOMIC_GET(tv->flow_queue->non_empty) == true)
+        injected = FlowQueueExtractPrivate(tv->flow_queue);
+    if (injected.len > 0) {
+        StatsAddUI64(tv, fw->cnt.flows_injected, (uint64_t)injected.len);
+
+        FlowTimeoutCounters counters = { 0, 0, };
+        CheckWorkQueue(tv, fw, detect_thread, &counters, &injected);
+        UpdateCounters(tv, fw, &counters);
+    }
+    FLOWWORKER_PROFILING_END(p, PROFILE_FLOWWORKER_FLOW_INJECTED);
+}
+
+/** \internal
+ *  \brief process flows set aside locally during flow lookup
+ */
+static inline void FlowWorkerProcessLocalFlows(ThreadVars *tv,
+        FlowWorkerThreadData *fw, Packet *p, void *detect_thread)
+{
+    FLOWWORKER_PROFILING_START(p, PROFILE_FLOWWORKER_FLOW_EVICTED);
+    if (fw->fls.work_queue.len) {
+        StatsAddUI64(tv, fw->cnt.flows_removed, (uint64_t)fw->fls.work_queue.len);
+
+        FlowTimeoutCounters counters = { 0, 0, };
+        CheckWorkQueue(tv, fw, detect_thread, &counters, &fw->fls.work_queue);
+        UpdateCounters(tv, fw, &counters);
+    }
+    FLOWWORKER_PROFILING_END(p, PROFILE_FLOWWORKER_FLOW_EVICTED);
+}
+
 static TmEcode FlowWorker(ThreadVars *tv, Packet *p, void *data)
 {
     FlowWorkerThreadData *fw = data;
     void *detect_thread = SC_ATOMIC_GET(fw->detect_thread);
 
+    DEBUG_VALIDATE_BUG_ON(p == NULL);
+    DEBUG_VALIDATE_BUG_ON(tv->flow_queue == NULL);
+
     SCLogDebug("packet %"PRIu64, p->pcap_cnt);
 
     /* update time */
@@ -202,7 +486,7 @@ static TmEcode FlowWorker(ThreadVars *tv, Packet *p, void *data)
     if (p->flags & PKT_WANTS_FLOW) {
         FLOWWORKER_PROFILING_START(p, PROFILE_FLOWWORKER_FLOW);
 
-        FlowHandlePacket(tv, fw->dtv, p);
+        FlowHandlePacket(tv, &fw->fls, p);
         if (likely(p->flow != NULL)) {
             DEBUG_ASSERT_FLOW_LOCKED(p->flow);
             if (FlowUpdate(tv, fw, p) == TM_ECODE_DONE) {
@@ -237,37 +521,7 @@ static TmEcode FlowWorker(ThreadVars *tv, Packet *p, void *data)
             DisableDetectFlowFileFlags(p->flow);
         }
 
-        FLOWWORKER_PROFILING_START(p, PROFILE_FLOWWORKER_STREAM);
-        StreamTcp(tv, p, fw->stream_thread, &fw->pq);
-        FLOWWORKER_PROFILING_END(p, PROFILE_FLOWWORKER_STREAM);
-
-        if (FlowChangeProto(p->flow)) {
-            StreamTcpDetectLogFlush(tv, fw->stream_thread, p->flow, p, &fw->pq);
-            AppLayerParserStateSetFlag(p->flow->alparser, APP_LAYER_PARSER_EOF_TS);
-            AppLayerParserStateSetFlag(p->flow->alparser, APP_LAYER_PARSER_EOF_TC);
-        }
-
-        /* Packets here can safely access p->flow as it's locked */
-        SCLogDebug("packet %"PRIu64": extra packets %u", p->pcap_cnt, fw->pq.len);
-        Packet *x;
-        while ((x = PacketDequeueNoLock(&fw->pq))) {
-            SCLogDebug("packet %"PRIu64" extra packet %p", p->pcap_cnt, x);
-
-            // TODO do we need to call StreamTcp on these pseudo packets or not?
-            //StreamTcp(tv, x, fw->stream_thread, &fw->pq, NULL);
-            if (detect_thread != NULL) {
-                FLOWWORKER_PROFILING_START(x, PROFILE_FLOWWORKER_DETECT);
-                Detect(tv, x, detect_thread);
-                FLOWWORKER_PROFILING_END(x, PROFILE_FLOWWORKER_DETECT);
-            }
-
-            //  Outputs
-            OutputLoggerLog(tv, x, fw->output_thread);
-
-            /* put these packets in the preq queue so that they are
-             * by the other thread modules before packet 'p'. */
-            PacketEnqueueNoLock(&tv->decode_pq, x);
-        }
+        FlowWorkerStreamTCPUpdate(tv, fw, p, detect_thread);
 
     /* handle the app layer part of the UDP packet payload */
     } else if (p->flow && p->proto == IPPROTO_UDP) {
@@ -281,7 +535,6 @@ static TmEcode FlowWorker(ThreadVars *tv, Packet *p, void *data)
     /* handle Detect */
     DEBUG_ASSERT_FLOW_LOCKED(p->flow);
     SCLogDebug("packet %"PRIu64" calling Detect", p->pcap_cnt);
-
     if (detect_thread != NULL) {
         FLOWWORKER_PROFILING_START(p, PROFILE_FLOWWORKER_DETECT);
         Detect(tv, p, detect_thread);
@@ -295,23 +548,30 @@ static TmEcode FlowWorker(ThreadVars *tv, Packet *p, void *data)
     FlowPruneFiles(p);
 
     /*  Release tcp segments. Done here after alerting can use them. */
-    if (p->flow != NULL && p->proto == IPPROTO_TCP) {
-        FLOWWORKER_PROFILING_START(p, PROFILE_FLOWWORKER_TCPPRUNE);
-        StreamTcpPruneSession(p->flow, p->flowflags & FLOW_PKT_TOSERVER ?
-                STREAM_TOSERVER : STREAM_TOCLIENT);
-        FLOWWORKER_PROFILING_END(p, PROFILE_FLOWWORKER_TCPPRUNE);
-    }
-
-    if (p->flow) {
+    if (p->flow != NULL) {
         DEBUG_ASSERT_FLOW_LOCKED(p->flow);
 
+        if (p->proto == IPPROTO_TCP) {
+            FLOWWORKER_PROFILING_START(p, PROFILE_FLOWWORKER_TCPPRUNE);
+            StreamTcpPruneSession(p->flow, p->flowflags & FLOW_PKT_TOSERVER ?
+                    STREAM_TOSERVER : STREAM_TOCLIENT);
+            FLOWWORKER_PROFILING_END(p, PROFILE_FLOWWORKER_TCPPRUNE);
+        }
+
         /* run tx cleanup last */
         AppLayerParserTransactionsCleanup(p->flow);
+
         Flow *f = p->flow;
         FlowDeReference(&p->flow);
         FLOWLOCK_UNLOCK(f);
     }
 
+    /* take injected flows and process them */
+    FlowWorkerProcessInjectedFlows(tv, fw, p, detect_thread);
+
+    /* process local work queue */
+    FlowWorkerProcessLocalFlows(tv, fw, p, detect_thread);
+
     return TM_ECODE_OK;
 }
 
@@ -342,6 +602,10 @@ const char *ProfileFlowWorkerIdToString(enum ProfileFlowWorkerId fwi)
             return "detect";
         case PROFILE_FLOWWORKER_TCPPRUNE:
             return "tcp-prune";
+        case PROFILE_FLOWWORKER_FLOW_INJECTED:
+            return "flow-inject";
+        case PROFILE_FLOWWORKER_FLOW_EVICTED:
+            return "flow-evict";
         case PROFILE_FLOWWORKER_SIZE:
             return "size";
     }
index 2fc6d643287e19ed8bea0c468d547a15c84d7015..9187602dec2c7d663d4493a290435100c0206049 100644
@@ -24,6 +24,8 @@ enum ProfileFlowWorkerId {
     PROFILE_FLOWWORKER_APPLAYERUDP,
     PROFILE_FLOWWORKER_DETECT,
     PROFILE_FLOWWORKER_TCPPRUNE,
+    PROFILE_FLOWWORKER_FLOW_INJECTED,
+    PROFILE_FLOWWORKER_FLOW_EVICTED,
     PROFILE_FLOWWORKER_SIZE
 };
 const char *ProfileFlowWorkerIdToString(enum ProfileFlowWorkerId fwi);
index 49dc4a2e373041900a753b4d22068a9653a06026..42c6fb8b93e681249b7370142c22d09838923103 100644
@@ -44,6 +44,7 @@
 #include "flow-manager.h"
 #include "flow-storage.h"
 #include "flow-bypass.h"
+#include "flow-spare-pool.h"
 
 #include "stream-tcp-private.h"
 #include "stream-tcp-reassemble.h"
@@ -74,6 +75,8 @@
 
 #define FLOW_DEFAULT_PREALLOC    10000
 
+SC_ATOMIC_DECLARE(FlowProtoTimeoutPtr, flow_timeouts);
+
 /** atomic int that is used when freeing a flow from the hash. In this
  *  case we walk the hash to find a flow to free. This var records where
  *  we left off in the hash. Without this only the top rows of the hash
@@ -89,11 +92,9 @@ SC_ATOMIC_DECLARE(unsigned int, flow_flags);
 
 FlowProtoTimeout flow_timeouts_normal[FLOW_PROTO_MAX];
 FlowProtoTimeout flow_timeouts_emerg[FLOW_PROTO_MAX];
+FlowProtoTimeout flow_timeouts_delta[FLOW_PROTO_MAX];
 FlowProtoFreeFunc flow_freefuncs[FLOW_PROTO_MAX];
 
-/** spare/unused/prealloced flows live here */
-FlowQueue flow_spare_q;
-
 FlowConfig flow_config;
 
 /** flow memuse counter (atomic), for enforcing memcap limit */
@@ -149,51 +150,6 @@ void FlowCleanupAppLayer(Flow *f)
     return;
 }
 
-/** \brief Make sure we have enough spare flows. 
- *
- *  Enforce the prealloc parameter, so keep at least prealloc flows in the
- *  spare queue and free flows going over the limit.
- *
- *  \retval 1 if the queue was properly updated (or if it already was in good shape)
- *  \retval 0 otherwise.
- */
-int FlowUpdateSpareFlows(void)
-{
-    SCEnter();
-    uint32_t toalloc = 0, tofree = 0, len;
-
-    FQLOCK_LOCK(&flow_spare_q);
-    len = flow_spare_q.len;
-    FQLOCK_UNLOCK(&flow_spare_q);
-
-    if (len < flow_config.prealloc) {
-        toalloc = flow_config.prealloc - len;
-
-        uint32_t i;
-        for (i = 0; i < toalloc; i++) {
-            Flow *f = FlowAlloc();
-            if (f == NULL)
-                return 0;
-
-            FlowEnqueue(&flow_spare_q,f);
-        }
-    } else if (len > flow_config.prealloc) {
-        tofree = len - flow_config.prealloc;
-
-        uint32_t i;
-        for (i = 0; i < tofree; i++) {
-            /* FlowDequeue locks the queue */
-            Flow *f = FlowDequeue(&flow_spare_q);
-            if (f == NULL)
-                return 1;
-
-            FlowFree(f);
-        }
-    }
-
-    return 1;
-}
-
 /** \brief Set the IPOnly scanned flag for 'direction'.
   *
   * \param f Flow to set the flag in
@@ -435,13 +391,18 @@ void FlowHandlePacketUpdate(Flow *f, Packet *p, ThreadVars *tv, DecodeThreadVars
     SCLogDebug("packet %"PRIu64" -- flow %p", p->pcap_cnt, f);
 
 #ifdef CAPTURE_OFFLOAD
-    int state = SC_ATOMIC_GET(f->flow_state);
+    int state = f->flow_state;
 
     if (state != FLOW_STATE_CAPTURE_BYPASSED) {
 #endif
         /* update the last seen timestamp of this flow */
-        if (timercmp(&p->ts, &f->lastts, >))
+        if (timercmp(&p->ts, &f->lastts, >)) {
             COPY_TIMESTAMP(&p->ts, &f->lastts);
+            const uint32_t timeout_at = (uint32_t)f->lastts.tv_sec + f->timeout_policy;
+            if (timeout_at != f->timeout_at) {
+                f->timeout_at = timeout_at;
+            }
+        }
 #ifdef CAPTURE_OFFLOAD
     } else {
         /* still seeing packet, we downgrade to local bypass */
@@ -493,7 +454,7 @@ void FlowHandlePacketUpdate(Flow *f, Packet *p, ThreadVars *tv, DecodeThreadVars
         FlowUpdateEthernet(tv, dtv, f, p->ethh, false);
     }
 
-    if (SC_ATOMIC_GET(f->flow_state) == FLOW_STATE_ESTABLISHED) {
+    if (f->flow_state == FLOW_STATE_ESTABLISHED) {
         SCLogDebug("pkt %p FLOW_PKT_ESTABLISHED", p);
         p->flowflags |= FLOW_PKT_ESTABLISHED;
 
@@ -536,12 +497,12 @@ void FlowHandlePacketUpdate(Flow *f, Packet *p, ThreadVars *tv, DecodeThreadVars
  *  \param dtv decode thread vars (for flow output api thread data)
  *  \param p packet to handle flow for
  */
-void FlowHandlePacket(ThreadVars *tv, DecodeThreadVars *dtv, Packet *p)
+void FlowHandlePacket(ThreadVars *tv, FlowLookupStruct *fls, Packet *p)
 {
     /* Get this packet's flow from the hash. FlowHandlePacket() will set up
     * a new flow if necessary. If we get NULL, we're out of flow memory.
      * The returned flow is locked. */
-    Flow *f = FlowGetFlowFromHash(tv, dtv, p, &p->flow);
+    Flow *f = FlowGetFlowFromHash(tv, fls, p, &p->flow);
     if (f == NULL)
         return;
 
@@ -561,7 +522,6 @@ void FlowInitConfig(char quiet)
     SC_ATOMIC_INIT(flow_memuse);
     SC_ATOMIC_INIT(flow_prune_idx);
     SC_ATOMIC_INIT(flow_config.memcap);
-    FlowQueueInit(&flow_spare_q);
     FlowQueueInit(&flow_recycle_q);
 
     /* set defaults */
@@ -663,42 +623,16 @@ void FlowInitConfig(char quiet)
                   SC_ATOMIC_GET(flow_memuse), flow_config.hash_size,
                   (uintmax_t)sizeof(FlowBucket));
     }
-
-    /* pre allocate flows */
-    for (i = 0; i < flow_config.prealloc; i++) {
-        if (!(FLOW_CHECK_MEMCAP(sizeof(Flow) + FlowStorageSize()))) {
-            SCLogError(SC_ERR_FLOW_INIT, "preallocating flows failed: "
-                    "max flow memcap reached. Memcap %"PRIu64", "
-                    "Memuse %"PRIu64".", SC_ATOMIC_GET(flow_config.memcap),
-                    ((uint64_t)SC_ATOMIC_GET(flow_memuse) + (uint64_t)sizeof(Flow)));
-            exit(EXIT_FAILURE);
-        }
-
-        Flow *f = FlowAlloc();
-        if (f == NULL) {
-            SCLogError(SC_ERR_FLOW_INIT, "preallocating flow failed: %s", strerror(errno));
-            exit(EXIT_FAILURE);
-        }
-
-        FlowEnqueue(&flow_spare_q,f);
-    }
-
+    FlowSparePoolInit();
     if (quiet == FALSE) {
-        SCLogConfig("preallocated %" PRIu32 " flows of size %" PRIuMAX "",
-                flow_spare_q.len, (uintmax_t)(sizeof(Flow) + + FlowStorageSize()));
         SCLogConfig("flow memory usage: %"PRIu64" bytes, maximum: %"PRIu64,
                 SC_ATOMIC_GET(flow_memuse), SC_ATOMIC_GET(flow_config.memcap));
     }
 
     FlowInitFlowProto();
 
-    return;
-}
-
-/** \brief print some flow stats
- *  \warning Not thread safe */
-static void FlowPrintStats (void)
-{
+    uint32_t sz = sizeof(Flow) + FlowStorageSize();
+    SCLogNotice("flow size %u, memcap allows for %"PRIu64" flows. Per hash row in perfect conditions %"PRIu64, sz, flow_memcap_copy/sz, (flow_memcap_copy/sz)/flow_config.hash_size);
     return;
 }
 
@@ -707,26 +641,27 @@ static void FlowPrintStats (void)
 void FlowShutdown(void)
 {
     Flow *f;
-    uint32_t u;
-
-    FlowPrintStats();
-
-    /* free queues */
-    while((f = FlowDequeue(&flow_spare_q))) {
-        FlowFree(f);
-    }
-    while((f = FlowDequeue(&flow_recycle_q))) {
+    while ((f = FlowDequeue(&flow_recycle_q))) {
         FlowFree(f);
     }
 
     /* clear and free the hash */
     if (flow_hash != NULL) {
         /* clean up flow mutexes */
-        for (u = 0; u < flow_config.hash_size; u++) {
+        for (uint32_t u = 0; u < flow_config.hash_size; u++) {
             f = flow_hash[u].head;
             while (f) {
-                DEBUG_VALIDATE_BUG_ON(SC_ATOMIC_GET(f->use_cnt) != 0);
-                Flow *n = f->hnext;
+                DEBUG_VALIDATE_BUG_ON(f->use_cnt != 0);
+                Flow *n = f->next;
+                uint8_t proto_map = FlowGetProtoMapping(f->proto);
+                FlowClearMemory(f, proto_map);
+                FlowFree(f);
+                f = n;
+            }
+            f = flow_hash[u].evicted;
+            while (f) {
+                DEBUG_VALIDATE_BUG_ON(f->use_cnt != 0);
+                Flow *n = f->next;
                 uint8_t proto_map = FlowGetProtoMapping(f->proto);
                 FlowClearMemory(f, proto_map);
                 FlowFree(f);
@@ -739,8 +674,8 @@ void FlowShutdown(void)
         flow_hash = NULL;
     }
     (void) SC_ATOMIC_SUB(flow_memuse, flow_config.hash_size * sizeof(FlowBucket));
-    FlowQueueDestroy(&flow_spare_q);
     FlowQueueDestroy(&flow_recycle_q);
+    FlowSparePoolDestroy();
     return;
 }
 
@@ -1075,6 +1010,39 @@ void FlowInitFlowProto(void)
         }
     }
 
+    for (int i = 0; i < FLOW_PROTO_MAX; i++) {
+        FlowProtoTimeout *n = &flow_timeouts_normal[i];
+        FlowProtoTimeout *e = &flow_timeouts_emerg[i];
+        FlowProtoTimeout *d = &flow_timeouts_delta[i];
+
+        if (e->est_timeout > n->est_timeout) {
+            SCLogWarning(SC_WARN_FLOW_EMERGENCY, "emergency timeout value for \'established\' must be below normal value");
+            e->est_timeout = n->est_timeout / 10;
+        }
+        d->est_timeout = n->est_timeout - e->est_timeout;
+
+        if (e->new_timeout > n->new_timeout) {
+            SCLogWarning(SC_WARN_FLOW_EMERGENCY, "emergency timeout value for \'new\' must be below normal value");
+            e->new_timeout = n->new_timeout / 10;
+        }
+        d->new_timeout = n->new_timeout - e->new_timeout;
+
+        if (e->closed_timeout > n->closed_timeout) {
+            SCLogWarning(SC_WARN_FLOW_EMERGENCY, "emergency timeout value for \'closed\' must be below normal value");
+            e->closed_timeout = n->closed_timeout / 10;
+        }
+        d->closed_timeout = n->closed_timeout - e->closed_timeout;
+
+        if (e->bypassed_timeout > n->bypassed_timeout) {
+            SCLogWarning(SC_WARN_FLOW_EMERGENCY, "emergency timeout value for \'bypassed\' must be below normal value");
+            e->bypassed_timeout = n->bypassed_timeout / 10;
+        }
+        d->bypassed_timeout = n->bypassed_timeout - e->bypassed_timeout;
+
+        SCLogDebug("deltas: new: -%u est: -%u closed: -%u bypassed: -%u",
+                d->new_timeout, d->est_timeout, d->closed_timeout, d->bypassed_timeout);
+    }
+
     return;
 }
 
@@ -1164,16 +1132,30 @@ uint8_t FlowGetDisruptionFlags(const Flow *f, uint8_t flags)
     return newflags;
 }
 
-void FlowUpdateState(Flow *f, enum FlowState s)
+void FlowUpdateState(Flow *f, const enum FlowState s)
 {
-    /* set the state */
-    SC_ATOMIC_SET(f->flow_state, s);
-
-    if (f->fb) {
+    if (s != f->flow_state) {
+        /* set the state */
+        f->flow_state = s;
+
+        /* update timeout policy and value */
+        const uint32_t timeout_policy = FlowGetTimeoutPolicy(f);
+        if (timeout_policy != f->timeout_policy) {
+            f->timeout_policy = timeout_policy;
+            const uint32_t timeout_at = (uint32_t)f->lastts.tv_sec + timeout_policy;
+            if (timeout_at != f->timeout_at)
+                f->timeout_at = timeout_at;
+        }
+    }
+#ifdef UNITTESTS
+    if (f->fb != NULL) {
+#endif
         /* and reset the flow bucket's next_ts value so that the flow manager
          * has to revisit this row */
         SC_ATOMIC_SET(f->fb->next_ts, 0);
+#ifdef UNITTESTS
     }
+#endif
 }
 
 /**
@@ -1268,17 +1250,16 @@ static int FlowTest02 (void)
 static int FlowTest07 (void)
 {
     int result = 0;
-
     FlowInitConfig(FLOW_QUIET);
     FlowConfig backup;
     memcpy(&backup, &flow_config, sizeof(FlowConfig));
 
     uint32_t ini = 0;
-    uint32_t end = flow_spare_q.len;
+    uint32_t end = FlowSpareGetPoolSize();
     SC_ATOMIC_SET(flow_config.memcap, 10000);
     flow_config.prealloc = 100;
 
-    /* Let's get the flow_spare_q empty */
+    /* Let's get the flow spare pool empty */
     UTHBuildPacketOfFlows(ini, end, 0);
 
     /* And now let's try to reach the memcap val */
@@ -1291,7 +1272,7 @@ static int FlowTest07 (void)
     /* should time out normal */
     TimeSetIncrementTime(2000);
     ini = end + 1;
-    end = end + 2;;
+    end = end + 2;
     UTHBuildPacketOfFlows(ini, end, 0);
 
     /* This means that the engine entered emerg mode: should happen as easy
@@ -1299,8 +1280,8 @@ static int FlowTest07 (void)
     if (SC_ATOMIC_GET(flow_flags) & FLOW_EMERGENCY)
         result = 1;
 
-    memcpy(&flow_config, &backup, sizeof(FlowConfig));
     FlowShutdown();
+    memcpy(&flow_config, &backup, sizeof(FlowConfig));
 
     return result;
 }
@@ -1321,11 +1302,11 @@ static int FlowTest08 (void)
     memcpy(&backup, &flow_config, sizeof(FlowConfig));
 
     uint32_t ini = 0;
-    uint32_t end = flow_spare_q.len;
+    uint32_t end = FlowSpareGetPoolSize();
     SC_ATOMIC_SET(flow_config.memcap, 10000);
     flow_config.prealloc = 100;
 
-    /* Let's get the flow_spare_q empty */
+    /* Let's get the flow spare pool empty */
     UTHBuildPacketOfFlows(ini, end, 0);
 
     /* And now let's try to reach the memcap val */
@@ -1368,11 +1349,11 @@ static int FlowTest09 (void)
     memcpy(&backup, &flow_config, sizeof(FlowConfig));
 
     uint32_t ini = 0;
-    uint32_t end = flow_spare_q.len;
+    uint32_t end = FlowSpareGetPoolSize();
     SC_ATOMIC_SET(flow_config.memcap, 10000);
     flow_config.prealloc = 100;
 
-    /* Let's get the flow_spare_q empty */
+    /* Let's get the flow spare pool empty */
     UTHBuildPacketOfFlows(ini, end, 0);
 
     /* And now let's try to reach the memcap val */
@@ -1416,7 +1397,6 @@ void FlowRegisterTests (void)
     UtRegisterTest("FlowTest09 -- Test flow Allocations when it reach memcap",
                    FlowTest09);
 
-    FlowMgrRegisterTests();
     RegisterFlowStorageTests();
 #endif /* UNITTESTS */
 }
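
The flow_timeouts_delta table filled in by FlowInitFlowProto() above lets the emergency timeout be derived from the normal one with a single subtraction. A hypothetical reconstruction of how it can be applied (the real policy lookup is FlowGetTimeoutPolicy(), which this diff does not show):

    static uint32_t FlowTimeoutSketch(const Flow *f, const bool emerg)
    {
        const FlowProtoTimeout *n = &flow_timeouts_normal[f->protomap];
        const FlowProtoTimeout *d = &flow_timeouts_delta[f->protomap];
        switch (f->flow_state) {
            case FLOW_STATE_NEW:
                return n->new_timeout - (emerg ? d->new_timeout : 0);
            case FLOW_STATE_ESTABLISHED:
                return n->est_timeout - (emerg ? d->est_timeout : 0);
            case FLOW_STATE_CLOSED:
                return n->closed_timeout - (emerg ? d->closed_timeout : 0);
            default: /* local/capture bypassed */
                return n->bypassed_timeout - (emerg ? d->bypassed_timeout : 0);
        }
    }
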
index 318ae36e7646449e882c71aa613b306256ec885c..198a899b4e3a728583d139792ea1a62e5e213b6b 100644
@@ -107,8 +107,6 @@ typedef struct AppLayerParserState_ AppLayerParserState;
 #define FLOW_DIR_REVERSED               BIT_U32(26)
 /** Indicate that the flow did trigger an expectation creation */
 #define FLOW_HAS_EXPECTATION            BIT_U32(27)
-/** Make sure flow is not 'found' during flow hash lookup. */
-#define FLOW_TIMED_OUT                  BIT_U32(28)
 
 /* File flags */
 
@@ -367,8 +365,33 @@ typedef struct Flow_
     uint8_t proto;
     uint8_t recursion_level;
     uint16_t vlan_id[2];
+    /** how many references exist to this flow *right now*
+     *
+     *  On receiving a packet the counter is incremented while the flow
+     *  bucket is locked, which is also the case on timeout pruning.
+     */
+    FlowRefCount use_cnt;
+
     uint8_t vlan_idx;
 
+    /* track toserver/toclient flow timeout needs */
+    union {
+        struct {
+            uint8_t ffr_ts:4;
+            uint8_t ffr_tc:4;
+        };
+        uint8_t ffr;
+    };
+
+    /** timestamp in seconds of the moment this flow will time out
+     *  according to the timeout policy. Does *not* take emergency
+     *  mode into account. */
+    uint32_t timeout_at;
+
+    /** Thread ID for the stream/detect portion of this flow */
+    FlowThreadId thread_id[2];
+
+    struct Flow_ *next; /* (hash) list next */
     /** Incoming interface */
     struct LiveDevice_ *livedev;
 
@@ -382,15 +405,11 @@ typedef struct Flow_
 
     /* end of flow "header" */
 
-    SC_ATOMIC_DECLARE(FlowStateType, flow_state);
+    /** timeout policy value in seconds to add to the lastts.tv_sec
+     *  when a packet has been received. */
+    uint32_t timeout_policy;
 
-    /** how many pkts and stream msgs are using the flow *right now*. This
-     *  variable is atomic so not protected by the Flow mutex "m".
-     *
-     *  On receiving a packet the counter is incremented while the flow
-     *  bucked is locked, which is also the case on timeout pruning.
-     */
-    SC_ATOMIC_DECLARE(FlowRefCount, use_cnt);
+    FlowStateType flow_state;
 
     /** flow tenant id, used to setup flow timeout and stream pseudo
      *  packets with the correct tenant id set */
@@ -444,9 +463,6 @@ typedef struct Flow_
      *  stored sgh ptrs are reset. */
     uint32_t de_ctx_version;
 
-    /** Thread ID for the stream/detect portion of this flow */
-    FlowThreadId thread_id[2];
-
     /** ttl tracking */
     uint8_t min_ttl_toserver;
     uint8_t max_ttl_toserver;
@@ -469,14 +485,8 @@ typedef struct Flow_
     /* pointer to the var list */
     GenericVar *flowvar;
 
-    /** hash list pointers, protected by fb->s */
-    struct Flow_ *hnext; /* hash list */
-    struct Flow_ *hprev;
     struct FlowBucket_ *fb;
 
-    /** queue list pointers, protected by queue mutex */
-    struct Flow_ *lnext; /* list */
-    struct Flow_ *lprev;
     struct timeval startts;
 
     uint32_t todstpktcnt;
@@ -516,12 +526,23 @@ typedef struct FlowBypassInfo_ {
     uint64_t todstbytecnt;
 } FlowBypassInfo;
 
+#include "flow-queue.h"
+
+typedef struct FlowLookupStruct_ // TODO name
+{
+    /** thread store of spare queues */
+    FlowQueuePrivate spare_queue;
+    DecodeThreadVars *dtv;
+    FlowQueuePrivate work_queue;
+    uint32_t emerg_spare_sync_stamp;
+} FlowLookupStruct;
+
 /** \brief prepare packet for a life with flow
  *  Set PKT_WANTS_FLOW flag to indicate workers should do a flow lookup
  *  and calc the hash value to be used in the lookup and autofp flow
  *  balancing. */
 void FlowSetupPacket(Packet *p);
-void FlowHandlePacket (ThreadVars *, DecodeThreadVars *, Packet *);
+void FlowHandlePacket (ThreadVars *, FlowLookupStruct *, Packet *);
 void FlowInitConfig (char);
 void FlowPrintQueueInfo (void);
 void FlowShutdown(void);
@@ -539,8 +560,6 @@ int FlowSetProtoEmergencyTimeout(uint8_t ,uint32_t ,uint32_t ,uint32_t);
 int FlowSetProtoFreeFunc (uint8_t , void (*Free)(void *));
 void FlowUpdateQueue(Flow *);
 
-struct FlowQueue_;
-
 int FlowUpdateSpareFlows(void);
 
 static inline void FlowSetNoPacketInspectionFlag(Flow *);
@@ -601,7 +620,7 @@ static inline void FlowIncrUsecnt(Flow *f)
     if (f == NULL)
         return;
 
-    (void) SC_ATOMIC_ADD(f->use_cnt, 1);
+    f->use_cnt++;
 }
 
 /**
@@ -614,7 +633,7 @@ static inline void FlowDecrUsecnt(Flow *f)
     if (f == NULL)
         return;
 
-    (void) SC_ATOMIC_SUB(f->use_cnt, 1);
+    f->use_cnt--;
 }
 
 /** \brief Reference the flow, bumping the flows use_cnt
@@ -658,6 +677,23 @@ static inline int64_t FlowGetId(const Flow *f)
     return id;
 }
 
+static inline void FlowSetEndFlags(Flow *f)
+{
+    const int state = f->flow_state;
+    if (state == FLOW_STATE_NEW)
+        f->flow_end_flags |= FLOW_END_FLAG_STATE_NEW;
+    else if (state == FLOW_STATE_ESTABLISHED)
+        f->flow_end_flags |= FLOW_END_FLAG_STATE_ESTABLISHED;
+    else if (state == FLOW_STATE_CLOSED)
+        f->flow_end_flags |= FLOW_END_FLAG_STATE_CLOSED;
+    else if (state == FLOW_STATE_LOCAL_BYPASSED)
+        f->flow_end_flags |= FLOW_END_FLAG_STATE_BYPASSED;
+#ifdef CAPTURE_OFFLOAD
+    else if (state == FLOW_STATE_CAPTURE_BYPASSED)
+        f->flow_end_flags |= FLOW_END_FLAG_STATE_BYPASSED;
+#endif
+}
+
 int FlowClearMemory(Flow *,uint8_t );
 
 AppProto FlowGetAppProtocol(const Flow *f);
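
The ffr union added above packs both directions into one byte, so a single 'f->ffr != 0' compare (as used in CheckWorkQueue()) covers toserver and toclient at once. A standalone illustration, mirroring the layout but otherwise not part of the patch:

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        union {
            struct {
                uint8_t ffr_ts : 4;
                uint8_t ffr_tc : 4;
            };
            uint8_t ffr;
        } u = { .ffr = 0 };

        u.ffr_tc = 1;       /* mark one direction as needing timeout work */
        assert(u.ffr != 0); /* the combined byte is non-zero */
        return 0;
    }
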
index a30f99282e3cb07cd4895eaca7d8f041fba995ba..40b7e1a37be98aa3d5755a7ec86067624c82db45 100644
@@ -95,6 +95,8 @@ TmEcode OutputFlowLog(ThreadVars *tv, void *thread_data, Flow *f)
     if (list == NULL)
         return TM_ECODE_OK;
 
+    FlowSetEndFlags(f);
+
     OutputLoggerThreadData *op_thread_data = (OutputLoggerThreadData *)thread_data;
     OutputFlowLogger *logger = list;
     OutputLoggerThreadStore *store = op_thread_data->store;
index 3d47f42eae624f9d7096846af0ded2b304cceb43..c830249b3486fa407f76225fa50ce87566c1b9ed 100644
@@ -250,7 +250,7 @@ static void EveFlowLogJSON(JsonFlowLogThread *aft, JsonBuilder *jb, Flow *f)
         state = "closed";
     else if (f->flow_end_flags & FLOW_END_FLAG_STATE_BYPASSED) {
         state = "bypassed";
-        int flow_state = SC_ATOMIC_GET(f->flow_state);
+        int flow_state = f->flow_state;
         switch (flow_state) {
             case FLOW_STATE_LOCAL_BYPASSED:
                 JB_SET_STRING(jb, "bypass", "local");
@@ -270,12 +270,14 @@ static void EveFlowLogJSON(JsonFlowLogThread *aft, JsonBuilder *jb, Flow *f)
     jb_set_string(jb, "state", state);
 
     const char *reason = NULL;
-    if (f->flow_end_flags & FLOW_END_FLAG_TIMEOUT)
-        reason = "timeout";
-    else if (f->flow_end_flags & FLOW_END_FLAG_FORCED)
+    if (f->flow_end_flags & FLOW_END_FLAG_FORCED)
         reason = "forced";
     else if (f->flow_end_flags & FLOW_END_FLAG_SHUTDOWN)
         reason = "shutdown";
+    else if (f->flow_end_flags & FLOW_END_FLAG_TIMEOUT)
+        reason = "timeout";
+    else
+        reason = "unknown";
 
     jb_set_string(jb, "reason", reason);
 
index e3d88bb8ffdaeb9fc9a2101c5bebef49a2b031cf..6d2c0b564aa3f1d7b56b84663298235156650cd8 100644
@@ -413,7 +413,9 @@ static TmEcode DecodePcapFile(ThreadVars *tv, Packet *p, void *data)
     double curr_ts = p->ts.tv_sec + p->ts.tv_usec / 1000.0;
     if (curr_ts < prev_signaled_ts || (curr_ts - prev_signaled_ts) > 60.0) {
         prev_signaled_ts = curr_ts;
+#if 0
         FlowWakeupFlowManagerThread();
+#endif
     }
 
     DecoderFunc decoder;
index c15d8c7b734ac28366672731d7c2d8c5ac03a0d1..1089dd132da69598d650cca74ee92093deb2000c 100644
@@ -131,6 +131,8 @@ typedef struct ThreadVars_ {
     SCCtrlMutex *ctrl_mutex;
     SCCtrlCondT *ctrl_cond;
 
+    struct FlowQueue_ *flow_queue;
+
 } ThreadVars;
 
 /** Thread setup flags: */
index dd9a4fcec67cd6886b13a750c0605811c35a5bd6..51f74c2a7764ec6fa394741d2ad39c7ba8ddcb4c 100644
@@ -274,6 +274,12 @@ static void *TmThreadsSlotPktAcqLoop(void *td)
             tv->stream_pq = tv->inq->pq;
             tv->tm_flowworker = slot;
             SCLogDebug("pre-stream packetqueue %p (inq)", tv->stream_pq);
+            tv->flow_queue = FlowQueueNew();
+            if (tv->flow_queue == NULL) {
+                TmThreadsSetFlag(tv, THV_CLOSED | THV_RUNNING_DONE);
+                pthread_exit((void *) -1);
+                return NULL;
+            }
         /* setup a queue */
         } else if (slot->tm_id == TMM_FLOWWORKER) {
             tv->stream_pq_local = SCCalloc(1, sizeof(PacketQueue));
@@ -283,6 +289,12 @@ static void *TmThreadsSlotPktAcqLoop(void *td)
             tv->stream_pq = tv->stream_pq_local;
             tv->tm_flowworker = slot;
             SCLogDebug("pre-stream packetqueue %p (local)", tv->stream_pq);
+            tv->flow_queue = FlowQueueNew();
+            if (tv->flow_queue == NULL) {
+                TmThreadsSetFlag(tv, THV_CLOSED | THV_RUNNING_DONE);
+                pthread_exit((void *) -1);
+                return NULL;
+            }
         }
     }
 
@@ -356,7 +368,7 @@ static void *TmThreadsSlotVar(void *td)
     char run = 1;
     TmEcode r = TM_ECODE_OK;
 
-    PacketPoolInitEmpty();
+    PacketPoolInit();//Empty();
 
     /* Set the thread name */
     if (SCSetThreadName(tv->name) < 0) {
@@ -395,6 +407,12 @@ static void *TmThreadsSlotVar(void *td)
             tv->stream_pq = tv->inq->pq;
             tv->tm_flowworker = s;
             SCLogDebug("pre-stream packetqueue %p (inq)", tv->stream_pq);
+            tv->flow_queue = FlowQueueNew();
+            if (tv->flow_queue == NULL) {
+                TmThreadsSetFlag(tv, THV_CLOSED | THV_RUNNING_DONE);
+                pthread_exit((void *) -1);
+                return NULL;
+            }
         /* setup a queue */
         } else if (s->tm_id == TMM_FLOWWORKER) {
             tv->stream_pq_local = SCCalloc(1, sizeof(PacketQueue));
@@ -404,6 +422,12 @@ static void *TmThreadsSlotVar(void *td)
             tv->stream_pq = tv->stream_pq_local;
             tv->tm_flowworker = s;
             SCLogDebug("pre-stream packetqueue %p (local)", tv->stream_pq);
+            tv->flow_queue = FlowQueueNew();
+            if (tv->flow_queue == NULL) {
+                TmThreadsSetFlag(tv, THV_CLOSED | THV_RUNNING_DONE);
+                pthread_exit((void *) -1);
+                return NULL;
+            }
         }
     }
 
@@ -1308,6 +1332,30 @@ again:
             SleepMsec(1);
             goto again;
         }
+        if (tv->flow_queue) {
+            FQLOCK_LOCK(tv->flow_queue);
+            bool fq_done = (tv->flow_queue->qlen == 0);
+            FQLOCK_UNLOCK(tv->flow_queue);
+            if (!fq_done) {
+                SCMutexUnlock(&tv_root_lock);
+
+                Packet *p = PacketGetFromAlloc();
+                if (p != NULL) {
+                    //SCLogNotice("flush packet created");
+                    p->flags |= PKT_PSEUDO_STREAM_END;
+                    PKT_SET_SRC(p, PKT_SRC_DETECT_RELOAD_FLUSH);
+                    PacketQueue *q = tv->stream_pq;
+                    SCMutexLock(&q->mutex_q);
+                    PacketEnqueue(q, p);
+                    SCCondSignal(&q->cond_q);
+                    SCMutexUnlock(&q->mutex_q);
+                }
+
+                /* don't sleep while holding a lock */
+                SleepMsec(1);
+                goto again;
+            }
+        }
         tv = tv->next;
     }
 
@@ -1373,6 +1421,31 @@ again:
                 goto again;
             }
 
+            if (tv->flow_queue) {
+                FQLOCK_LOCK(tv->flow_queue);
+                bool fq_done = (tv->flow_queue->qlen == 0);
+                FQLOCK_UNLOCK(tv->flow_queue);
+                if (!fq_done) {
+                    SCMutexUnlock(&tv_root_lock);
+
+                    Packet *p = PacketGetFromAlloc();
+                    if (p != NULL) {
+                        //SCLogNotice("flush packet created");
+                        p->flags |= PKT_PSEUDO_STREAM_END;
+                        PKT_SET_SRC(p, PKT_SRC_DETECT_RELOAD_FLUSH);
+                        PacketQueue *q = tv->stream_pq;
+                        SCMutexLock(&q->mutex_q);
+                        PacketEnqueue(q, p);
+                        SCCondSignal(&q->cond_q);
+                        SCMutexUnlock(&q->mutex_q);
+                    }
+
+                    /* don't sleep while holding a lock */
+                    SleepMsec(1);
+                    goto again;
+                }
+            }
+
             /* we found a receive TV. Send it a KILL_PKTACQ signal. */
             if (tm && tm->PktAcqBreakLoop != NULL) {
                 tm->PktAcqBreakLoop(tv, SC_ATOMIC_GET(slots->slot_data));
@@ -1555,6 +1628,11 @@ static void TmThreadFree(ThreadVars *tv)
 
     SCLogDebug("Freeing thread '%s'.", tv->name);
 
+    if (tv->flow_queue) {
+        BUG_ON(tv->flow_queue->qlen != 0);
+        SCFree(tv->flow_queue);
+    }
+
     StatsThreadCleanup(tv);
 
     TmThreadDeinitMC(tv);
@@ -2224,3 +2302,24 @@ int TmThreadsInjectPacketsById(Packet **packets, const int id)
     }
     return 1;
 }
+
+/** \brief inject a flow into a threads flow queue
+ */
+void TmThreadsInjectFlowById(Flow *f, const int id)
+{
+    BUG_ON(id <= 0 || id > (int)thread_store.threads_size);
+
+    int idx = id - 1;
+
+    Thread *t = &thread_store.threads[idx];
+    ThreadVars *tv = t->tv;
+
+    BUG_ON(tv == NULL || tv->flow_queue == NULL);
+
+    FlowEnqueue(tv->flow_queue, f);
+
+    /* wake up listening thread(s) if necessary */
+    if (tv->inq != NULL) {
+        SCCondSignal(&tv->inq->pq->cond_q);
+    }
+}
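
Usage sketch, mirroring FlowForceReassemblyForFlow() earlier in this diff (the helper itself is hypothetical): the owning worker's id is stored in the flow, so the handoff needs no global lookup, and ownership of the flow transfers with the enqueue.

    static void InjectTimedOutFlow(Flow *f)
    {
        /* f->thread_id[0] was recorded when a worker first handled the
         * flow; after this call the caller must not touch 'f' again */
        TmThreadsInjectFlowById(f, (int)f->thread_id[0]);
    }
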
index 278e156bb5e5c923036cafb4352447da0eeab8c7..76e13d9795c931918762ddd067395248e7ddd609 100644
@@ -239,6 +239,7 @@ void TmThreadsListThreads(void);
 int TmThreadsRegisterThread(ThreadVars *tv, const int type);
 void TmThreadsUnregisterThread(const int id);
 int TmThreadsInjectPacketsById(Packet **, int id);
+void TmThreadsInjectFlowById(Flow *f, const int id);
 
 void TmThreadsInitThreadsTimestamp(const struct timeval *ts);
 void TmThreadsSetThreadTimestamp(const int id, const struct timeval *ts);
index d38740f25472a3afd603a8b8525249c0ced4e811..b654a9f093240c781dfb67f43c1610c349533874 100644
 
 #include <stdatomic.h>
 
+#define SC_ATOMIC_MEMORY_ORDER_RELAXED memory_order_relaxed
+#define SC_ATOMIC_MEMORY_ORDER_CONSUME memory_order_consume
+#define SC_ATOMIC_MEMORY_ORDER_ACQUIRE memory_order_acquire
+#define SC_ATOMIC_MEMORY_ORDER_RELEASE memory_order_release
+#define SC_ATOMIC_MEMORY_ORDER_ACQ_REL memory_order_acq_rel
+#define SC_ATOMIC_MEMORY_ORDER_SEQ_CST memory_order_seq_cst
+
 /**
  *  \brief wrapper for declaring atomic variables.
  *
 #define SC_ATOMIC_GET(name) \
     atomic_load(&(name ## _sc_atomic__))
 
+#define SC_ATOMIC_LOAD_EXPLICIT(name, order) \
+    atomic_load_explicit(&(name ## _sc_atomic__), (order))
+
 /**
  *  \brief Set the value for the atomic variable.
  *
 
 #else
 
+#define SC_ATOMIC_MEMORY_ORDER_RELAXED
+#define SC_ATOMIC_MEMORY_ORDER_CONSUME
+#define SC_ATOMIC_MEMORY_ORDER_ACQUIRE
+#define SC_ATOMIC_MEMORY_ORDER_RELEASE
+#define SC_ATOMIC_MEMORY_ORDER_ACQ_REL
+#define SC_ATOMIC_MEMORY_ORDER_SEQ_CST
+
 /**
  *  \brief wrapper for OS/compiler specific atomic compare and swap (CAS)
  *         function.
 #define SC_ATOMIC_GET(name) \
     (name ## _sc_atomic__)
 
+#define SC_ATOMIC_LOAD_EXPLICIT(name, order) \
+    (name ## _sc_atomic__)
+
 /**
  *  \brief Set the value for the atomic variable.
  *
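
A usage sketch for the new explicit-ordering load (hypothetical call site; only the macro names come from this hunk): hot-path readers that need a hint rather than a guarantee, such as a check of a bucket's next_ts, can use relaxed ordering.

    SC_ATOMIC_DECLARE(uint32_t, next_check_ts);

    static bool RowNeedsCheckSketch(const uint32_t now)
    {
        /* relaxed is sufficient here: a stale value only costs one
         * extra scan of the row, it can never corrupt state */
        uint32_t ts = SC_ATOMIC_LOAD_EXPLICIT(next_check_ts, SC_ATOMIC_MEMORY_ORDER_RELAXED);
        return now >= ts;
    }
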
index b73b7e4917c1017544a8fb08d3ca9b5fb7fef158..58834bc193dba1530ed138022ed6acacb89a088e 100644
@@ -177,6 +177,11 @@ tryagain:
     return ret;
 }
 #endif /* BUILD_WITH_UNIXSOCKET */
+static inline void OutputWriteLock(pthread_mutex_t *m)
+{
+    SCMutexLock(m);
+}
 
 /**
  * \brief Write buffer to log file.
@@ -219,7 +224,7 @@ static int SCLogFileWriteNoLock(const char *buffer, int buffer_len, LogFileCtx *
  */
 static int SCLogFileWrite(const char *buffer, int buffer_len, LogFileCtx *log_ctx)
 {
-    SCMutexLock(&log_ctx->fp_mutex);
+    OutputWriteLock(&log_ctx->fp_mutex);
     int ret = 0;
 
 #ifdef BUILD_WITH_UNIXSOCKET
index f7a459d3d13c2f04d62514c732b2f931d53397b3..7379af66d8d5b715c85d8a8faea8e23a4c48f407 100644
@@ -30,6 +30,7 @@
 
 #include "flow-private.h"
 #include "flow-util.h"
+#include "flow-spare-pool.h"
 
 #include "detect.h"
 #include "detect-parse.h"
@@ -929,6 +930,9 @@ end:
 
 uint32_t UTHBuildPacketOfFlows(uint32_t start, uint32_t end, uint8_t dir)
 {
+    FlowLookupStruct fls;
+    memset(&fls, 0, sizeof(fls));
+
     uint32_t i = start;
     uint8_t payload[] = "Payload";
     for (; i < end; i++) {
@@ -940,9 +944,9 @@ uint32_t UTHBuildPacketOfFlows(uint32_t start, uint32_t end, uint8_t dir)
             p->src.addr_data32[0] = i + 1;
             p->dst.addr_data32[0] = i;
         }
-        FlowHandlePacket(NULL, NULL, p);
+        FlowHandlePacket(NULL, &fls, p);
         if (p->flow != NULL) {
-            SC_ATOMIC_RESET(p->flow->use_cnt);
+            p->flow->use_cnt = 0;
             FLOWLOCK_UNLOCK(p->flow);
         }
 
@@ -950,6 +954,14 @@ uint32_t UTHBuildPacketOfFlows(uint32_t start, uint32_t end, uint8_t dir)
         UTHFreePacket(p);
     }
 
+    Flow *f;
+    while ((f = FlowQueuePrivateGetFromTop(&fls.spare_queue))) {
+        FlowFree(f);
+    }
+    while ((f = FlowQueuePrivateGetFromTop(&fls.work_queue))) {
+        FlowFree(f);
+    }
+
     return i;
 }
 
@@ -1103,11 +1115,11 @@ static int UTHBuildPacketOfFlowsTest01(void)
     int result = 0;
 
     FlowInitConfig(FLOW_QUIET);
-    uint32_t flow_spare_q_len = flow_spare_q.len;
+    uint32_t flow_spare_q_len = FlowSpareGetPoolSize();
 
     UTHBuildPacketOfFlows(0, 100, 0);
 
-    if (flow_spare_q.len != flow_spare_q_len - 100)
+    if (FlowSpareGetPoolSize() != flow_spare_q_len - 100)
         result = 0;
     else
         result = 1;