From: Anoop Saldanha <anoopsaldanha@gmail.com>
Date: Fri, 25 Jul 2014 07:50:28 +0000 (+0530)
Subject: CUDA: Update the inspection engine to inform the cuda module that it
X-Git-Tag: suricata-2.1beta2~75
X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F1171%2Fhead;p=thirdparty%2Fsuricata.git

CUDA: Update the inspection engine to inform the cuda module that it
doesn't need the gpu results and to release the packet for the next run.

Previously the inspection engine wouldn't inform the cuda module if it
didn't need the results.  As a consequence, when the packet was next taken
for re-use while it was still being processed by the cuda module, the
engine would wait until the cuda module freed the packet.

This commit updates this functionality to inform the cuda module to
release the packet in the aforementioned case.
---

diff --git a/src/detect.c b/src/detect.c
index e8950a9707..f4f0d54e3d 100644
--- a/src/detect.c
+++ b/src/detect.c
@@ -1572,6 +1572,10 @@ next:
     PACKET_PROFILING_DETECT_END(p, PROF_DETECT_RULES);
 
 end:
+#ifdef __SC_CUDA_SUPPORT__
+    CudaReleasePacket(p);
+#endif
+
     /* see if we need to increment the inspect_id and reset the de_state */
     if (has_state && AppLayerParserProtocolSupportsTxs(p->proto, alproto)) {
         PACKET_PROFILING_DETECT_START(p, PROF_DETECT_STATEFUL);
diff --git a/src/util-mpm-ac.c b/src/util-mpm-ac.c
index 6271f2fb70..f8e2199a39 100644
--- a/src/util-mpm-ac.c
+++ b/src/util-mpm-ac.c
@@ -1691,6 +1691,18 @@ void DetermineCudaStateTableSize(DetectEngineCtx *de_ctx)
 
 }
 
+/** \brief Tell the cuda module that p's gpu results are no longer needed. */
+void CudaReleasePacket(Packet *p)
+{
+    if (p->cuda_pkt_vars.cuda_mpm_enabled == 1) {
+        /* dispatcher reads cuda_mpm_enabled under cuda_mutex; write it there */
+        SCMutexLock(&p->cuda_pkt_vars.cuda_mutex);
+        p->cuda_pkt_vars.cuda_mpm_enabled = 0;
+        p->cuda_pkt_vars.cuda_done = 0;
+        SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
+    }
+}
+
 /* \todos
  * - Use texture memory - Can we fit all the arrays into a 3d texture.
  *   Texture memory definitely offers slightly better performance even
@@ -1890,6 +1902,13 @@ static void *SCACCudaDispatcher(void *arg)
         for (uint32_t i = 0; i < no_of_items; i++, i_op_start_offset++) {
             Packet *p = (Packet *)cb_data->p_buffer[i_op_start_offset];
 
+            SCMutexLock(&p->cuda_pkt_vars.cuda_mutex);
+            if (p->cuda_pkt_vars.cuda_mpm_enabled == 0) {
+                p->cuda_pkt_vars.cuda_done = 0;
+                SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
+                continue;
+            }
+
             p->cuda_pkt_vars.cuda_gpu_matches =
                 cuda_results_buffer_h[((o_buffer[i_op_start_offset] - d_buffer_start_offset) * 2)];
             if (p->cuda_pkt_vars.cuda_gpu_matches != 0) {
@@ -1900,7 +1919,6 @@ static void *SCACCudaDispatcher(void *arg)
                                                 d_buffer_start_offset) * 2)] * sizeof(uint32_t)) + 4);
             }
 
-            SCMutexLock(&p->cuda_pkt_vars.cuda_mutex);
             p->cuda_pkt_vars.cuda_done = 1;
             SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
             SCCondSignal(&p->cuda_pkt_vars.cuda_cond);
diff --git a/src/util-mpm-ac.h b/src/util-mpm-ac.h
index 7b6b83ecf2..760fb70495 100644
--- a/src/util-mpm-ac.h
+++ b/src/util-mpm-ac.h
@@ -205,6 +205,8 @@ uint32_t  SCACCudaPacketResultsProcessing(Packet *p, MpmCtx *mpm_ctx,
                                           PatternMatcherQueue *pmq);
 void DetermineCudaStateTableSize(DetectEngineCtx *de_ctx);
 
+void CudaReleasePacket(Packet *p); /* p's gpu results are not needed */
+
 #endif /* __SC_CUDA_SUPPORT__ */