]> git.ipfire.org Git - thirdparty/suricata.git/commitdiff
CUDA: Update the inspection engine to inform the cuda module that it 1171/head
authorAnoop Saldanha <anoopsaldanha@gmail.com>
Fri, 25 Jul 2014 07:50:28 +0000 (13:20 +0530)
committerVictor Julien <victor@inliniac.net>
Mon, 20 Oct 2014 06:25:15 +0000 (08:25 +0200)
doesn't need the gpu results and to release the packet for the next run.

Previously the inspection engine wouldn't inform the cuda module, if it
didn't need the results.  As a consequence, when the packet is next taken
for re-use, and if the packet is still being processed by the cuda module,
the engine would wait till the cuda module frees the packet.

This commit updates this functionality to inform the cuda module to
release the packet in the aforementioned case.

src/detect.c
src/util-mpm-ac.c
src/util-mpm-ac.h

index e8950a9707508274bf3863ea3b494bc3fb5097b2..f4f0d54e3daa2608ab1f61379158aa9325db1e8f 100644 (file)
@@ -1572,6 +1572,10 @@ next:
     PACKET_PROFILING_DETECT_END(p, PROF_DETECT_RULES);
 
 end:
+#ifdef __SC_CUDA_SUPPORT__
+    CudaReleasePacket(p);
+#endif
+
     /* see if we need to increment the inspect_id and reset the de_state */
     if (has_state && AppLayerParserProtocolSupportsTxs(p->proto, alproto)) {
         PACKET_PROFILING_DETECT_START(p, PROF_DETECT_STATEFUL);
index 6271f2fb706990701005d7e4582ec068754f8f79..f8e2199a3961f16c957025c6f62c51fe48951e92 100644 (file)
@@ -1691,6 +1691,18 @@ void DetermineCudaStateTableSize(DetectEngineCtx *de_ctx)
 
 }
 
+/**
+ * \brief Tell the cuda dispatcher that the gpu results for this packet are
+ *        no longer needed.
+ *
+ *        Does nothing unless cuda_mpm_enabled is set on the packet.  Otherwise
+ *        cuda_mpm_enabled and cuda_done are both cleared while holding the
+ *        packet's cuda_mutex.  cuda_mpm_enabled is cleared under the mutex
+ *        because SCACCudaDispatcher() checks it with the same mutex held;
+ *        writing it without the lock would race with that check.
+ *
+ * \param p Packet whose cuda state should be released.
+ */
+void CudaReleasePacket(Packet *p)
+{
+    if (p->cuda_pkt_vars.cuda_mpm_enabled == 1) {
+        SCMutexLock(&p->cuda_pkt_vars.cuda_mutex);
+        /* both flags change inside one critical section, so the dispatcher
+         * never observes cuda_mpm_enabled mid-update */
+        p->cuda_pkt_vars.cuda_mpm_enabled = 0;
+        p->cuda_pkt_vars.cuda_done = 0;
+        SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
+    }
+
+    return;
+}
+
 /* \todos
  * - Use texture memory - Can we fit all the arrays into a 3d texture.
  *   Texture memory definitely offers slightly better performance even
@@ -1890,6 +1902,13 @@ static void *SCACCudaDispatcher(void *arg)
         for (uint32_t i = 0; i < no_of_items; i++, i_op_start_offset++) {
             Packet *p = (Packet *)cb_data->p_buffer[i_op_start_offset];
 
+            SCMutexLock(&p->cuda_pkt_vars.cuda_mutex);
+            if (p->cuda_pkt_vars.cuda_mpm_enabled == 0) {
+                p->cuda_pkt_vars.cuda_done = 0;
+                SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
+                continue;
+            }
+
             p->cuda_pkt_vars.cuda_gpu_matches =
                 cuda_results_buffer_h[((o_buffer[i_op_start_offset] - d_buffer_start_offset) * 2)];
             if (p->cuda_pkt_vars.cuda_gpu_matches != 0) {
@@ -1900,7 +1919,6 @@ static void *SCACCudaDispatcher(void *arg)
                                                 d_buffer_start_offset) * 2)] * sizeof(uint32_t)) + 4);
             }
 
-            SCMutexLock(&p->cuda_pkt_vars.cuda_mutex);
             p->cuda_pkt_vars.cuda_done = 1;
             SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
             SCCondSignal(&p->cuda_pkt_vars.cuda_cond);
index 7b6b83ecf25e5e229ddaf8f8b48a41d710d8afe8..760fb70495c15749caafabf40e27236365ee1a1c 100644 (file)
@@ -205,6 +205,8 @@ uint32_t  SCACCudaPacketResultsProcessing(Packet *p, MpmCtx *mpm_ctx,
                                           PatternMatcherQueue *pmq);
 void DetermineCudaStateTableSize(DetectEngineCtx *de_ctx);
 
+void CudaReleasePacket(Packet *p);
+
 #endif /* __SC_CUDA_SUPPORT__ */