From: Anoop Saldanha Date: Fri, 25 Jul 2014 07:50:28 +0000 (+0530) Subject: CUDA: Update the inspection engine to inform the cuda module that it X-Git-Tag: suricata-2.1beta2~75 X-Git-Url: http://git.ipfire.org/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F1171%2Fhead;p=thirdparty%2Fsuricata.git CUDA: Update the inspection engine to inform the cuda module that it doesn't need the gpu results and to release the packet for the next run. Previously the inspection engine wouldn't inform the cuda module if it didn't need the results. As a consequence, when the packet is next taken for re-use, and if the packet is still being processed by the cuda module, the engine would wait till the cuda module frees the packet. This commit updates this functionality to inform the cuda module to release the packet for the aforementioned case. --- diff --git a/src/detect.c b/src/detect.c index e8950a9707..f4f0d54e3d 100644 --- a/src/detect.c +++ b/src/detect.c @@ -1572,6 +1572,10 @@ next: PACKET_PROFILING_DETECT_END(p, PROF_DETECT_RULES); end: +#ifdef __SC_CUDA_SUPPORT__ + CudaReleasePacket(p); +#endif + /* see if we need to increment the inspect_id and reset the de_state */ if (has_state && AppLayerParserProtocolSupportsTxs(p->proto, alproto)) { PACKET_PROFILING_DETECT_START(p, PROF_DETECT_STATEFUL); diff --git a/src/util-mpm-ac.c b/src/util-mpm-ac.c index 6271f2fb70..f8e2199a39 100644 --- a/src/util-mpm-ac.c +++ b/src/util-mpm-ac.c @@ -1691,6 +1691,18 @@ void DetermineCudaStateTableSize(DetectEngineCtx *de_ctx) } +void CudaReleasePacket(Packet *p) +{ + if (p->cuda_pkt_vars.cuda_mpm_enabled == 1) { + p->cuda_pkt_vars.cuda_mpm_enabled = 0; + SCMutexLock(&p->cuda_pkt_vars.cuda_mutex); + p->cuda_pkt_vars.cuda_done = 0; + SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex); + } + + return; +} + /* \todos * - Use texture memory - Can we fit all the arrays into a 3d texture. 
* Texture memory definitely offers slightly better performance even @@ -1890,6 +1902,13 @@ static void *SCACCudaDispatcher(void *arg) for (uint32_t i = 0; i < no_of_items; i++, i_op_start_offset++) { Packet *p = (Packet *)cb_data->p_buffer[i_op_start_offset]; + SCMutexLock(&p->cuda_pkt_vars.cuda_mutex); + if (p->cuda_pkt_vars.cuda_mpm_enabled == 0) { + p->cuda_pkt_vars.cuda_done = 0; + SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex); + continue; + } + p->cuda_pkt_vars.cuda_gpu_matches = cuda_results_buffer_h[((o_buffer[i_op_start_offset] - d_buffer_start_offset) * 2)]; if (p->cuda_pkt_vars.cuda_gpu_matches != 0) { @@ -1900,7 +1919,6 @@ static void *SCACCudaDispatcher(void *arg) d_buffer_start_offset) * 2)] * sizeof(uint32_t)) + 4); } - SCMutexLock(&p->cuda_pkt_vars.cuda_mutex); p->cuda_pkt_vars.cuda_done = 1; SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex); SCCondSignal(&p->cuda_pkt_vars.cuda_cond); diff --git a/src/util-mpm-ac.h b/src/util-mpm-ac.h index 7b6b83ecf2..760fb70495 100644 --- a/src/util-mpm-ac.h +++ b/src/util-mpm-ac.h @@ -205,6 +205,8 @@ uint32_t SCACCudaPacketResultsProcessing(Packet *p, MpmCtx *mpm_ctx, PatternMatcherQueue *pmq); void DetermineCudaStateTableSize(DetectEngineCtx *de_ctx); +void CudaReleasePacket(Packet *p); + #endif /* __SC_CUDA_SUPPORT__ */