PACKET_PROFILING_DETECT_END(p, PROF_DETECT_RULES);
end:
+#ifdef __SC_CUDA_SUPPORT__
+ CudaReleasePacket(p);
+#endif
+
/* see if we need to increment the inspect_id and reset the de_state */
if (has_state && AppLayerParserProtocolSupportsTxs(p->proto, alproto)) {
PACKET_PROFILING_DETECT_START(p, PROF_DETECT_STATEFUL);
}
+/**
+ * \brief Reset the CUDA MPM state carried by a packet.
+ *
+ *        If cuda_mpm_enabled is not 1 the packet is left untouched.
+ *        Otherwise cuda_mpm_enabled and cuda_done are both cleared to 0
+ *        while holding the packet's cuda_mutex.
+ *
+ * \param p Packet whose cuda_pkt_vars are reset.  It is dereferenced
+ *          unconditionally, so it must not be NULL.
+ */
+void CudaReleasePacket(Packet *p)
+{
+    /* NOTE(review): this unlocked read assumes only the thread that owns
+     * the packet ever writes cuda_mpm_enabled -- confirm against the code
+     * that sets the flag to 1. */
+    if (p->cuda_pkt_vars.cuda_mpm_enabled != 1)
+        return;
+
+    /* Clear both flags inside the critical section.  The code that hands
+     * back GPU results tests cuda_mpm_enabled while holding cuda_mutex, so
+     * storing to it before taking the lock would race with that read. */
+    SCMutexLock(&p->cuda_pkt_vars.cuda_mutex);
+    p->cuda_pkt_vars.cuda_mpm_enabled = 0;
+    p->cuda_pkt_vars.cuda_done = 0;
+    SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
+}
+
/* \todos
* - Use texture memory - Can we fit all the arrays into a 3d texture.
* Texture memory definitely offers slightly better performance even
for (uint32_t i = 0; i < no_of_items; i++, i_op_start_offset++) {
Packet *p = (Packet *)cb_data->p_buffer[i_op_start_offset];
+ SCMutexLock(&p->cuda_pkt_vars.cuda_mutex);
+ if (p->cuda_pkt_vars.cuda_mpm_enabled == 0) {
+ p->cuda_pkt_vars.cuda_done = 0;
+ SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
+ continue;
+ }
+
p->cuda_pkt_vars.cuda_gpu_matches =
cuda_results_buffer_h[((o_buffer[i_op_start_offset] - d_buffer_start_offset) * 2)];
if (p->cuda_pkt_vars.cuda_gpu_matches != 0) {
d_buffer_start_offset) * 2)] * sizeof(uint32_t)) + 4);
}
- SCMutexLock(&p->cuda_pkt_vars.cuda_mutex);
p->cuda_pkt_vars.cuda_done = 1;
SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
SCCondSignal(&p->cuda_pkt_vars.cuda_cond);
PatternMatcherQueue *pmq);
void DetermineCudaStateTableSize(DetectEngineCtx *de_ctx);
+void CudaReleasePacket(Packet *p);
+
#endif /* __SC_CUDA_SUPPORT__ */