util-cuda.c util-cuda.h \
util-cuda-buffer.c util-cuda-buffer.h \
util-cuda-handlers.c util-cuda-handlers.h \
+util-cuda-vars.c util-cuda-vars.h \
util-daemon.c util-daemon.h \
util-debug.c util-debug.h \
util-debug-filters.c util-debug-filters.h \
#ifdef __SC_CUDA_SUPPORT__
#include "util-cuda-buffer.h"
+#include "util-cuda-vars.h"
#endif /* __SC_CUDA_SUPPORT__ */
typedef enum {
PktProfiling profile;
#endif
#ifdef __SC_CUDA_SUPPORT__
- uint8_t cuda_mpm_enabled;
- uint8_t cuda_done;
- uint16_t cuda_gpu_matches;
- SCMutex cuda_mutex;
- SCCondT cuda_cond;
- uint32_t cuda_results[(UTIL_MPM_CUDA_DATA_BUFFER_SIZE_MAX_LIMIT_DEFAULT * 2) + 1];
+ CudaPacketVars cuda_pkt_vars;
#endif
} Packet;
uint16_t counter_defrag_max_hit;
#ifdef __SC_CUDA_SUPPORT__
- /* cb - CudaBuffer */
- CudaBufferData *cuda_ac_cb;
-
- MpmCtx *mpm_proto_other_ctx;
-
- MpmCtx *mpm_proto_tcp_ctx_ts;
- MpmCtx *mpm_proto_udp_ctx_ts;
-
- MpmCtx *mpm_proto_tcp_ctx_tc;
- MpmCtx *mpm_proto_udp_ctx_tc;
-
- uint16_t data_buffer_size_max_limit;
- uint16_t data_buffer_size_min_limit;
-
- uint8_t mpm_is_cuda;
+ CudaThreadVars cuda_vars;
#endif
} DecodeThreadVars;
PACKET_RESET_CHECKSUMS((p)); \
(p)->pkt = ((uint8_t *)(p)) + sizeof(Packet); \
(p)->livedev = NULL; \
- SCMutexInit(&(p)->cuda_mutex, NULL); \
- SCCondInit(&(p)->cuda_cond, NULL); \
+ SCMutexInit(&(p)->cuda_pkt_vars.cuda_mutex, NULL); \
+ SCCondInit(&(p)->cuda_pkt_vars.cuda_cond, NULL); \
} while (0)
#else
#define PACKET_INITIALIZE(p) { \
SCReturnInt(0);
#ifdef __SC_CUDA_SUPPORT__
- if (p->cuda_mpm_enabled && p->pkt_src == PKT_SRC_WIRE) {
+ if (p->cuda_pkt_vars.cuda_mpm_enabled && p->pkt_src == PKT_SRC_WIRE) {
ret = SCACCudaPacketResultsProcessing(p, mpm_ctx, &det_ctx->pmq);
} else {
ret = mpm_table[mpm_ctx->mpm_type].Search(mpm_ctx,
return 0;
}
-#ifdef __SC_CUDA_SUPPORT__
-
-static void DetermineCudaStateTableSize(DetectEngineCtx *de_ctx)
-{
- MpmCtx *mpm_ctx = NULL;
-
- int ac_16_tables = 0;
- int ac_32_tables = 0;
-
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_tcp_packet, 0);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_tcp_packet, 1);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
-
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_udp_packet, 0);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_udp_packet, 1);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
-
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_other_packet, 0);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
-
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_uri, 0);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_uri, 1);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
-
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hcbd, 0);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hcbd, 1);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
-
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hhd, 0);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hhd, 1);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
-
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hrhd, 0);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hrhd, 1);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
-
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hmd, 0);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hmd, 1);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
-
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hcd, 0);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hcd, 1);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
-
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hrud, 0);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hrud, 1);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
-
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_stream, 0);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_stream, 1);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
-
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hsmd, 0);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hsmd, 1);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
-
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hscd, 0);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hscd, 1);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
-
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_huad, 0);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
- mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_huad, 1);
- if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
- SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
- if (ctx->state_count < 32767)
- ac_16_tables++;
- else
- ac_32_tables++;
- }
-
- if (ac_16_tables > 0 && ac_32_tables > 0)
- SCACConstructBoth16and32StateTables();
-
-
- SCLogDebug("Total mpm ac 16 bit state tables - %d\n", ac_16_tables);
- SCLogDebug("Total mpm ac 32 bit state tables - %d\n", ac_32_tables);
-
-}
-#endif
-
/**
* \brief Convert the signature list into the runtime match structure.
*
}
}
+#ifdef __SC_CUDA_SUPPORT__
+ if (PatternMatchDefaultMatcher() == MPM_AC_CUDA &&
+ strcasecmp(custom_mode, "autofp") != 0) {
+ SCLogError(SC_ERR_RUNMODE, "When using a cuda mpm, the only runmode we "
+ "support is autofp.");
+ exit(EXIT_FAILURE);
+ }
+#endif
+
RunMode *mode = RunModeGetCustomMode(runmode, custom_mode);
if (mode == NULL) {
SCLogError(SC_ERR_RUNMODE, "The custom type \"%s\" doesn't exist "
#include "util-cuda-handlers.h"
#include "detect-engine.h"
#include "detect-engine-mpm.h"
-
-static DetectEngineCtx *cuda_de_ctx = NULL;
+#include "util-cuda-vars.h"
#endif /* __SC_CUDA_SUPPORT__ */
tmm_modules[TMM_DECODEPCAPFILE].flags = TM_FLAG_DECODE_TM;
}
-#ifdef __SC_CUDA_SUPPORT__
-void DecodePcapFileSetCudaDeCtx(DetectEngineCtx *de_ctx)
-{
- cuda_de_ctx = de_ctx;
-
- return;
-}
-#endif
-
void PcapFileCallbackLoop(char *user, struct pcap_pkthdr *h, u_char *pkt) {
SCEnter();
SCReturnInt(TM_ECODE_OK);
}
-#ifdef __SC_CUDA_SUPPORT__
-
-static inline void DecodePcapFileBufferPacket(DecodeThreadVars *dtv, Packet *p)
-{
- if (p->cuda_mpm_enabled) {
- while (!p->cuda_done) {
- SCMutexLock(&p->cuda_mutex);
- if (p->cuda_done) {
- SCMutexUnlock(&p->cuda_mutex);
- break;
- } else {
- SCCondWait(&p->cuda_cond, &p->cuda_mutex);
- SCMutexUnlock(&p->cuda_mutex);
- }
- }
- }
- p->cuda_done = 0;
-
- if (p->payload_len == 0 ||
- (p->flags & (PKT_NOPAYLOAD_INSPECTION & PKT_NOPACKET_INSPECTION)) ||
- (p->flags & PKT_ALLOC) ||
- (dtv->data_buffer_size_min_limit != 0 && p->payload_len < dtv->data_buffer_size_min_limit) ||
- (p->payload_len > dtv->data_buffer_size_max_limit && dtv->data_buffer_size_max_limit != 0) ) {
- p->cuda_mpm_enabled = 0;
- return;
- }
-
- MpmCtx *mpm_ctx = NULL;
- if (p->proto == IPPROTO_TCP) {
- if (p->flowflags & FLOW_PKT_TOSERVER)
- mpm_ctx = dtv->mpm_proto_tcp_ctx_ts;
- else
- mpm_ctx = dtv->mpm_proto_tcp_ctx_tc;
- } else if (p->proto == IPPROTO_UDP) {
- if (p->flowflags & FLOW_PKT_TOSERVER)
- mpm_ctx = dtv->mpm_proto_udp_ctx_ts;
- else
- mpm_ctx = dtv->mpm_proto_udp_ctx_tc;
- } else {
- mpm_ctx = dtv->mpm_proto_other_ctx;
- }
- if (mpm_ctx == NULL || mpm_ctx->pattern_cnt == 0) {
- p->cuda_mpm_enabled = 0;
- return;
- }
-
-#if __WORDSIZE==64
- CudaBufferSlice *slice = CudaBufferGetSlice(dtv->cuda_ac_cb,
- p->payload_len + sizeof(uint64_t) + sizeof(CUdeviceptr),
- (void *)p);
- if (slice == NULL) {
- SCLogError(SC_ERR_FATAL, "Error retrieving slice. Please report "
- "this to dev.");
- p->cuda_mpm_enabled = 0;
- return;
- }
- *((uint64_t *)(slice->buffer + slice->start_offset)) = p->payload_len;
- *((CUdeviceptr *)(slice->buffer + slice->start_offset + sizeof(uint64_t))) = ((SCACCtx *)(mpm_ctx->ctx))->state_table_u32_cuda;
- memcpy(slice->buffer + slice->start_offset + sizeof(uint64_t) + sizeof(CUdeviceptr), p->payload, p->payload_len);
-#else
- CudaBufferSlice *slice = CudaBufferGetSlice(dtv->cuda_ac_cb,
- p->payload_len + sizeof(uint32_t) + sizeof(CUdeviceptr),
- (void *)p);
- if (slice == NULL) {
- SCLogError(SC_ERR_FATAL, "Error retrieving slice. Please report "
- "this to dev.");
- p->cuda_mpm_enabled = 0;
- return;
- }
- *((uint32_t *)(slice->buffer + slice->start_offset)) = p->payload_len;
- *((CUdeviceptr *)(slice->buffer + slice->start_offset + sizeof(uint32_t))) = ((SCACCtx *)(mpm_ctx->ctx))->state_table_u32_cuda;
- memcpy(slice->buffer + slice->start_offset + sizeof(uint32_t) + sizeof(CUdeviceptr), p->payload, p->payload_len);
-#endif
- p->cuda_mpm_enabled = 1;
- SC_ATOMIC_SET(slice->done, 1);
-
- SCLogDebug("cuda ac buffering packet %p, payload_len - %"PRIu16" and deviceptr - %"PRIu64"\n",
- p, p->payload_len, (unsigned long)((SCACCtx *)(mpm_ctx->ctx))->state_table_u32_cuda);
-
- return;
-}
-
-#endif /* __SC_CUDA_SUPPORT__ */
-
double prev_signaled_ts = 0;
TmEcode DecodePcapFile(ThreadVars *tv, Packet *p, void *data, PacketQueue *pq, PacketQueue *postpq)
TimeSet(&p->ts);
/* call the decoder */
-
pcap_g.Decoder(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
#ifdef DEBUG
#endif
#ifdef __SC_CUDA_SUPPORT__
- if (dtv->mpm_is_cuda)
- DecodePcapFileBufferPacket(dtv, p);
+ if (dtv->cuda_vars.mpm_is_cuda)
+ CudaBufferPacket(&dtv->cuda_vars, p);
#endif
SCReturnInt(TM_ECODE_OK);
}
-#ifdef __SC_CUDA_SUPPORT__
-
-static int DecodePcapFileThreadInitCuda(DecodeThreadVars *dtv)
-{
- if (PatternMatchDefaultMatcher() != MPM_AC_CUDA)
- return 0;
-
- MpmCudaConf *conf = CudaHandlerGetCudaProfile("mpm");
- if (conf == NULL) {
- SCLogError(SC_ERR_AC_CUDA_ERROR, "Error obtaining cuda mpm profile.");
- return -1;
- }
-
- dtv->mpm_is_cuda = 1;
- dtv->cuda_ac_cb = CudaHandlerModuleGetData(MPM_AC_CUDA_MODULE_NAME, MPM_AC_CUDA_MODULE_CUDA_BUFFER_NAME);
- dtv->data_buffer_size_max_limit = conf->data_buffer_size_max_limit;
- dtv->data_buffer_size_min_limit = conf->data_buffer_size_min_limit;
- dtv->mpm_proto_tcp_ctx_ts = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_tcp_packet, 0);
- dtv->mpm_proto_tcp_ctx_tc = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_tcp_packet, 1);
- dtv->mpm_proto_udp_ctx_ts = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_udp_packet, 0);
- dtv->mpm_proto_udp_ctx_tc = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_udp_packet, 1);
- dtv->mpm_proto_other_ctx = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_other_packet, 0);
-
- return 0;
-}
-
-#endif /* __SC_CUDA_SUPPORT__ */
-
TmEcode DecodePcapFileThreadInit(ThreadVars *tv, void *initdata, void **data)
{
SCEnter();
DecodeRegisterPerfCounters(dtv, tv);
#ifdef __SC_CUDA_SUPPORT__
- if (DecodePcapFileThreadInitCuda(dtv) < 0)
+ if (CudaThreadVarsInit(&dtv->cuda_vars) < 0)
SCReturnInt(TM_ECODE_FAILED);
#endif
void TmModuleReceivePcapFileRegister (void);
void TmModuleDecodePcapFileRegister (void);
-#ifdef __SC_CUDA_SUPPORT__
-void DecodePcapFileSetCudaDeCtx(DetectEngineCtx *de_ctx);
-#endif
#endif /* __SOURCE_PCAP_FILE_H__ */
}
#ifdef __SC_CUDA_SUPPORT__
if (PatternMatchDefaultMatcher() == MPM_AC_CUDA)
- DecodePcapFileSetCudaDeCtx(de_ctx);
+ CudaVarsSetDeCtx(de_ctx);
#endif /* __SC_CUDA_SUPPORT__ */
SCClassConfLoadClassficationConfigFile(de_ctx);
if (cb_data->d_buffer_write < cb_data->d_buffer_read) {
if (cb_data->d_buffer_write + len >= cb_data->d_buffer_read) {
- SCLogInfo("d_buffer full");
+ SCLogDebug("d_buffer full");
SCMutexUnlock(&cb_data->m);
SCMutexLock(&slice_pool_mutex);
}
} else {
if (cb_data->d_buffer_write + len > cb_data->d_buffer_len) {
- SCLogInfo("d_buffer limit hit - buffer_len - %"PRIu32,
+ SCLogDebug("d_buffer limit hit - buffer_len - %"PRIu32,
cb_data->d_buffer_len);
SCMutexUnlock(&cb_data->m);
if (cb_data->op_buffer_write < cb_data->op_buffer_read) {
if (cb_data->op_buffer_write + 1 >= cb_data->op_buffer_read) {
- SCLogInfo("op_buffer full");
+ SCLogDebug("op_buffer full");
SCMutexUnlock(&cb_data->m);
SCMutexLock(&slice_pool_mutex);
}
} else {
if (cb_data->op_buffer_write + 1 > cb_data->op_buffer_len) {
- SCLogInfo("op_buffer limit hit - buffer_len - %"PRIu32,
+ SCLogDebug("op_buffer limit hit - buffer_len - %"PRIu32,
cb_data->op_buffer_len);
SCMutexUnlock(&cb_data->m);
int CudaBufferTest03(void)
{
- CudaBufferSlice *slice1, *slice2, *slice3, *slice_temp;
+ CudaBufferSlice *slice, *slice_temp;
int result = 0;
uint8_t *d_buffer = SCMalloc(sizeof(uint8_t) * 64);
goto end;
}
- slice1 = CudaBufferGetSlice(data, 16, NULL);
- slice2 = CudaBufferGetSlice(data, 16, NULL);
- slice3 = CudaBufferGetSlice(data, 24, NULL);
+ slice = CudaBufferGetSlice(data, 16, NULL);
+ slice = CudaBufferGetSlice(data, 16, NULL);
+ slice = CudaBufferGetSlice(data, 24, NULL);
/* culling */
CudaBufferCulledInfo culled_info;
--- /dev/null
+/* Copyright (C) 2007-2010 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/**
+ * \file
+ *
+ * \author Anoop Saldanha <anoopsaldanha@gmail.com>
+ */
+
+#ifdef __SC_CUDA_SUPPORT__
+
+#include "suricata.h"
+#include "util-mpm.h"
+#include "util-cuda-handlers.h"
+#include "util-cuda-vars.h"
+#include "detect-engine-mpm.h"
+#include "util-debug.h"
+#include "util-mpm-ac.h"
+
+static DetectEngineCtx *cuda_de_ctx = NULL;
+
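+/**
+ * \brief Register the detection engine ctx that holds the cuda mpm contexts.
+ *        Called once at startup; CudaThreadVarsInit() later pulls the
+ *        per protocol packet mpm contexts from it.
+ */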
+void CudaVarsSetDeCtx(DetectEngineCtx *de_ctx)
+{
+ if (cuda_de_ctx != NULL) {
+ SCLogError(SC_ERR_FATAL, "CudaVarsSetDeCtx() called more than once. "
+ "This function should be called only once during the "
+ "lifetime of the engine.");
+ exit(EXIT_FAILURE);
+ }
+
+ cuda_de_ctx = de_ctx;
+
+ return;
+}
+
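+/**
+ * \brief Set up the per thread cuda vars from the "mpm" cuda profile and
+ *        the registered detection engine ctx.
+ *
+ * \retval  0 on success, or if the default matcher isn't MPM_AC_CUDA.
+ * \retval -1 if the cuda mpm profile couldn't be obtained.
+ */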
+int CudaThreadVarsInit(CudaThreadVars *ctv)
+{
+ if (PatternMatchDefaultMatcher() != MPM_AC_CUDA)
+ return 0;
+
+ MpmCudaConf *conf = CudaHandlerGetCudaProfile("mpm");
+ if (conf == NULL) {
+ SCLogError(SC_ERR_AC_CUDA_ERROR, "Error obtaining cuda mpm profile.");
+ return -1;
+ }
+
+ ctv->mpm_is_cuda = 1;
+ ctv->cuda_ac_cb = CudaHandlerModuleGetData(MPM_AC_CUDA_MODULE_NAME, MPM_AC_CUDA_MODULE_CUDA_BUFFER_NAME);
+ ctv->data_buffer_size_max_limit = conf->data_buffer_size_max_limit;
+ ctv->data_buffer_size_min_limit = conf->data_buffer_size_min_limit;
+ ctv->mpm_proto_tcp_ctx_ts = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_tcp_packet, 0);
+ ctv->mpm_proto_tcp_ctx_tc = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_tcp_packet, 1);
+ ctv->mpm_proto_udp_ctx_ts = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_udp_packet, 0);
+ ctv->mpm_proto_udp_ctx_tc = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_udp_packet, 1);
+ ctv->mpm_proto_other_ctx = MpmFactoryGetMpmCtxForProfile(cuda_de_ctx, cuda_de_ctx->sgh_mpm_context_proto_other_packet, 0);
+
+ return 0;
+}
+
+#endif
--- /dev/null
+/* Copyright (C) 2007-2010 Open Information Security Foundation
+ *
+ * You can copy, redistribute or modify this Program under the terms of
+ * the GNU General Public License version 2 as published by the Free
+ * Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * version 2 along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+/**
+ * \file
+ *
+ * \author Anoop Saldanha <anoopsaldanha@gmail.com>
+ */
+
+#ifdef __SC_CUDA_SUPPORT__
+
+#ifndef __UTIL_CUDA_VARS__H__
+#define __UTIL_CUDA_VARS__H__
+
+#include "util-cuda-buffer.h"
+#include "util-mpm.h"
+#include "threads.h"
+
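+/** Per thread cuda vars: the CudaBuffer used to hand payloads to the gpu
+ *  dispatcher, the packet mpm contexts to match against and the configured
+ *  payload size limits. */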
+typedef struct CudaThreadVars_ {
+ /* cb - CudaBuffer */
+ CudaBufferData *cuda_ac_cb;
+
+ MpmCtx *mpm_proto_other_ctx;
+
+ MpmCtx *mpm_proto_tcp_ctx_ts;
+ MpmCtx *mpm_proto_udp_ctx_ts;
+
+ MpmCtx *mpm_proto_tcp_ctx_tc;
+ MpmCtx *mpm_proto_udp_ctx_tc;
+
+ uint16_t data_buffer_size_max_limit;
+ uint16_t data_buffer_size_min_limit;
+
+ uint8_t mpm_is_cuda;
+} CudaThreadVars;
+
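+/** Per packet cuda vars: the match results written back by the gpu
+ *  dispatcher and the mutex/condition it uses to signal completion. */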
+typedef struct CudaPacketVars_ {
+ uint8_t cuda_mpm_enabled;
+ uint8_t cuda_done;
+ uint16_t cuda_gpu_matches;
+ SCMutex cuda_mutex;
+ SCCondT cuda_cond;
+ uint32_t cuda_results[(UTIL_MPM_CUDA_DATA_BUFFER_SIZE_MAX_LIMIT_DEFAULT * 2) + 1];
+} CudaPacketVars;
+
+void CudaVarsSetDeCtx(struct DetectEngineCtx_ *de_ctx);
+int CudaThreadVarsInit(CudaThreadVars *ctv);
+
+#endif /* __UTIL_CUDA_VARS__H__ */
+
+#endif /* __SC_CUDA_SUPPORT__ */
/****************************Cuda side of things****************************/
#ifdef __SC_CUDA_SUPPORT__
+
+/* \todo Technically this is generic to all mpms, but since we only use ac,
+ * the code directly references the ac implementation and hence it lives
+ * in this file instead of util-mpm.c
+ */
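+/* Count how many ac-cuda contexts fit in a 16 bit state table and how many
+ * need 32 bit; if both kinds are present, both table variants are
+ * constructed. */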
+void DetermineCudaStateTableSize(DetectEngineCtx *de_ctx)
+{
+ MpmCtx *mpm_ctx = NULL;
+
+ int ac_16_tables = 0;
+ int ac_32_tables = 0;
+
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_tcp_packet, 0);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_tcp_packet, 1);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_udp_packet, 0);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_udp_packet, 1);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_proto_other_packet, 0);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_uri, 0);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_uri, 1);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hcbd, 0);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hcbd, 1);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hhd, 0);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hhd, 1);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hrhd, 0);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hrhd, 1);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hmd, 0);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hmd, 1);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hcd, 0);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hcd, 1);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hrud, 0);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hrud, 1);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_stream, 0);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_stream, 1);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hsmd, 0);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hsmd, 1);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hscd, 0);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_hscd, 1);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_huad, 0);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+ mpm_ctx = MpmFactoryGetMpmCtxForProfile(de_ctx, de_ctx->sgh_mpm_context_huad, 1);
+ if (mpm_ctx->mpm_type == MPM_AC_CUDA) {
+ SCACCtx *ctx = (SCACCtx *)mpm_ctx->ctx;
+ if (ctx->state_count < 32767)
+ ac_16_tables++;
+ else
+ ac_32_tables++;
+ }
+
+ if (ac_16_tables > 0 && ac_32_tables > 0)
+ SCACConstructBoth16and32StateTables();
+
+
+ SCLogDebug("Total mpm ac 16 bit state tables - %d\n", ac_16_tables);
+ SCLogDebug("Total mpm ac 32 bit state tables - %d\n", ac_32_tables);
+
+}
+
/* \todos
* - Use texture memory - Can we fit all the arrays into a 3d texture.
* Texture memory definitely offers slightly better performance even
for (uint32_t i = 0; i < no_of_items; i++, i_op_start_offset++) {
Packet *p = (Packet *)cb_data->p_buffer[i_op_start_offset];
- p->cuda_gpu_matches =
+ p->cuda_pkt_vars.cuda_gpu_matches =
cuda_results_buffer_h[((o_buffer[i_op_start_offset] - d_buffer_start_offset) * 2)];
- if (p->cuda_gpu_matches != 0) {
- memcpy(p->cuda_results,
+ if (p->cuda_pkt_vars.cuda_gpu_matches != 0) {
+ memcpy(p->cuda_pkt_vars.cuda_results,
cuda_results_buffer_h +
((o_buffer[i_op_start_offset] - d_buffer_start_offset) * 2),
(cuda_results_buffer_h[((o_buffer[i_op_start_offset] -
d_buffer_start_offset) * 2)] * sizeof(uint32_t)) + 4);
}
- SCMutexLock(&p->cuda_mutex);
- p->cuda_done = 1;
- SCMutexUnlock(&p->cuda_mutex);
- SCCondSignal(&p->cuda_cond);
+ SCMutexLock(&p->cuda_pkt_vars.cuda_mutex);
+ p->cuda_pkt_vars.cuda_done = 1;
+ SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
+ SCCondSignal(&p->cuda_pkt_vars.cuda_cond);
}
if (no_of_items != 0)
CudaBufferReportCulledConsumption(cb_data, &cb_culled_info);
{
uint32_t u = 0;
- while (!p->cuda_done) {
- SCMutexLock(&p->cuda_mutex);
- if (p->cuda_done) {
- SCMutexUnlock(&p->cuda_mutex);
+ while (!p->cuda_pkt_vars.cuda_done) {
+ SCMutexLock(&p->cuda_pkt_vars.cuda_mutex);
+ if (p->cuda_pkt_vars.cuda_done) {
+ SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
break;
} else {
- SCCondWait(&p->cuda_cond, &p->cuda_mutex);
- SCMutexUnlock(&p->cuda_mutex);
+ SCCondWait(&p->cuda_pkt_vars.cuda_cond, &p->cuda_pkt_vars.cuda_mutex);
+ SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
}
} /* while */
- p->cuda_done = 0;
- p->cuda_mpm_enabled = 0;
+ p->cuda_pkt_vars.cuda_done = 0;
+ p->cuda_pkt_vars.cuda_mpm_enabled = 0;
- uint32_t cuda_matches = p->cuda_gpu_matches;
+ uint32_t cuda_matches = p->cuda_pkt_vars.cuda_gpu_matches;
if (cuda_matches == 0)
return 0;
uint32_t matches = 0;
- uint32_t *results = p->cuda_results + 1;
+ uint32_t *results = p->cuda_pkt_vars.cuda_results + 1;
uint8_t *buf = p->payload;
SCACCtx *ctx = mpm_ctx->ctx;
SCACOutputTable *output_table = ctx->output_table;
#ifdef __SC_CUDA_SUPPORT__
#include "util-cuda.h"
+#include "util-cuda-vars.h"
+#include "decode.h"
#endif /* __SC_CUDA_SUPPORT__ */
typedef struct SCACPattern_ {
#define MPM_AC_CUDA_MODULE_NAME "ac_cuda"
#define MPM_AC_CUDA_MODULE_CUDA_BUFFER_NAME "ac_cuda_cb"
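+/**
+ * \brief Queue a packet's payload for the gpu mpm dispatcher.
+ *
+ *        If a previous gpu result is still pending for this packet, wait
+ *        for it first.  Packets that don't qualify (no payload, PKT_ALLOC
+ *        set, payload outside the configured size limits, or no packet mpm
+ *        ctx with patterns for the protocol/direction) get cuda_mpm_enabled
+ *        cleared so the cpu mpm handles them.  Otherwise the payload length,
+ *        the ac state table device pointer and the payload are copied into
+ *        a CudaBuffer slice and the slice is marked done for the dispatcher.
+ */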
+static inline void CudaBufferPacket(CudaThreadVars *ctv, Packet *p)
+{
+ if (p->cuda_pkt_vars.cuda_mpm_enabled) {
+ while (!p->cuda_pkt_vars.cuda_done) {
+ SCMutexLock(&p->cuda_pkt_vars.cuda_mutex);
+ if (p->cuda_pkt_vars.cuda_done) {
+ SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
+ break;
+ } else {
+ SCCondWait(&p->cuda_pkt_vars.cuda_cond, &p->cuda_pkt_vars.cuda_mutex);
+ SCMutexUnlock(&p->cuda_pkt_vars.cuda_mutex);
+ }
+ }
+ }
+ p->cuda_pkt_vars.cuda_done = 0;
+
+ if (p->payload_len == 0 ||
+ (p->flags & (PKT_NOPAYLOAD_INSPECTION & PKT_NOPACKET_INSPECTION)) ||
+ (p->flags & PKT_ALLOC) ||
+ (ctv->data_buffer_size_min_limit != 0 && p->payload_len < ctv->data_buffer_size_min_limit) ||
+ (p->payload_len > ctv->data_buffer_size_max_limit && ctv->data_buffer_size_max_limit != 0) ) {
+ p->cuda_pkt_vars.cuda_mpm_enabled = 0;
+ return;
+ }
+
+ MpmCtx *mpm_ctx = NULL;
+ if (p->proto == IPPROTO_TCP) {
+ if (p->flowflags & FLOW_PKT_TOSERVER)
+ mpm_ctx = ctv->mpm_proto_tcp_ctx_ts;
+ else
+ mpm_ctx = ctv->mpm_proto_tcp_ctx_tc;
+ } else if (p->proto == IPPROTO_UDP) {
+ if (p->flowflags & FLOW_PKT_TOSERVER)
+ mpm_ctx = ctv->mpm_proto_udp_ctx_ts;
+ else
+ mpm_ctx = ctv->mpm_proto_udp_ctx_tc;
+ } else {
+ mpm_ctx = ctv->mpm_proto_other_ctx;
+ }
+ if (mpm_ctx == NULL || mpm_ctx->pattern_cnt == 0) {
+ p->cuda_pkt_vars.cuda_mpm_enabled = 0;
+ return;
+ }
+
+#if __WORDSIZE==64
+ CudaBufferSlice *slice = CudaBufferGetSlice(ctv->cuda_ac_cb,
+ p->payload_len + sizeof(uint64_t) + sizeof(CUdeviceptr),
+ (void *)p);
+ if (slice == NULL) {
+ SCLogError(SC_ERR_FATAL, "Error retrieving slice. Please report "
+ "this to dev.");
+ p->cuda_pkt_vars.cuda_mpm_enabled = 0;
+ return;
+ }
+ *((uint64_t *)(slice->buffer + slice->start_offset)) = p->payload_len;
+ *((CUdeviceptr *)(slice->buffer + slice->start_offset + sizeof(uint64_t))) = ((SCACCtx *)(mpm_ctx->ctx))->state_table_u32_cuda;
+ memcpy(slice->buffer + slice->start_offset + sizeof(uint64_t) + sizeof(CUdeviceptr), p->payload, p->payload_len);
+#else
+ CudaBufferSlice *slice = CudaBufferGetSlice(ctv->cuda_ac_cb,
+ p->payload_len + sizeof(uint32_t) + sizeof(CUdeviceptr),
+ (void *)p);
+ if (slice == NULL) {
+ SCLogError(SC_ERR_FATAL, "Error retrieving slice. Please report "
+ "this to dev.");
+ p->cuda_pkt_vars.cuda_mpm_enabled = 0;
+ return;
+ }
+ *((uint32_t *)(slice->buffer + slice->start_offset)) = p->payload_len;
+ *((CUdeviceptr *)(slice->buffer + slice->start_offset + sizeof(uint32_t))) = ((SCACCtx *)(mpm_ctx->ctx))->state_table_u32_cuda;
+ memcpy(slice->buffer + slice->start_offset + sizeof(uint32_t) + sizeof(CUdeviceptr), p->payload, p->payload_len);
+#endif
+ p->cuda_pkt_vars.cuda_mpm_enabled = 1;
+ SC_ATOMIC_SET(slice->done, 1);
+
+ SCLogDebug("cuda ac buffering packet %p, payload_len - %"PRIu16" and deviceptr - %"PRIu64"\n",
+ p, p->payload_len, (unsigned long)((SCACCtx *)(mpm_ctx->ctx))->state_table_u32_cuda);
+
+ return;
+}
void MpmACCudaRegister(void);
void SCACConstructBoth16and32StateTables(void);
void SCACCudaKillDispatcher(void);
uint32_t SCACCudaPacketResultsProcessing(Packet *p, MpmCtx *mpm_ctx,
PatternMatcherQueue *pmq);
+void DetermineCudaStateTableSize(DetectEngineCtx *de_ctx);
#endif /* __SC_CUDA_SUPPORT__ */
# conservative 1024. A higher number will make sure CPU's/CPU cores will be
# more easily kept busy, but may negatively impact caching.
#
-# If you are using the CUDA pattern matcher (b2g_cuda below), different rules
-# apply. In that case try something like 4000 or more. This is because the CUDA
-# pattern matcher scans many packets in parallel.
+# If you are using the CUDA pattern matcher (mpm-algo: ac-cuda), different rules
+# apply. In that case try something like 60000 or more. This is because the CUDA
+# pattern matcher buffers and scans as many packets as possible in parallel.
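+# For example, with mpm-algo set to ac-cuda:
+#max-pending-packets: 60000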
#max-pending-packets: 1024
# Runmode the engine should use. Please check --list-runmodes to get the available