From: Jeff Lucovsky Date: Fri, 20 Aug 2021 13:10:26 +0000 (-0400) Subject: netmap: V14 API changes X-Git-Tag: suricata-7.0.0-beta1~1173 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=refs%2Fpull%2F6629%2Fhead;p=thirdparty%2Fsuricata.git netmap: V14 API changes This commit modifies the Netmap packet handling to use API version 14. @bmeeks8 contributed many changes instrumental to this effort. --- diff --git a/src/runmode-netmap.c b/src/runmode-netmap.c index e3ee89938a..699f4fbb10 100644 --- a/src/runmode-netmap.c +++ b/src/runmode-netmap.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2014-2018 Open Information Security Foundation +/* Copyright (C) 2014-2021 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free @@ -22,35 +22,27 @@ */ /** -* \file -* -* \author Aleksey Katargin -* -* Netmap runmode -* -*/ + * \file + * + * \author Aleksey Katargin + * \author Bill Meeks + * + * Netmap runmode + * + */ #include "suricata-common.h" -#include "tm-threads.h" -#include "conf.h" #include "runmodes.h" #include "runmode-netmap.h" -#include "output.h" -#include "log-httplog.h" -#include "detect-engine-mpm.h" - -#include "alert-fastlog.h" -#include "alert-debuglog.h" - -#include "util-debug.h" -#include "util-time.h" -#include "util-cpu.h" -#include "util-affinity.h" -#include "util-device.h" #include "util-runmodes.h" #include "util-ioctl.h" #include "util-byte.h" +#ifdef HAVE_NETMAP +#define NETMAP_WITH_LIBS +#include <net/netmap_user.h> +#endif /* HAVE_NETMAP */ + #include "source-netmap.h" extern int max_pending_packets; @@ -70,7 +62,7 @@ void RunModeIdsNetmapRegister(void) " tasks from acquisition to logging", RunModeIdsNetmapWorkers); RunModeRegisterNewRunMode(RUNMODE_NETMAP, "autofp", - "Multi threaded netmap mode. Packets from " + "Multi-threaded netmap mode. 
Packets from " "each flow are assigned to a single detect " "thread.", RunModeIdsNetmapAutoFp); @@ -110,6 +102,14 @@ static int ParseNetmapSettings(NetmapIfaceSettings *ns, const char *iface, } } + /* we will need the base interface name for later */ + char base_name[IFNAMSIZ]; + strlcpy(base_name, ns->iface, sizeof(base_name)); + if (strlen(base_name) > 0 && + (base_name[strlen(base_name) - 1] == '^' || base_name[strlen(base_name) - 1] == '*')) { + base_name[strlen(base_name) - 1] = '\0'; + } + /* prefixed with netmap or vale means it's not a real interface * and we don't check offloading. */ if (strncmp(ns->iface, "netmap:", 7) != 0 && @@ -207,15 +207,13 @@ finalize: ns->ips = (ns->copy_mode != NETMAP_COPY_MODE_NONE); - if (ns->sw_ring) { - /* just one thread per interface supported */ - ns->threads = 1; - } else if (ns->threads_auto) { + if (ns->threads_auto) { /* As NetmapGetRSSCount used to be broken on Linux, * fall back to GetIfaceRSSQueuesNum if needed. */ ns->threads = NetmapGetRSSCount(ns->iface); if (ns->threads == 0) { - ns->threads = GetIfaceRSSQueuesNum(ns->iface); + /* need to use base_name of interface here */ + ns->threads = GetIfaceRSSQueuesNum(base_name); } } if (ns->threads <= 0) { @@ -226,15 +224,15 @@ finalize: } /** -* \brief extract information from config file -* -* The returned structure will be freed by the thread init function. -* This is thus necessary to or copy the structure before giving it -* to thread or to reparse the file for each thread (and thus have -* new structure. -* -* \return a NetmapIfaceConfig corresponding to the interface name -*/ + * \brief extract information from config file + * + * The returned structure will be freed by the thread init function. + * This is thus necessary to copy the structure before giving it + * to thread or to reparse the file for each thread (and thus have + * new structure. 
+ * + * \return a NetmapIfaceConfig corresponding to the interface name + */ static void *ParseNetmapConfig(const char *iface_name) { ConfNode *if_root = NULL; @@ -245,11 +243,10 @@ static void *ParseNetmapConfig(const char *iface_name) return NULL; } - NetmapIfaceConfig *aconf = SCMalloc(sizeof(*aconf)); + NetmapIfaceConfig *aconf = SCCalloc(1, sizeof(*aconf)); if (unlikely(aconf == NULL)) { return NULL; } - memset(aconf, 0, sizeof(*aconf)); aconf->DerefFunc = NetmapDerefConfig; strlcpy(aconf->iface_name, iface_name, sizeof(aconf->iface_name)); @@ -275,19 +272,26 @@ static void *ParseNetmapConfig(const char *iface_name) if (strlen(out_iface) > 0) { if_root = ConfFindDeviceConfig(netmap_node, out_iface); ParseNetmapSettings(&aconf->out, out_iface, if_root, if_default); - - /* if one side of the IPS peering uses a sw_ring, we will default - * to using a single ring/thread on the other side as well. Only - * if thread variable is set to 'auto'. So the user can override - * this. */ - if (aconf->out.sw_ring && aconf->in.threads_auto) { - aconf->out.threads = aconf->in.threads = 1; - } else if (aconf->in.sw_ring && aconf->out.threads_auto) { - aconf->out.threads = aconf->in.threads = 1; - } } } + int ring_count = NetmapGetRSSCount(aconf->iface_name); + if (strlen(aconf->iface_name) > 0 && + (aconf->iface_name[strlen(aconf->iface_name) - 1] == '^' || + aconf->iface_name[strlen(aconf->iface_name) - 1] == '*')) { + SCLogDebug("%s -- using %d netmap host ring pair%s", aconf->iface_name, ring_count, + ring_count == 1 ? "" : "s"); + } else { + SCLogDebug("%s -- using %d netmap ring pair%s", aconf->iface_name, ring_count, + ring_count == 1 ? 
"" : "s"); + } + + for (int i = 0; i < ring_count; i++) { + char live_buf[32] = { 0 }; + snprintf(live_buf, sizeof(live_buf), "netmap%d", i); + LiveRegisterDevice(live_buf); + } + /* netmap needs all offloading to be disabled */ if (aconf->in.real) { char base_name[sizeof(aconf->in.iface)]; @@ -309,6 +313,7 @@ static void *ParseNetmapConfig(const char *iface_name) SCLogPerf("Using %d threads for interface %s", aconf->in.threads, aconf->iface_name); + LiveDeviceHasNoStats(); return aconf; } @@ -396,69 +401,83 @@ int NetmapRunModeIsIPS() return has_ips; } -#endif // #ifdef HAVE_NETMAP +typedef enum { NETMAP_AUTOFP, NETMAP_WORKERS, NETMAP_SINGLE } NetmapRunMode_t; -int RunModeIdsNetmapAutoFp(void) +static int NetmapRunModeInit(NetmapRunMode_t runmode) { SCEnter(); -#ifdef HAVE_NETMAP - int ret; - const char *live_dev = NULL; - RunModeInitialize(); - TimeModeSetLive(); + const char *live_dev = NULL; (void)ConfGet("netmap.live-interface", &live_dev); - SCLogDebug("live_dev %s", live_dev); - - ret = RunModeSetLiveCaptureAutoFp( - ParseNetmapConfig, - NetmapConfigGeThreadsCount, - "ReceiveNetmap", - "DecodeNetmap", thread_name_autofp, - live_dev); + int ret; + switch (runmode) { + case NETMAP_AUTOFP: + ret = RunModeSetLiveCaptureAutoFp(ParseNetmapConfig, NetmapConfigGeThreadsCount, + "ReceiveNetmap", "DecodeNetmap", thread_name_autofp, live_dev); + break; + case NETMAP_WORKERS: + ret = RunModeSetLiveCaptureWorkers(ParseNetmapConfig, NetmapConfigGeThreadsCount, + "ReceiveNetmap", "DecodeNetmap", thread_name_workers, live_dev); + break; + case NETMAP_SINGLE: + ret = RunModeSetLiveCaptureSingle(ParseNetmapConfig, NetmapConfigGeThreadsCount, + "ReceiveNetmap", "DecodeNetmap", thread_name_single, live_dev); + break; + } if (ret != 0) { - FatalError(SC_ERR_FATAL, "Unable to start runmode"); + FatalError(SC_ERR_FATAL, "Unable to start runmode %s", + runmode == NETMAP_AUTOFP ? "autofp" + : runmode == NETMAP_WORKERS ? 
"workers" : "single"); } - SCLogDebug("RunModeIdsNetmapAutoFp initialised"); -#endif /* HAVE_NETMAP */ + SCLogDebug("%s initialized", + runmode == NETMAP_AUTOFP ? "autofp" : runmode == NETMAP_WORKERS ? "workers" : "single"); SCReturnInt(0); } +int RunModeIdsNetmapAutoFp(void) +{ + return NetmapRunModeInit(NETMAP_AUTOFP); +} + /** * \brief Single thread version of the netmap processing. */ int RunModeIdsNetmapSingle(void) { - SCEnter(); - -#ifdef HAVE_NETMAP - int ret; - const char *live_dev = NULL; - - RunModeInitialize(); - TimeModeSetLive(); - - (void)ConfGet("netmap.live-interface", &live_dev); - - ret = RunModeSetLiveCaptureSingle( - ParseNetmapConfig, - NetmapConfigGeThreadsCount, - "ReceiveNetmap", - "DecodeNetmap", thread_name_single, - live_dev); - if (ret != 0) { - FatalError(SC_ERR_FATAL, "Unable to start runmode"); - } + return NetmapRunModeInit(NETMAP_SINGLE); +} - SCLogDebug("RunModeIdsNetmapSingle initialised"); +/** + * \brief Workers version of the netmap processing. + * + * Start N threads with each thread doing all the work. + * + */ +int RunModeIdsNetmapWorkers(void) +{ + return NetmapRunModeInit(NETMAP_WORKERS); +} +#else +int RunModeIdsNetmapAutoFp(void) +{ + SCEnter(); + FatalError(SC_ERR_FATAL, "Netmap not configured"); + SCReturnInt(0); +} -#endif /* HAVE_NETMAP */ +/** + * \brief Single thread version of the netmap processing. 
+ */ +int RunModeIdsNetmapSingle(void) +{ + SCEnter(); + FatalError(SC_ERR_FATAL, "Netmap not configured"); SCReturnInt(0); } @@ -471,31 +490,10 @@ int RunModeIdsNetmapSingle(void) int RunModeIdsNetmapWorkers(void) { SCEnter(); - -#ifdef HAVE_NETMAP - int ret; - const char *live_dev = NULL; - - RunModeInitialize(); - TimeModeSetLive(); - - (void)ConfGet("netmap.live-interface", &live_dev); - - ret = RunModeSetLiveCaptureWorkers( - ParseNetmapConfig, - NetmapConfigGeThreadsCount, - "ReceiveNetmap", - "DecodeNetmap", thread_name_workers, - live_dev); - if (ret != 0) { - FatalError(SC_ERR_FATAL, "Unable to start runmode"); - } - - SCLogDebug("RunModeIdsNetmapWorkers initialised"); - -#endif /* HAVE_NETMAP */ + FatalError(SC_ERR_FATAL, "Netmap not configured"); SCReturnInt(0); } +#endif // #ifdef HAVE_NETMAP /** * @} diff --git a/src/runmode-netmap.h b/src/runmode-netmap.h index 80e3a7ac5c..f7efb2d2c4 100644 --- a/src/runmode-netmap.h +++ b/src/runmode-netmap.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2014 Open Information Security Foundation +/* Copyright (C) 2014-2021 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free diff --git a/src/source-netmap.c b/src/source-netmap.c index 136e9cb420..ad24268c84 100644 --- a/src/source-netmap.c +++ b/src/source-netmap.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2018 Open Information Security Foundation +/* Copyright (C) 2011-2021 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free @@ -22,53 +22,36 @@ */ /** -* \file -* -* \author Aleksey Katargin -* \author Victor Julien -* -* Netmap socket acquisition support -* -* Many thanks to Luigi Rizzo for guidance and support. 
-* -*/ - + * \file + * + * \author Aleksey Katargin + * \author Victor Julien + * \author Bill Meeks + * + * Netmap socket acquisition support + * + * Many thanks to Luigi Rizzo for guidance and support. + * + */ -#include "suricata-common.h" #include "suricata.h" -#include "decode.h" -#include "threads.h" -#include "threadvars.h" +#include "suricata-common.h" #include "tm-threads.h" -#include "conf.h" #include "util-bpf.h" -#include "util-debug.h" -#include "util-device.h" -#include "util-error.h" #include "util-privs.h" -#include "util-optimize.h" -#include "util-checksum.h" #include "util-validate.h" -#include "tmqh-packetpool.h" #include "source-netmap.h" -#include "runmodes.h" #ifdef HAVE_NETMAP -#if HAVE_SYS_IOCTL_H -#include <sys/ioctl.h> -#endif - -#if HAVE_SYS_MMAN_H -#include <sys/mman.h> -#endif - #define NETMAP_WITH_LIBS #ifdef DEBUG #define DEBUG_NETMAP_USER #endif + #include <net/netmap_user.h> +#include <libnetmap.h> #endif /* HAVE_NETMAP */ @@ -81,10 +64,10 @@ */ static TmEcode NoNetmapSupportExit(ThreadVars *tv, const void *initdata, void **data) { - SCLogError(SC_ERR_NO_NETMAP,"Error creating thread %s: you do not have " - "support for netmap enabled, please recompile " - "with --enable-netmap", tv->name); - exit(EXIT_FAILURE); + FatalError(SC_ERR_NO_NETMAP, + "Error creating thread %s: Netmap is not enabled. " + "Make sure to pass --enable-netmap to configure when building.", + tv->name); } void TmModuleReceiveNetmapRegister (void) @@ -119,14 +102,7 @@ void TmModuleDecodeNetmapRegister (void) #define POLL_EVENTS (POLLHUP|POLLERR|POLLNVAL) #endif -enum { - NETMAP_OK, - NETMAP_FAILURE, -}; - -enum { - NETMAP_FLAG_ZERO_COPY = 1, -}; +enum { NETMAP_FLAG_ZERO_COPY = 1, NETMAP_FLAG_EXCL_RING_ACCESS = 2 }; /** * \brief Netmap device instance. 
Each ring for each device gets its own @@ -134,7 +110,7 @@ enum { */ typedef struct NetmapDevice_ { - struct nm_desc *nmd; + struct nmport_d *nmd; unsigned int ref; SC_ATOMIC_DECLARE(unsigned int, threads_run); TAILQ_ENTRY(NetmapDevice_) next; @@ -143,6 +119,9 @@ typedef struct NetmapDevice_ char ifname[32]; int ring; int direction; // 0 rx, 1 tx + + // autofp: Used to lock a destination ring while we are sending data. + SCMutex netmap_dev_lock; } NetmapDevice; /** @@ -150,7 +129,7 @@ typedef struct NetmapDevice_ */ typedef struct NetmapThreadVars_ { - /* receive inteface */ + /* receive interface */ NetmapDevice *ifsrc; /* dst interface for IPS mode */ NetmapDevice *ifdst; @@ -185,12 +164,21 @@ static SCMutex netmap_devlist_lock = SCMUTEX_INITIALIZER; */ int NetmapGetRSSCount(const char *ifname) { - struct nmreq nm_req; + struct nmreq_port_info_get req; + struct nmreq_header hdr; int rx_rings = 0; + /* we need the base interface name to query queues */ + char base_name[IFNAMSIZ]; + strlcpy(base_name, ifname, sizeof(base_name)); + if (strlen(base_name) > 0 && + (base_name[strlen(base_name) - 1] == '^' || base_name[strlen(base_name) - 1] == '*')) { + base_name[strlen(base_name) - 1] = '\0'; + } + SCMutexLock(&netmap_devlist_lock); - /* open netmap */ + /* open netmap device */ int fd = open("/dev/netmap", O_RDWR); if (fd == -1) { SCLogError(SC_ERR_NETMAP_CREATE, @@ -199,19 +187,24 @@ int NetmapGetRSSCount(const char *ifname) goto error_open; } - /* query netmap info */ - memset(&nm_req, 0, sizeof(nm_req)); - strlcpy(nm_req.nr_name, ifname, sizeof(nm_req.nr_name)); - nm_req.nr_version = NETMAP_API; + /* query netmap interface info */ + memset(&req, 0, sizeof(req)); + memset(&hdr, 0, sizeof(hdr)); + hdr.nr_version = NETMAP_API; + hdr.nr_reqtype = NETMAP_REQ_PORT_INFO_GET; + hdr.nr_body = (uintptr_t)&req; + strlcpy(hdr.nr_name, base_name, sizeof(hdr.nr_name)); - if (ioctl(fd, NIOCGINFO, &nm_req) != 0) { - SCLogError(SC_ERR_NETMAP_CREATE, - "Couldn't query netmap for 
%s, error %s", + if (ioctl(fd, NIOCCTRL, &hdr) != 0) { + SCLogError(SC_ERR_NETMAP_CREATE, "Couldn't query netmap for info about %s, error %s", ifname, strerror(errno)); goto error_fd; }; - rx_rings = nm_req.nr_rx_rings; + /* return RX rings count if it equals TX rings count */ + if (req.nr_rx_rings == req.nr_tx_rings) { + rx_rings = req.nr_rx_rings; + } error_fd: close(fd); @@ -220,16 +213,68 @@ error_open: return rx_rings; } +static void NetmapDestroyDevice(NetmapDevice *pdev) +{ + nmport_close(pdev->nmd); + SCMutexDestroy(&pdev->netmap_dev_lock); + SCFree(pdev); +} + +/** + * \brief Close or dereference netmap device instance. + * \param dev Netmap device instance. + * \return Zero on success. + */ +static int NetmapClose(NetmapDevice *dev) +{ + NetmapDevice *pdev, *tmp; + + SCMutexLock(&netmap_devlist_lock); + + TAILQ_FOREACH_SAFE (pdev, &netmap_devlist, next, tmp) { + if (pdev == dev) { + pdev->ref--; + if (!pdev->ref) { + NetmapDestroyDevice(pdev); + } + SCMutexUnlock(&netmap_devlist_lock); + return 0; + } + } + + SCMutexUnlock(&netmap_devlist_lock); + return -1; +} + +/** + * \brief Close all open netmap device instances. + */ +static void NetmapCloseAll(void) +{ + NetmapDevice *pdev, *tmp; + + SCMutexLock(&netmap_devlist_lock); + + TAILQ_FOREACH_SAFE (pdev, &netmap_devlist, next, tmp) { + NetmapDestroyDevice(pdev); + } + + SCMutexUnlock(&netmap_devlist_lock); +} + /** * \brief Open interface in netmap mode. * \param ifname Interface name. * \param promisc Enable promiscuous mode. * \param dev Pointer to requested netmap device instance. * \param verbose Verbose error logging. + * \param read Indicates direction: RX or TX + * \param zerocopy 1 if zerocopy access requested + * \param soft Use Host stack (software) interface * \return Zero on success. 
*/ -static int NetmapOpen(NetmapIfaceSettings *ns, - NetmapDevice **pdevice, int verbose, int read, bool zerocopy) +static int NetmapOpen(NetmapIfaceSettings *ns, NetmapDevice **pdevice, int verbose, int read, + bool zerocopy, bool soft) { SCEnter(); SCLogDebug("ifname %s", ns->iface); @@ -248,8 +293,7 @@ static int NetmapOpen(NetmapIfaceSettings *ns, int if_flags = GetIfaceFlags(base_name); if (if_flags == -1) { if (verbose) { - SCLogError(SC_ERR_NETMAP_CREATE, - "Can not access to interface '%s' (%s)", + SCLogError(SC_ERR_NETMAP_CREATE, "Cannot access network interface '%s' (%s)", base_name, ns->iface); } goto error; @@ -268,19 +312,20 @@ static int NetmapOpen(NetmapIfaceSettings *ns, } } NetmapDevice *pdev = NULL, *spdev = NULL; - pdev = SCMalloc(sizeof(*pdev)); + pdev = SCCalloc(1, sizeof(*pdev)); if (unlikely(pdev == NULL)) { SCLogError(SC_ERR_MEM_ALLOC, "Memory allocation failed"); goto error; } - memset(pdev, 0, sizeof(*pdev)); SC_ATOMIC_INIT(pdev->threads_run); SCMutexLock(&netmap_devlist_lock); const int direction = (read != 1); int ring = 0; - /* search interface in our already opened list */ + /* Search for interface in our already opened list. */ + /* We will find it when opening multiple rings on */ + /* the device when it exposes multiple RSS queues. */ TAILQ_FOREACH(spdev, &netmap_devlist, next) { SCLogDebug("spdev %s", spdev->ifname); if (direction == spdev->direction && strcmp(ns->iface, spdev->ifname) == 0) { @@ -294,19 +339,40 @@ static int NetmapOpen(NetmapIfaceSettings *ns, const char *opt_x = "x"; // not for IPS const char *opt_z = "z"; // zero copy, not for IPS - // FreeBSD 11 doesn't have R and T. 
-#if NETMAP_API<=11 - opt_R = ""; - opt_T = ""; -#endif /* assemble options string */ char optstr[16]; if (ns->ips) opt_x = ""; -// z seems to not play well with multiple opens of a real dev on linux -// if (!zerocopy || ips) + // z seems to not play well with multiple opens of a real dev on linux opt_z = ""; + /* + * How netmap endpoint names are selected: + * + * The following logic within the "retry" loop builds endpoint names. + * + * IPS Mode: + * There are two endpoints: one hardware NIC and either a hardware NIC or host stack "NIC". + * + * IDS Mode: + * One endpoint -- usually a hardware NIC. + * + * IPS mode -- with one endpoint a host stack "NIC": + * When using multiple rings/threads, then the open of the initial Ring 0 MUST + * instruct netmap to open multiple Host Stack rings (as the default is to open only a single + * pair). This is also critical for the HW NIC endpoint. This is done by adding + * “@conf:host-rings=x” suffix option (where “x” is the number of host rings desired) + * to BOTH endpoint nmport_open_desc() calls for ring 0 (hardware and host stack). + * For subsequent additional ring open calls, omit the suffix option specifying host ring count. + * + * IPS mode -- both endpoints are hardware NICs: + * Do NOT pass any suffix option (even for Ring 0). You do not need to tell netmap how many + * rings, because it already knows the correct value from the NIC driver itself. Specifying a + * desired ring count when both ends are Hardware NICs confuses netmap, and it seems to default + * to using only a single hardware ring. In this scenario, specify only the specific ring number + * being opened. + */ + // loop to retry opening if unsupported options are used retry: snprintf(optstr, sizeof(optstr), "%s%s%s", opt_z, opt_x, direction == 0 ? opt_R : opt_T); @@ -317,44 +383,91 @@ retry: ns->iface, ring, strlen(optstr) ? 
"/" : "", optstr); } else if (strlen(ns->iface) > 5 && strncmp(ns->iface, "vale", 4) == 0 && isdigit(ns->iface[4])) { snprintf(devname, sizeof(devname), "%s", ns->iface); - } else if (ns->iface[strlen(ns->iface)-1] == '*' || - ns->iface[strlen(ns->iface)-1] == '^') { - SCLogDebug("device with SW-ring enabled (ns->iface): %s",ns->iface); - snprintf(devname, sizeof(devname), "netmap:%s", ns->iface); - SCLogDebug("device with SW-ring enabled (devname): %s",devname); - /* just a single ring, so don't use ring param */ } else if (ring == 0 && ns->threads == 1) { + /* just a single thread and ring, so don't use ring param */ snprintf(devname, sizeof(devname), "netmap:%s%s%s", ns->iface, strlen(optstr) ? "/" : "", optstr); + SCLogDebug("device with %s-ring enabled (devname): %s", soft ? "SW" : "HW", devname); } else { - snprintf(devname, sizeof(devname), "netmap:%s-%d%s%s", - ns->iface, ring, strlen(optstr) ? "/" : "", optstr); + /* Going to be using multiple threads and rings */ + if (ns->sw_ring) { + /* Opening a host stack interface */ + if (ring == 0) { + /* Ring 0, so tell netmap how many host rings we want created */ + snprintf(devname, sizeof(devname), "netmap:%s%d%s%s@conf:host-rings=%d", ns->iface, + ring, strlen(optstr) ? "/" : "", optstr, ns->threads); + } else { + /* Software (host) ring, but not initial open of ring 0 */ + snprintf(devname, sizeof(devname), "netmap:%s%d%s%s", ns->iface, ring, + strlen(optstr) ? "/" : "", optstr); + } + SCLogDebug("device with SW-ring enabled (devname): %s", devname); + } else if (ring == 0 && soft) { + /* Ring 0 of HW endpoint, and other endpoint is SW stack, + * so request SW host stack rings to match HW rings count. + */ + snprintf(devname, sizeof(devname), "netmap:%s-%d%s%s@conf:host-rings=%d", ns->iface, + ring, strlen(optstr) ? 
"/" : "", optstr, ns->threads); + SCLogDebug("device with HW-ring enabled (devname): %s", devname); + } else { + /* Hardware ring other than ring 0, or both endpoints are HW + * and there is no host stack (SW) endpoint */ + snprintf(devname, sizeof(devname), "netmap:%s-%d%s%s", ns->iface, ring, + strlen(optstr) ? "/" : "", optstr); + SCLogDebug("device with HW-ring enabled (devname): %s", devname); + } } + strlcpy(pdev->ifname, ns->iface, sizeof(pdev->ifname)); - pdev->nmd = nm_open(devname, NULL, 0, NULL); + /* have the netmap API parse device name and prepare the port descriptor for us */ + pdev->nmd = nmport_prepare(devname); + + if (pdev->nmd != NULL) { + /* For RX devices, set the nr_mode flag we need on the netmap port TX rings prior to opening + */ + if (read) { + pdev->nmd->reg.nr_flags |= NR_NO_TX_POLL; + } + + /* Now attempt to actually open the netmap port descriptor */ + if (nmport_open_desc(pdev->nmd) < 0) { + /* the open failed, so clean-up the descriptor and fall through to error handler */ + nmport_close(pdev->nmd); + pdev->nmd = NULL; + } + } + if (pdev->nmd == NULL) { - if (errno == EINVAL && opt_z[0] == 'z') { - SCLogNotice("got '%s' EINVAL: going to retry without 'z'", devname); - opt_z = ""; - goto retry; - } else if (errno == EINVAL && opt_x[0] == 'x') { - SCLogNotice("dev '%s' got EINVAL: going to retry without 'x'", devname); - opt_x = ""; - goto retry; + if (errno == EINVAL) { + if (opt_z[0] == 'z') { + SCLogNotice("got '%s' EINVAL: going to retry without 'z'", devname); + opt_z = ""; + goto retry; + } else if (opt_x[0] == 'x') { + SCLogNotice("dev '%s' got EINVAL: going to retry without 'x'", devname); + opt_x = ""; + goto retry; + } } - SCLogError(SC_ERR_NETMAP_CREATE, "opening devname %s failed: %s", - devname, strerror(errno)); - exit(EXIT_FAILURE); + NetmapCloseAll(); + FatalError(SC_ERR_FATAL, "opening devname %s failed: %s", devname, strerror(errno)); } - SCLogDebug("devname %s %s opened", devname, ns->iface); + + /* Work around bug 
in libnetmap library where "cur_{r,t}x_ring" values not initialized */ + SCLogDebug("%s -- cur rings: [%d, %d] first rings: [%d, %d]", devname, pdev->nmd->cur_rx_ring, + pdev->nmd->cur_tx_ring, pdev->nmd->first_rx_ring, pdev->nmd->first_tx_ring); + pdev->nmd->cur_rx_ring = pdev->nmd->first_rx_ring; + pdev->nmd->cur_tx_ring = pdev->nmd->first_tx_ring; + + SCLogInfo("devname [fd: %d] %s %s opened", pdev->nmd->fd, devname, ns->iface); pdev->direction = direction; pdev->ring = ring; + SCMutexInit(&pdev->netmap_dev_lock, NULL); TAILQ_INSERT_TAIL(&netmap_devlist, pdev, next); - SCLogNotice("opened %s from %s: %p", devname, ns->iface, pdev->nmd); SCMutexUnlock(&netmap_devlist_lock); *pdevice = pdev; @@ -363,33 +476,6 @@ error: return -1; } -/** - * \brief Close or dereference netmap device instance. - * \param pdev Netmap device instance. - * \return Zero on success. - */ -static int NetmapClose(NetmapDevice *dev) -{ - NetmapDevice *pdev, *tmp; - - SCMutexLock(&netmap_devlist_lock); - - TAILQ_FOREACH_SAFE(pdev, &netmap_devlist, next, tmp) { - if (pdev == dev) { - pdev->ref--; - if (!pdev->ref) { - nm_close(pdev->nmd); - SCFree(pdev); - } - SCMutexUnlock(&netmap_devlist_lock); - return 0; - } - } - - SCMutexUnlock(&netmap_devlist_lock); - return -1; -} - /** * \brief PcapDumpCounters * \param ntv @@ -413,23 +499,18 @@ static inline void NetmapDumpCounters(NetmapThreadVars *ntv) static TmEcode ReceiveNetmapThreadInit(ThreadVars *tv, const void *initdata, void **data) { SCEnter(); - NetmapIfaceConfig *aconf = (NetmapIfaceConfig *)initdata; + NetmapIfaceConfig *aconf = (NetmapIfaceConfig *)initdata; if (initdata == NULL) { SCLogError(SC_ERR_INVALID_ARGUMENT, "initdata == NULL"); SCReturnInt(TM_ECODE_FAILED); } - NetmapThreadVars *ntv = SCMalloc(sizeof(*ntv)); + NetmapThreadVars *ntv = SCCalloc(1, sizeof(*ntv)); if (unlikely(ntv == NULL)) { SCLogError(SC_ERR_MEM_ALLOC, "Memory allocation failed"); goto error; } - memset(ntv, 0, sizeof(*ntv)); - - ntv->tv = tv; - 
ntv->checksum_mode = aconf->in.checksum_mode; - ntv->copy_mode = aconf->in.copy_mode; ntv->livedev = LiveGetDevice(aconf->iface_name); if (ntv->livedev == NULL) { @@ -437,30 +518,29 @@ static TmEcode ReceiveNetmapThreadInit(ThreadVars *tv, const void *initdata, voi goto error_ntv; } + ntv->tv = tv; + ntv->checksum_mode = aconf->in.checksum_mode; + ntv->copy_mode = aconf->in.copy_mode; + /* enable zero-copy mode for workers runmode */ char const *active_runmode = RunmodeGetActive(); if (strcmp("workers", active_runmode) == 0) { ntv->flags |= NETMAP_FLAG_ZERO_COPY; SCLogDebug("Enabling zero copy mode for %s", aconf->in.iface); + } else if (strcmp("autofp", active_runmode) == 0) { + ntv->flags |= NETMAP_FLAG_EXCL_RING_ACCESS; } - if (NetmapOpen(&aconf->in, &ntv->ifsrc, 1, 1, - (ntv->flags & NETMAP_FLAG_ZERO_COPY) != 0) != 0) { + /* Need to insure open of ring 0 conveys requested ring count for open */ + bool soft = aconf->in.sw_ring || aconf->out.sw_ring; + if (NetmapOpen(&aconf->in, &ntv->ifsrc, 1, 1, (ntv->flags & NETMAP_FLAG_ZERO_COPY) != 0, + soft) != 0) { goto error_ntv; } - if (unlikely(aconf->in.sw_ring && aconf->in.threads > 1)) { - SCLogError(SC_ERR_INVALID_VALUE, - "Interface '%s+'. 
" - "Thread count can't be greater than 1 for SW ring.", - aconf->iface_name); - goto error_src; - } - if (aconf->in.copy_mode != NETMAP_COPY_MODE_NONE) { - SCLogDebug("IPS: opening out iface %s", aconf->out.iface); - if (NetmapOpen(&aconf->out, &ntv->ifdst, - 1, 0, false) != 0) { + if (NetmapOpen(&aconf->out, &ntv->ifdst, 1, 0, (ntv->flags & NETMAP_FLAG_ZERO_COPY) != 0, + soft) != 0) { goto error_src; } } @@ -491,17 +571,23 @@ static TmEcode ReceiveNetmapThreadInit(ThreadVars *tv, const void *initdata, voi } } + SCLogDebug("thread: %s polling on fd: %d", tv->name, ntv->ifsrc->nmd->fd); + *data = (void *)ntv; aconf->DerefFunc(aconf); SCReturnInt(TM_ECODE_OK); + error_dst: if (aconf->in.copy_mode != NETMAP_COPY_MODE_NONE) { NetmapClose(ntv->ifdst); } + error_src: NetmapClose(ntv->ifsrc); + error_ntv: SCFree(ntv); + error: aconf->DerefFunc(aconf); SCReturnInt(TM_ECODE_FAILED); @@ -521,16 +607,29 @@ static TmEcode NetmapWritePacket(NetmapThreadVars *ntv, Packet *p) } DEBUG_VALIDATE_BUG_ON(ntv->ifdst == NULL); - if (nm_inject(ntv->ifdst->nmd, GET_PKT_DATA(p), GET_PKT_LEN(p)) == 0) { - SCLogDebug("failed to send %s -> %s", - ntv->ifsrc->ifname, ntv->ifdst->ifname); + /* Lock the destination netmap ring while writing to it */ + if (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) { + SCMutexLock(&ntv->ifdst->netmap_dev_lock); + } + + /* attempt to write the packet into the netmap ring buffer(s) */ + if (nmport_inject(ntv->ifdst->nmd, GET_PKT_DATA(p), GET_PKT_LEN(p)) == 0) { + if (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) { + SCMutexUnlock(&ntv->ifdst->netmap_dev_lock); + } + SCLogDebug("failed to send %s -> %s", ntv->ifsrc->ifname, ntv->ifdst->ifname); ntv->drops++; + return TM_ECODE_FAILED; } - SCLogDebug("sent succesfully: %s(%d)->%s(%d) (%u)", - ntv->ifsrc->ifname, ntv->ifsrc->ring, + + SCLogDebug("sent successfully: %s(%d)->%s(%d) (%u)", ntv->ifsrc->ifname, ntv->ifsrc->ring, ntv->ifdst->ifname, ntv->ifdst->ring, GET_PKT_LEN(p)); + /* Instruct netmap to push the data on 
the TX ring on the destination port */ ioctl(ntv->ifdst->nmd->fd, NIOCTXSYNC, 0); + if (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) { + SCMutexUnlock(&ntv->ifdst->netmap_dev_lock); + } return TM_ECODE_OK; } @@ -549,13 +648,11 @@ static void NetmapReleasePacket(Packet *p) PacketFreeOrRelease(p); } -static void NetmapCallback(u_char *user, const struct nm_pkthdr *ph, const u_char *d) +static void NetmapProcessPacket(NetmapThreadVars *ntv, const struct nm_pkthdr *ph) { - NetmapThreadVars *ntv = (NetmapThreadVars *)user; - if (ntv->bpf_prog.bf_len) { struct pcap_pkthdr pkthdr = { {0, 0}, ph->len, ph->len }; - if (pcap_offline_filter(&ntv->bpf_prog, &pkthdr, d) == 0) { + if (pcap_offline_filter(&ntv->bpf_prog, &pkthdr, ph->buf) == 0) { return; } } @@ -573,12 +670,12 @@ static void NetmapCallback(u_char *user, const struct nm_pkthdr *ph, const u_cha ntv->bytes += ph->len; if (ntv->flags & NETMAP_FLAG_ZERO_COPY) { - if (PacketSetData(p, (uint8_t *)d, ph->len) == -1) { + if (PacketSetData(p, (uint8_t *)ph->buf, ph->len) == -1) { TmqhOutputPacketpool(ntv->tv, p); return; } } else { - if (PacketCopyData(p, (uint8_t *)d, ph->len) == -1) { + if (PacketCopyData(p, (uint8_t *)ph->buf, ph->len) == -1) { TmqhOutputPacketpool(ntv->tv, p); return; } @@ -593,6 +690,83 @@ static void NetmapCallback(u_char *user, const struct nm_pkthdr *ph, const u_cha (void)TmThreadsSlotProcessPkt(ntv->tv, ntv->slot, p); } +/** + * \brief Copy netmap rings data into Packet structures. + * \param *d nmport_d (or nm_desc) netmap if structure. + * \param cnt int count of packets to read (-1 = all). + * \param *ntv NetmapThreadVars. 
+ */ +static TmEcode NetmapReadPackets(struct nmport_d *d, int cnt, NetmapThreadVars *ntv) +{ + struct nm_pkthdr hdr; + int last_ring = d->last_rx_ring - d->first_rx_ring + 1; + int cur_ring, got = 0, cur_rx_ring = d->cur_rx_ring; + + memset(&hdr, 0, sizeof(hdr)); + hdr.flags = NM_MORE_PKTS; + + if (cnt == 0) + cnt = -1; + + for (cur_ring = 0; cur_ring < last_ring && cnt != got; cur_ring++, cur_rx_ring++) { + struct netmap_ring *ring; + + if (cur_rx_ring > d->last_rx_ring) + cur_rx_ring = d->first_rx_ring; + + ring = NETMAP_RXRING(d->nifp, cur_rx_ring); + + /* cycle through the non-empty ring slots to fetch their data */ + for (; !nm_ring_empty(ring) && cnt != got; got++) { + u_int idx, i; + u_char *oldbuf; + struct netmap_slot *slot; + + if (hdr.buf) { /* from previous round */ + NetmapProcessPacket(ntv, &hdr); + } + + i = ring->cur; + slot = &ring->slot[i]; + idx = slot->buf_idx; + d->cur_rx_ring = cur_rx_ring; + hdr.slot = slot; + oldbuf = hdr.buf = (u_char *)NETMAP_BUF(ring, idx); + hdr.len = hdr.caplen = slot->len; + + /* loop through the ring slots to get packet data */ + while (slot->flags & NS_MOREFRAG) { + /* packet can be fragmented across multiple slots, */ + /* so loop until we find the slot with the flag */ + /* cleared, signalling the end of the packet data. 
*/ + u_char *nbuf; + u_int oldlen = slot->len; + i = nm_ring_next(ring, i); + slot = &ring->slot[i]; + hdr.len += slot->len; + nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx); + + if (oldbuf != NULL && nbuf - oldbuf == ring->nr_buf_size && + oldlen == ring->nr_buf_size) { + hdr.caplen += slot->len; + oldbuf = nbuf; + } else { + oldbuf = NULL; + } + } + + hdr.ts = ring->ts; + ring->head = ring->cur = nm_ring_next(ring, i); + } + } + + if (hdr.buf) { /* from previous round */ + hdr.flags = 0; + NetmapProcessPacket(ntv, &hdr); + } + return got; +} + /** * \brief Main netmap reading loop function */ @@ -608,6 +782,7 @@ static TmEcode ReceiveNetmapLoop(ThreadVars *tv, void *data, void *slot) fds.fd = ntv->ifsrc->nmd->fd; fds.events = POLLIN; + SCLogDebug("thread %s polling on %d", tv->name, fds.fd); for(;;) { if (unlikely(suricata_ctl_flags != 0)) { break; @@ -618,19 +793,17 @@ static TmEcode ReceiveNetmapLoop(ThreadVars *tv, void *data, void *slot) PacketPoolWait(); int r = poll(&fds, 1, POLL_TIMEOUT); + if (r < 0) { /* error */ if (errno != EINTR) SCLogError(SC_ERR_NETMAP_READ, - "Error polling netmap from iface '%s': (%d" PRIu32 ") %s", - ntv->ifsrc->ifname, errno, strerror(errno)); + "Error polling netmap from iface '%s': (%d" PRIu32 ") %s", + ntv->ifsrc->ifname, errno, strerror(errno)); continue; } else if (r == 0) { /* no events, timeout */ - //SCLogDebug("(%s:%d-%d) Poll timeout", ntv->ifsrc->ifname, - // ntv->src_ring_from, ntv->src_ring_to); - /* sync counters */ NetmapDumpCounters(ntv); StatsSyncCountersIfSignalled(tv); @@ -642,18 +815,18 @@ static TmEcode ReceiveNetmapLoop(ThreadVars *tv, void *data, void *slot) if (unlikely(fds.revents & POLL_EVENTS)) { if (fds.revents & POLLERR) { - //SCLogError(SC_ERR_NETMAP_READ, - // "Error reading data from iface '%s': (%d" PRIu32 ") %s", - // ntv->ifsrc->ifname, errno, strerror(errno)); - } else if (fds.revents & POLLNVAL) { SCLogError(SC_ERR_NETMAP_READ, - "Invalid polling request"); + "Error reading netmap data via 
polling from iface '%s': (%d" PRIu32 ") %s", + ntv->ifsrc->ifname, errno, strerror(errno)); + } else if (fds.revents & POLLNVAL) { + SCLogError(SC_ERR_NETMAP_READ, "Invalid polling request"); } continue; } if (likely(fds.revents & POLLIN)) { - nm_dispatch(ntv->ifsrc->nmd, -1, NetmapCallback, (void *)ntv); + /* have data on RX ring, so copy to Packet for processing */ + NetmapReadPackets(ntv->ifsrc->nmd, -1, ntv); } NetmapDumpCounters(ntv); @@ -713,7 +886,7 @@ static TmEcode ReceiveNetmapThreadDeinit(ThreadVars *tv, void *data) /** * \brief Prepare netmap decode thread. - * \param tv Thread local avariables. + * \param tv Thread local variables. * \param initdata Thread config. * \param data Pointer to DecodeThreadVars placed here. */ diff --git a/src/source-netmap.h b/src/source-netmap.h index adfdaf57d3..b60d544d5c 100644 --- a/src/source-netmap.h +++ b/src/source-netmap.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2014-2018 Open Information Security Foundation +/* Copyright (C) 2014-2021 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free @@ -25,8 +25,6 @@ #ifndef __SOURCE_NETMAP_H__ #define __SOURCE_NETMAP_H__ -#include "queue.h" - /* copy modes */ enum { NETMAP_COPY_MODE_NONE,