From: Jeff Lucovsky Date: Fri, 20 Aug 2021 13:10:26 +0000 (-0400) Subject: netmap: V14 API changes X-Git-Tag: suricata-6.0.9~29 X-Git-Url: http://git.ipfire.org/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=1ba259450b7af3a0ae0a398cc56fd2ec9ae69495;p=thirdparty%2Fsuricata.git netmap: V14 API changes This commit modifies the Netmap packet handling to use API version 14. @bmeeks8 contributed many changes instrumental to this effort. (cherry picked from commit ca7d097225c2eeb21df28dd17658d74af4999e95) --- diff --git a/src/runmode-netmap.c b/src/runmode-netmap.c index 27bb845ad1..2450fba094 100644 --- a/src/runmode-netmap.c +++ b/src/runmode-netmap.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2014-2018 Open Information Security Foundation +/* Copyright (C) 2014-2021 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free @@ -22,13 +22,14 @@ */ /** -* \file -* -* \author Aleksey Katargin -* -* Netmap runmode -* -*/ + * \file + * + * \author Aleksey Katargin + * \author Bill Meeks + * + * Netmap runmode + * + */ #include "suricata-common.h" #include "tm-threads.h" @@ -52,6 +53,11 @@ #include "util-ioctl.h" #include "util-byte.h" +#ifdef HAVE_NETMAP +#define NETMAP_WITH_LIBS +#include +#endif /* HAVE_NETMAP */ + #include "source-netmap.h" extern int max_pending_packets; @@ -63,6 +69,15 @@ const char *RunModeNetmapGetDefaultMode(void) void RunModeIdsNetmapRegister(void) { +#if HAVE_NETMAP + SCLogInfo("Using netmap version %d [" +#if USE_NEW_NETMAP_API + "new" +#else + "legacy" +#endif + " API interfaces]", + NETMAP_API); RunModeRegisterNewRunMode(RUNMODE_NETMAP, "single", "Single threaded netmap mode", RunModeIdsNetmapSingle); @@ -76,6 +91,7 @@ void RunModeIdsNetmapRegister(void) "thread.", RunModeIdsNetmapAutoFp); return; +#endif } #ifdef HAVE_NETMAP @@ -111,6 +127,14 @@ static int ParseNetmapSettings(NetmapIfaceSettings *ns, const char *iface, } } + /* we will need the base interface name for later */ + char base_name[IFNAMSIZ]; + strlcpy(base_name, ns->iface, sizeof(base_name)); + if (strlen(base_name) > 0 && + (base_name[strlen(base_name) - 1] == '^' || base_name[strlen(base_name) - 1] == '*')) { + base_name[strlen(base_name) - 1] = '\0'; + } + /* prefixed with netmap or vale means it's not a real interface * and we don't check offloading. */ if (strncmp(ns->iface, "netmap:", 7) != 0 && @@ -208,17 +232,24 @@ finalize: ns->ips = (ns->copy_mode != NETMAP_COPY_MODE_NONE); +#if !USE_NEW_NETMAP_API if (ns->sw_ring) { /* just one thread per interface supported */ ns->threads = 1; - } else if (ns->threads_auto) { - /* As NetmapGetRSSCount used to be broken on Linux, - * fall back to GetIfaceRSSQueuesNum if needed. */ - ns->threads = NetmapGetRSSCount(ns->iface); - if (ns->threads == 0) { - ns->threads = GetIfaceRSSQueuesNum(ns->iface); + } else { +#endif + if (ns->threads_auto) { + /* As NetmapGetRSSCount used to be broken on Linux, + * fall back to GetIfaceRSSQueuesNum if needed. */ + ns->threads = NetmapGetRSSCount(ns->iface); + if (ns->threads == 0) { + /* need to use base_name of interface here */ + ns->threads = GetIfaceRSSQueuesNum(base_name); + } } +#if !USE_NEW_NETMAP_API } +#endif if (ns->threads <= 0) { ns->threads = 1; } @@ -227,15 +258,15 @@ finalize: } /** -* \brief extract information from config file -* -* The returned structure will be freed by the thread init function. -* This is thus necessary to or copy the structure before giving it -* to thread or to reparse the file for each thread (and thus have -* new structure. -* -* \return a NetmapIfaceConfig corresponding to the interface name -*/ + * \brief extract information from config file + * + * The returned structure will be freed by the thread init function. + * This is thus necessary to or copy the structure before giving it + * to thread or to reparse the file for each thread (and thus have + * new structure. + * + * \return a NetmapIfaceConfig corresponding to the interface name + */ static void *ParseNetmapConfig(const char *iface_name) { ConfNode *if_root = NULL; @@ -276,7 +307,7 @@ static void *ParseNetmapConfig(const char *iface_name) if (strlen(out_iface) > 0) { if_root = ConfFindDeviceConfig(netmap_node, out_iface); ParseNetmapSettings(&aconf->out, out_iface, if_root, if_default); - +#if !USE_NEW_NETMAP_API /* if one side of the IPS peering uses a sw_ring, we will default * to using a single ring/thread on the other side as well. Only * if thread variable is set to 'auto'. So the user can override @@ -286,9 +317,29 @@ static void *ParseNetmapConfig(const char *iface_name) } else if (aconf->in.sw_ring && aconf->out.threads_auto) { aconf->out.threads = aconf->in.threads = 1; } +#endif } } +#if USE_NEW_NETMAP_API + int ring_count = NetmapGetRSSCount(aconf->iface_name); + if (strlen(aconf->iface_name) > 0 && + (aconf->iface_name[strlen(aconf->iface_name) - 1] == '^' || + aconf->iface_name[strlen(aconf->iface_name) - 1] == '*')) { + SCLogDebug("%s -- using %d netmap host ring pair%s", aconf->iface_name, ring_count, + ring_count == 1 ? "" : "s"); + } else { + SCLogDebug("%s -- using %d netmap ring pair%s", aconf->iface_name, ring_count, + ring_count == 1 ? "" : "s"); + } + + for (int i = 0; i < ring_count; i++) { + char live_buf[32] = { 0 }; + snprintf(live_buf, sizeof(live_buf), "netmap%d", i); + LiveRegisterDevice(live_buf); + } + +#endif /* netmap needs all offloading to be disabled */ if (aconf->in.real) { char base_name[sizeof(aconf->in.iface)]; @@ -310,6 +361,9 @@ static void *ParseNetmapConfig(const char *iface_name) SCLogPerf("Using %d threads for interface %s", aconf->in.threads, aconf->iface_name); +#if USE_NEW_NETMAP_API + LiveDeviceHasNoStats(); +#endif return aconf; } @@ -415,12 +469,8 @@ int RunModeIdsNetmapAutoFp(void) SCLogDebug("live_dev %s", live_dev); - ret = RunModeSetLiveCaptureAutoFp( - ParseNetmapConfig, - NetmapConfigGeThreadsCount, - "ReceiveNetmap", - "DecodeNetmap", thread_name_autofp, - live_dev); + ret = RunModeSetLiveCaptureAutoFp(ParseNetmapConfig, NetmapConfigGeThreadsCount, + "ReceiveNetmap", "DecodeNetmap", thread_name_autofp, live_dev); if (ret != 0) { FatalError(SC_ERR_FATAL, "Unable to start runmode"); } @@ -447,12 +497,8 @@ int RunModeIdsNetmapSingle(void) (void)ConfGet("netmap.live-interface", &live_dev); - ret = RunModeSetLiveCaptureSingle( - ParseNetmapConfig, - NetmapConfigGeThreadsCount, - "ReceiveNetmap", - "DecodeNetmap", thread_name_single, - live_dev); + ret = RunModeSetLiveCaptureSingle(ParseNetmapConfig, NetmapConfigGeThreadsCount, + "ReceiveNetmap", "DecodeNetmap", thread_name_single, live_dev); if (ret != 0) { FatalError(SC_ERR_FATAL, "Unable to start runmode"); } @@ -482,12 +528,8 @@ int RunModeIdsNetmapWorkers(void) (void)ConfGet("netmap.live-interface", &live_dev); - ret = RunModeSetLiveCaptureWorkers( - ParseNetmapConfig, - NetmapConfigGeThreadsCount, - "ReceiveNetmap", - "DecodeNetmap", thread_name_workers, - live_dev); + ret = RunModeSetLiveCaptureWorkers(ParseNetmapConfig, NetmapConfigGeThreadsCount, + "ReceiveNetmap", "DecodeNetmap", thread_name_workers, live_dev); if (ret != 0) { FatalError(SC_ERR_FATAL, "Unable to start runmode"); } diff --git a/src/runmode-netmap.h b/src/runmode-netmap.h index 80e3a7ac5c..f7efb2d2c4 100644 --- a/src/runmode-netmap.h +++ b/src/runmode-netmap.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2014 Open Information Security Foundation +/* Copyright (C) 2014-2021 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free diff --git a/src/source-netmap.c b/src/source-netmap.c index c7f67c10da..00b4400b4e 100644 --- a/src/source-netmap.c +++ b/src/source-netmap.c @@ -1,4 +1,4 @@ -/* Copyright (C) 2011-2018 Open Information Security Foundation +/* Copyright (C) 2011-2021 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free @@ -22,21 +22,20 @@ */ /** -* \file -* -* \author Aleksey Katargin -* \author Victor Julien -* -* Netmap socket acquisition support -* -* Many thanks to Luigi Rizzo for guidance and support. -* -*/ - + * \file + * + * \author Aleksey Katargin + * \author Victor Julien + * \author Bill Meeks + * + * Netmap socket acquisition support + * + * Many thanks to Luigi Rizzo for guidance and support. + * + */ -#include "suricata-common.h" #include "suricata.h" -#include "decode.h" +#include "suricata-common.h" #include "threads.h" #include "threadvars.h" #include "tm-threads.h" @@ -69,6 +68,9 @@ #define DEBUG_NETMAP_USER #endif #include +#if USE_NEW_NETMAP_API +#include +#endif #endif /* HAVE_NETMAP */ @@ -81,10 +83,10 @@ */ static TmEcode NoNetmapSupportExit(ThreadVars *tv, const void *initdata, void **data) { - SCLogError(SC_ERR_NO_NETMAP,"Error creating thread %s: you do not have " - "support for netmap enabled, please recompile " - "with --enable-netmap", tv->name); - exit(EXIT_FAILURE); + FatalError(SC_ERR_NO_NETMAP, + "Error creating thread %s: Netmap is not enabled. " + "Make sure to pass --enable-netmap to configure when building.", + tv->name); } void TmModuleReceiveNetmapRegister (void) @@ -119,14 +121,7 @@ void TmModuleDecodeNetmapRegister (void) #define POLL_EVENTS (POLLHUP|POLLERR|POLLNVAL) #endif -enum { - NETMAP_OK, - NETMAP_FAILURE, -}; - -enum { - NETMAP_FLAG_ZERO_COPY = 1, -}; +enum { NETMAP_FLAG_ZERO_COPY = 1, NETMAP_FLAG_EXCL_RING_ACCESS = 2 }; /** * \brief Netmap device instance. Each ring for each device gets its own @@ -134,7 +129,11 @@ enum { */ typedef struct NetmapDevice_ { +#if USE_NEW_NETMAP_API + struct nmport_d *nmd; +#else struct nm_desc *nmd; +#endif unsigned int ref; SC_ATOMIC_DECLARE(unsigned int, threads_run); TAILQ_ENTRY(NetmapDevice_) next; @@ -143,6 +142,9 @@ typedef struct NetmapDevice_ char ifname[32]; int ring; int direction; // 0 rx, 1 tx + + // autofp: Used to lock a destination ring while we are sending data. + SCMutex netmap_dev_lock; } NetmapDevice; /** @@ -150,7 +152,7 @@ typedef struct NetmapDevice_ */ typedef struct NetmapThreadVars_ { - /* receive inteface */ + /* receive interface */ NetmapDevice *ifsrc; /* dst interface for IPS mode */ NetmapDevice *ifdst; @@ -185,12 +187,21 @@ static SCMutex netmap_devlist_lock = SCMUTEX_INITIALIZER; */ int NetmapGetRSSCount(const char *ifname) { - struct nmreq nm_req; + struct nmreq_port_info_get req; + struct nmreq_header hdr; int rx_rings = 0; + /* we need the base interface name to query queues */ + char base_name[IFNAMSIZ]; + strlcpy(base_name, ifname, sizeof(base_name)); + if (strlen(base_name) > 0 && + (base_name[strlen(base_name) - 1] == '^' || base_name[strlen(base_name) - 1] == '*')) { + base_name[strlen(base_name) - 1] = '\0'; + } + SCMutexLock(&netmap_devlist_lock); - /* open netmap */ + /* open netmap device */ int fd = open("/dev/netmap", O_RDWR); if (fd == -1) { SCLogError(SC_ERR_NETMAP_CREATE, @@ -199,19 +210,24 @@ int NetmapGetRSSCount(const char *ifname) goto error_open; } - /* query netmap info */ - memset(&nm_req, 0, sizeof(nm_req)); - strlcpy(nm_req.nr_name, ifname, sizeof(nm_req.nr_name)); - nm_req.nr_version = NETMAP_API; + /* query netmap interface info */ + memset(&req, 0, sizeof(req)); + memset(&hdr, 0, sizeof(hdr)); + hdr.nr_version = NETMAP_API; + hdr.nr_reqtype = NETMAP_REQ_PORT_INFO_GET; + hdr.nr_body = (uintptr_t)&req; + strlcpy(hdr.nr_name, base_name, sizeof(hdr.nr_name)); - if (ioctl(fd, NIOCGINFO, &nm_req) != 0) { - SCLogError(SC_ERR_NETMAP_CREATE, - "Couldn't query netmap for %s, error %s", + if (ioctl(fd, NIOCCTRL, &hdr) != 0) { + SCLogError(SC_ERR_NETMAP_CREATE, "Couldn't query netmap for info about %s, error %s", ifname, strerror(errno)); goto error_fd; }; - rx_rings = nm_req.nr_rx_rings; + /* return RX rings count if it equals TX rings count */ + if (req.nr_rx_rings == req.nr_tx_rings) { + rx_rings = req.nr_rx_rings; + } error_fd: close(fd); @@ -220,16 +236,68 @@ error_open: return rx_rings; } +static void NetmapDestroyDevice(NetmapDevice *pdev) +{ +#if USE_NEW_NETMAP_API + nmport_close(pdev->nmd); +#else + nm_close(pdev->nmd); +#endif + SCMutexDestroy(&pdev->netmap_dev_lock); + SCFree(pdev); +} + +/** + * \brief Close or dereference netmap device instance. + * \param dev Netmap device instance. + * \return Zero on success. + */ +static int NetmapClose(NetmapDevice *dev) +{ + NetmapDevice *pdev, *tmp; + + SCMutexLock(&netmap_devlist_lock); + + TAILQ_FOREACH_SAFE (pdev, &netmap_devlist, next, tmp) { + if (pdev == dev) { + pdev->ref--; + if (!pdev->ref) { + NetmapDestroyDevice(pdev); + } + SCMutexUnlock(&netmap_devlist_lock); + return 0; + } + } + + SCMutexUnlock(&netmap_devlist_lock); + return -1; +} + +/** + * \brief Close all open netmap device instances. + */ +static void NetmapCloseAll(void) +{ + NetmapDevice *pdev, *tmp; + + TAILQ_FOREACH_SAFE (pdev, &netmap_devlist, next, tmp) { + NetmapDestroyDevice(pdev); + } +} + /** * \brief Open interface in netmap mode. * \param ifname Interface name. * \param promisc Enable promiscuous mode. * \param dev Pointer to requested netmap device instance. * \param verbose Verbose error logging. + * \param read Indicates direction: RX or TX + * \param zerocopy 1 if zerocopy access requested + * \param soft Use Host stack (software) interface * \return Zero on success. */ -static int NetmapOpen(NetmapIfaceSettings *ns, - NetmapDevice **pdevice, int verbose, int read, bool zerocopy) +static int NetmapOpen(NetmapIfaceSettings *ns, NetmapDevice **pdevice, int verbose, int read, + bool zerocopy, bool soft) { SCEnter(); SCLogDebug("ifname %s", ns->iface); @@ -248,8 +316,7 @@ static int NetmapOpen(NetmapIfaceSettings *ns, int if_flags = GetIfaceFlags(base_name); if (if_flags == -1) { if (verbose) { - SCLogError(SC_ERR_NETMAP_CREATE, - "Can not access to interface '%s' (%s)", + SCLogError(SC_ERR_NETMAP_CREATE, "Cannot access network interface '%s' (%s)", base_name, ns->iface); } goto error; @@ -280,7 +347,9 @@ static int NetmapOpen(NetmapIfaceSettings *ns, const int direction = (read != 1); int ring = 0; - /* search interface in our already opened list */ + /* Search for interface in our already opened list. */ + /* We will find it when opening multiple rings on */ + /* the device when it exposes multiple RSS queues. */ TAILQ_FOREACH(spdev, &netmap_devlist, next) { SCLogDebug("spdev %s", spdev->ifname); if (direction == spdev->direction && strcmp(ns->iface, spdev->ifname) == 0) { @@ -294,8 +363,8 @@ static int NetmapOpen(NetmapIfaceSettings *ns, const char *opt_x = "x"; // not for IPS const char *opt_z = "z"; // zero copy, not for IPS +#if NETMAP_API <= 11 // FreeBSD 11 doesn't have R and T. -#if NETMAP_API<=11 opt_R = ""; opt_T = ""; #endif @@ -303,10 +372,36 @@ static int NetmapOpen(NetmapIfaceSettings *ns, char optstr[16]; if (ns->ips) opt_x = ""; -// z seems to not play well with multiple opens of a real dev on linux -// if (!zerocopy || ips) + // z seems to not play well with multiple opens of a real dev on linux opt_z = ""; + /* + * How netmap endpoint names are selected: + * + * The following logic within the "retry" loop builds endpoint names. + * + * IPS Mode: + * There are two endpoints: one hardware NIC and either a hardware NIC or host stack "NIC". + * + * IDS Mode: + * One endpoint -- usually a hardware NIC. + * + * IPS mode -- with one endpoint a host stack "NIC": + * When using multiple rings/threads, then the open of the initial Ring 0 MUST + * instruct netmap to open multiple Host Stack rings (as the default is to open only a single + * pair). This is also critical for the HW NIC endpoint. This is done by adding + * “@conf:host-rings=x” suffix option (where “x” is the number of host rings desired) + * to BOTH endpoint nmport_open_desc() calls for ring 0 (hardware and host stack). + * For subsequent additional ring open calls, omit the suffix option specifying host ring count. + * + * IPS mode -- both endpoints are hardware NICs: + * Do NOT pass any suffix option (even for Ring 0). You do not need to tell netmap how many + * rings, because it already knows the correct value from the NIC driver itself. Specifying a + * desired ring count when both ends are Hardware NICs confuses netmap, and it seems to default + * to using only a single hardware ring. In this scenario, specify only the specific ring number + * being opened. + */ + // loop to retry opening if unsupported options are used retry: snprintf(optstr, sizeof(optstr), "%s%s%s", opt_z, opt_x, direction == 0 ? opt_R : opt_T); @@ -317,44 +412,107 @@ retry: ns->iface, ring, strlen(optstr) ? "/" : "", optstr); } else if (strlen(ns->iface) > 5 && strncmp(ns->iface, "vale", 4) == 0 && isdigit(ns->iface[4])) { snprintf(devname, sizeof(devname), "%s", ns->iface); +#if NETMAP_API < 14 || !USET_NET_NETMAP_API } else if (ns->iface[strlen(ns->iface)-1] == '*' || ns->iface[strlen(ns->iface)-1] == '^') { SCLogDebug("device with SW-ring enabled (ns->iface): %s",ns->iface); snprintf(devname, sizeof(devname), "netmap:%s", ns->iface); SCLogDebug("device with SW-ring enabled (devname): %s",devname); - /* just a single ring, so don't use ring param */ +#endif } else if (ring == 0 && ns->threads == 1) { + /* just a single thread and ring, so don't use ring param */ snprintf(devname, sizeof(devname), "netmap:%s%s%s", ns->iface, strlen(optstr) ? "/" : "", optstr); + SCLogDebug("device with %s-ring enabled (devname): %s", soft ? "SW" : "HW", devname); } else { - snprintf(devname, sizeof(devname), "netmap:%s-%d%s%s", - ns->iface, ring, strlen(optstr) ? "/" : "", optstr); +#if !USE_NEW_NETMAP_API + snprintf(devname, sizeof(devname), "netmap:%s-%d%s%s", ns->iface, ring, + strlen(optstr) ? "/" : "", optstr); +#else + /* Going to be using multiple threads and rings */ + if (ns->sw_ring) { + /* Opening a host stack interface */ + if (ring == 0) { + /* Ring 0, so tell netmap how many host rings we want created */ + snprintf(devname, sizeof(devname), "netmap:%s%d%s%s@conf:host-rings=%d", ns->iface, + ring, strlen(optstr) ? "/" : "", optstr, ns->threads); + } else { + /* Software (host) ring, but not initial open of ring 0 */ + snprintf(devname, sizeof(devname), "netmap:%s%d%s%s", ns->iface, ring, + strlen(optstr) ? "/" : "", optstr); + } + SCLogDebug("device with SW-ring enabled (devname): %s", devname); + } else if (ring == 0 && soft) { + /* Ring 0 of HW endpoint, and other endpoint is SW stack, + * so request SW host stack rings to match HW rings count. + */ + snprintf(devname, sizeof(devname), "netmap:%s-%d%s%s@conf:host-rings=%d", ns->iface, + ring, strlen(optstr) ? "/" : "", optstr, ns->threads); + SCLogDebug("device with HW-ring enabled (devname): %s", devname); + } else { + /* Hardware ring other than ring 0, or both endpoints are HW + * and there is no host stack (SW) endpoint */ + snprintf(devname, sizeof(devname), "netmap:%s-%d%s%s", ns->iface, ring, + strlen(optstr) ? "/" : "", optstr); + SCLogDebug("device with HW-ring enabled (devname): %s", devname); + } +#endif } + strlcpy(pdev->ifname, ns->iface, sizeof(pdev->ifname)); +#if USE_NEW_NETMAP_API + /* have the netmap API parse device name and prepare the port descriptor for us */ + pdev->nmd = nmport_prepare(devname); + + if (pdev->nmd != NULL) { + /* For RX devices, set the nr_mode flag we need on the netmap port TX rings prior to opening + */ + if (read) { + pdev->nmd->reg.nr_flags |= NR_NO_TX_POLL; + } + + /* Now attempt to actually open the netmap port descriptor */ + if (nmport_open_desc(pdev->nmd) < 0) { + /* the open failed, so clean-up the descriptor and fall through to error handler */ + nmport_close(pdev->nmd); + pdev->nmd = NULL; + } + } +#else pdev->nmd = nm_open(devname, NULL, 0, NULL); +#endif + if (pdev->nmd == NULL) { - if (errno == EINVAL && opt_z[0] == 'z') { - SCLogNotice("got '%s' EINVAL: going to retry without 'z'", devname); - opt_z = ""; - goto retry; - } else if (errno == EINVAL && opt_x[0] == 'x') { - SCLogNotice("dev '%s' got EINVAL: going to retry without 'x'", devname); - opt_x = ""; - goto retry; + if (errno == EINVAL) { + if (opt_z[0] == 'z') { + SCLogNotice("got '%s' EINVAL: going to retry without 'z'", devname); + opt_z = ""; + goto retry; + } else if (opt_x[0] == 'x') { + SCLogNotice("dev '%s' got EINVAL: going to retry without 'x'", devname); + opt_x = ""; + goto retry; + } } - SCLogError(SC_ERR_NETMAP_CREATE, "opening devname %s failed: %s", - devname, strerror(errno)); - exit(EXIT_FAILURE); + NetmapCloseAll(); + FatalError(SC_ERR_FATAL, "opening devname %s failed: %s", devname, strerror(errno)); } - SCLogDebug("devname %s %s opened", devname, ns->iface); + + /* Work around bug in libnetmap library where "cur_{r,t}x_ring" values not initialized */ + SCLogDebug("%s -- cur rings: [%d, %d] first rings: [%d, %d]", devname, pdev->nmd->cur_rx_ring, + pdev->nmd->cur_tx_ring, pdev->nmd->first_rx_ring, pdev->nmd->first_tx_ring); + pdev->nmd->cur_rx_ring = pdev->nmd->first_rx_ring; + pdev->nmd->cur_tx_ring = pdev->nmd->first_tx_ring; + + SCLogInfo("devname [fd: %d] %s %s opened", pdev->nmd->fd, devname, ns->iface); pdev->direction = direction; pdev->ring = ring; + SCMutexInit(&pdev->netmap_dev_lock, NULL); TAILQ_INSERT_TAIL(&netmap_devlist, pdev, next); - SCLogNotice("opened %s from %s: %p", devname, ns->iface, pdev->nmd); SCMutexUnlock(&netmap_devlist_lock); *pdevice = pdev; @@ -363,33 +521,6 @@ error: return -1; } -/** - * \brief Close or dereference netmap device instance. - * \param pdev Netmap device instance. - * \return Zero on success. - */ -static int NetmapClose(NetmapDevice *dev) -{ - NetmapDevice *pdev, *tmp; - - SCMutexLock(&netmap_devlist_lock); - - TAILQ_FOREACH_SAFE(pdev, &netmap_devlist, next, tmp) { - if (pdev == dev) { - pdev->ref--; - if (!pdev->ref) { - nm_close(pdev->nmd); - SCFree(pdev); - } - SCMutexUnlock(&netmap_devlist_lock); - return 0; - } - } - - SCMutexUnlock(&netmap_devlist_lock); - return -1; -} - /** * \brief PcapDumpCounters * \param ntv @@ -442,25 +573,30 @@ static TmEcode ReceiveNetmapThreadInit(ThreadVars *tv, const void *initdata, voi if (strcmp("workers", active_runmode) == 0) { ntv->flags |= NETMAP_FLAG_ZERO_COPY; SCLogDebug("Enabling zero copy mode for %s", aconf->in.iface); + } else if (strcmp("autofp", active_runmode) == 0) { + ntv->flags |= NETMAP_FLAG_EXCL_RING_ACCESS; } - if (NetmapOpen(&aconf->in, &ntv->ifsrc, 1, 1, - (ntv->flags & NETMAP_FLAG_ZERO_COPY) != 0) != 0) { + /* Need to insure open of ring 0 conveys requested ring count for open */ + bool soft = aconf->in.sw_ring || aconf->out.sw_ring; + if (NetmapOpen(&aconf->in, &ntv->ifsrc, 1, 1, (ntv->flags & NETMAP_FLAG_ZERO_COPY) != 0, + soft) != 0) { goto error_ntv; } +#if !USE_NEW_NETMAP_API if (unlikely(aconf->in.sw_ring && aconf->in.threads > 1)) { SCLogError(SC_ERR_INVALID_VALUE, - "Interface '%s+'. " - "Thread count can't be greater than 1 for SW ring.", - aconf->iface_name); + "Interface '%s^'. " + "Thread count can't be greater than 1 for SW ring.", + aconf->iface_name); goto error_src; } +#endif if (aconf->in.copy_mode != NETMAP_COPY_MODE_NONE) { - SCLogDebug("IPS: opening out iface %s", aconf->out.iface); - if (NetmapOpen(&aconf->out, &ntv->ifdst, - 1, 0, false) != 0) { + if (NetmapOpen(&aconf->out, &ntv->ifdst, 1, 0, (ntv->flags & NETMAP_FLAG_ZERO_COPY) != 0, + soft) != 0) { goto error_src; } } @@ -491,6 +627,8 @@ static TmEcode ReceiveNetmapThreadInit(ThreadVars *tv, const void *initdata, voi } } + SCLogNotice("thread: %s polling on fd: %d", tv->name, ntv->ifsrc->nmd->fd); + *data = (void *)ntv; aconf->DerefFunc(aconf); SCReturnInt(TM_ECODE_OK); @@ -521,16 +659,32 @@ static TmEcode NetmapWritePacket(NetmapThreadVars *ntv, Packet *p) } DEBUG_VALIDATE_BUG_ON(ntv->ifdst == NULL); + /* Lock the destination netmap ring while writing to it */ + if (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) { + SCMutexLock(&ntv->ifdst->netmap_dev_lock); + } + + /* attempt to write the packet into the netmap ring buffer(s) */ +#if USE_NEW_NETMAP_API + if (nmport_inject(ntv->ifdst->nmd, GET_PKT_DATA(p), GET_PKT_LEN(p)) == 0) { + if (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) { + SCMutexUnlock(&ntv->ifdst->netmap_dev_lock); + } +#else if (nm_inject(ntv->ifdst->nmd, GET_PKT_DATA(p), GET_PKT_LEN(p)) == 0) { - SCLogDebug("failed to send %s -> %s", - ntv->ifsrc->ifname, ntv->ifdst->ifname); +#endif + SCLogDebug("failed to send %s -> %s", ntv->ifsrc->ifname, ntv->ifdst->ifname); ntv->drops++; + return TM_ECODE_FAILED; } - SCLogDebug("sent succesfully: %s(%d)->%s(%d) (%u)", - ntv->ifsrc->ifname, ntv->ifsrc->ring, + + SCLogDebug("sent successfully: %s(%d)->%s(%d) (%u)", ntv->ifsrc->ifname, ntv->ifsrc->ring, ntv->ifdst->ifname, ntv->ifdst->ring, GET_PKT_LEN(p)); ioctl(ntv->ifdst->nmd->fd, NIOCTXSYNC, 0); + if (ntv->flags & NETMAP_FLAG_EXCL_RING_ACCESS) { + SCMutexUnlock(&ntv->ifdst->netmap_dev_lock); + } return TM_ECODE_OK; } @@ -549,13 +703,12 @@ static void NetmapReleasePacket(Packet *p) PacketFreeOrRelease(p); } -static void NetmapCallback(u_char *user, const struct nm_pkthdr *ph, const u_char *d) +static void NetmapProcessPacket(NetmapThreadVars *ntv, const struct nm_pkthdr *ph) { - NetmapThreadVars *ntv = (NetmapThreadVars *)user; if (ntv->bpf_prog.bf_len) { struct pcap_pkthdr pkthdr = { {0, 0}, ph->len, ph->len }; - if (pcap_offline_filter(&ntv->bpf_prog, &pkthdr, d) == 0) { + if (pcap_offline_filter(&ntv->bpf_prog, &pkthdr, ph->buf) == 0) { return; } } @@ -573,12 +726,12 @@ static void NetmapCallback(u_char *user, const struct nm_pkthdr *ph, const u_cha ntv->bytes += ph->len; if (ntv->flags & NETMAP_FLAG_ZERO_COPY) { - if (PacketSetData(p, (uint8_t *)d, ph->len) == -1) { + if (PacketSetData(p, (uint8_t *)ph->buf, ph->len) == -1) { TmqhOutputPacketpool(ntv->tv, p); return; } } else { - if (PacketCopyData(p, (uint8_t *)d, ph->len) == -1) { + if (PacketCopyData(p, (uint8_t *)ph->buf, ph->len) == -1) { TmqhOutputPacketpool(ntv->tv, p); return; } @@ -593,6 +746,87 @@ static void NetmapCallback(u_char *user, const struct nm_pkthdr *ph, const u_cha (void)TmThreadsSlotProcessPkt(ntv->tv, ntv->slot, p); } +/** + * \brief Copy netmap rings data into Packet structures. + * \param *d nmport_d (or nm_desc) netmap if structure. + * \param cnt int count of packets to read (-1 = all). + * \param *ntv NetmapThreadVars. + */ +#if USE_NEW_NETMAP_API +static TmEcode NetmapReadPackets(struct nmport_d *d, int cnt, NetmapThreadVars *ntv) +#else +static TmEcode NetmapReadPackets(struct nm_desc *d, int cnt, NetmapThreadVars *ntv) +#endif +{ + struct nm_pkthdr hdr; + int last_ring = d->last_rx_ring - d->first_rx_ring + 1; + int cur_ring, got = 0, cur_rx_ring = d->cur_rx_ring; + + memset(&hdr, 0, sizeof(hdr)); + hdr.flags = NM_MORE_PKTS; + + if (cnt == 0) + cnt = -1; + + for (cur_ring = 0; cur_ring < last_ring && cnt != got; cur_ring++, cur_rx_ring++) { + struct netmap_ring *ring; + + if (cur_rx_ring > d->last_rx_ring) + cur_rx_ring = d->first_rx_ring; + + ring = NETMAP_RXRING(d->nifp, cur_rx_ring); + + /* cycle through the non-empty ring slots to fetch their data */ + for (; !nm_ring_empty(ring) && cnt != got; got++) { + u_int idx, i; + u_char *oldbuf; + struct netmap_slot *slot; + + if (hdr.buf) { /* from previous round */ + NetmapProcessPacket(ntv, &hdr); + } + + i = ring->cur; + slot = &ring->slot[i]; + idx = slot->buf_idx; + d->cur_rx_ring = cur_rx_ring; + hdr.slot = slot; + oldbuf = hdr.buf = (u_char *)NETMAP_BUF(ring, idx); + hdr.len = hdr.caplen = slot->len; + + /* loop through the ring slots to get packet data */ + while (slot->flags & NS_MOREFRAG) { + /* packet can be fragmented across multiple slots, */ + /* so loop until we find the slot with the flag */ + /* cleared, signalling the end of the packet data. */ + u_char *nbuf; + u_int oldlen = slot->len; + i = nm_ring_next(ring, i); + slot = &ring->slot[i]; + hdr.len += slot->len; + nbuf = (u_char *)NETMAP_BUF(ring, slot->buf_idx); + + if (oldbuf != NULL && nbuf - oldbuf == ring->nr_buf_size && + oldlen == ring->nr_buf_size) { + hdr.caplen += slot->len; + oldbuf = nbuf; + } else { + oldbuf = NULL; + } + } + + hdr.ts = ring->ts; + ring->head = ring->cur = nm_ring_next(ring, i); + } + } + + if (hdr.buf) { /* from previous round */ + hdr.flags = 0; + NetmapProcessPacket(ntv, &hdr); + } + return got; +} + /** * \brief Main netmap reading loop function */ @@ -608,6 +842,7 @@ static TmEcode ReceiveNetmapLoop(ThreadVars *tv, void *data, void *slot) fds.fd = ntv->ifsrc->nmd->fd; fds.events = POLLIN; + SCLogDebug("thread %s polling on %d", tv->name, fds.fd); for(;;) { if (unlikely(suricata_ctl_flags != 0)) { break; @@ -622,13 +857,13 @@ static TmEcode ReceiveNetmapLoop(ThreadVars *tv, void *data, void *slot) /* error */ if (errno != EINTR) SCLogError(SC_ERR_NETMAP_READ, - "Error polling netmap from iface '%s': (%d" PRIu32 ") %s", - ntv->ifsrc->ifname, errno, strerror(errno)); + "Error polling netmap from iface '%s': (%d" PRIu32 ") %s", + ntv->ifsrc->ifname, errno, strerror(errno)); continue; } else if (r == 0) { /* no events, timeout */ - //SCLogDebug("(%s:%d-%d) Poll timeout", ntv->ifsrc->ifname, + // SCLogDebug("(%s:%d-%d) Poll timeout", ntv->ifsrc->ifname, // ntv->src_ring_from, ntv->src_ring_to); /* sync counters */ @@ -642,18 +877,21 @@ static TmEcode ReceiveNetmapLoop(ThreadVars *tv, void *data, void *slot) if (unlikely(fds.revents & POLL_EVENTS)) { if (fds.revents & POLLERR) { - //SCLogError(SC_ERR_NETMAP_READ, + SCLogError(SC_ERR_NETMAP_READ, + "Error reading netmap data via polling from iface '%s': (%d" PRIu32 ") %s", + ntv->ifsrc->ifname, errno, strerror(errno)); + // SCLogError(SC_ERR_NETMAP_READ, // "Error reading data from iface '%s': (%d" PRIu32 ") %s", // ntv->ifsrc->ifname, errno, strerror(errno)); } else if (fds.revents & POLLNVAL) { - SCLogError(SC_ERR_NETMAP_READ, - "Invalid polling request"); + SCLogError(SC_ERR_NETMAP_READ, "Invalid polling request"); } continue; } if (likely(fds.revents & POLLIN)) { - nm_dispatch(ntv->ifsrc->nmd, -1, NetmapCallback, (void *)ntv); + /* have data on RX ring, so copy to Packet for processing */ + NetmapReadPackets(ntv->ifsrc->nmd, -1, ntv); } NetmapDumpCounters(ntv); @@ -713,7 +951,7 @@ static TmEcode ReceiveNetmapThreadDeinit(ThreadVars *tv, void *data) /** * \brief Prepare netmap decode thread. - * \param tv Thread local avariables. + * \param tv Thread local variables. * \param initdata Thread config. * \param data Pointer to DecodeThreadVars placed here. */ diff --git a/src/source-netmap.h b/src/source-netmap.h index adfdaf57d3..b60d544d5c 100644 --- a/src/source-netmap.h +++ b/src/source-netmap.h @@ -1,4 +1,4 @@ -/* Copyright (C) 2014-2018 Open Information Security Foundation +/* Copyright (C) 2014-2021 Open Information Security Foundation * * You can copy, redistribute or modify this Program under the terms of * the GNU General Public License version 2 as published by the Free @@ -25,8 +25,6 @@ #ifndef __SOURCE_NETMAP_H__ #define __SOURCE_NETMAP_H__ -#include "queue.h" - /* copy modes */ enum { NETMAP_COPY_MODE_NONE,