-/* Copyright (C) 2011-2018 Open Information Security Foundation
+/* Copyright (C) 2011-2021 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
#define SC_PCAP_DONT_INCLUDE_PCAP_H 1
#include "suricata-common.h"
-#include "config.h"
#include "suricata.h"
#include "decode.h"
#include "packet-queue.h"
#include "tmqh-packetpool.h"
#include "source-af-packet.h"
#include "runmodes.h"
+#include "flow-storage.h"
+#include "util-validate.h"
#ifdef HAVE_AF_PACKET
#include <sys/ioctl.h>
#endif
+#if HAVE_LINUX_SOCKIOS_H
+#include <linux/sockios.h>
+#endif
+
#ifdef HAVE_PACKET_EBPF
#include "util-ebpf.h"
#include <bpf/libbpf.h>
tmm_modules[TMM_RECEIVEAFP].Func = NULL;
tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = NULL;
tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = NULL;
- tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
tmm_modules[TMM_RECEIVEAFP].cap_flags = 0;
tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
}
tmm_modules[TMM_DECODEAFP].Func = NULL;
tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
tmm_modules[TMM_DECODEAFP].ThreadDeinit = NULL;
- tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
tmm_modules[TMM_DECODEAFP].cap_flags = 0;
tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
}
#define POLL_TIMEOUT 100
-#ifndef TP_STATUS_USER_BUSY
-/* for new use latest bit available in tp_status */
-#define TP_STATUS_USER_BUSY (1 << 31)
+/* kernel flags defined for RX ring tp_status */
+#ifndef TP_STATUS_KERNEL
+#define TP_STATUS_KERNEL 0
+#endif
+#ifndef TP_STATUS_USER
+#define TP_STATUS_USER BIT_U32(0)
+#endif
+#ifndef TP_STATUS_COPY
+#define TP_STATUS_COPY BIT_U32(1)
+#endif
+#ifndef TP_STATUS_LOSING
+#define TP_STATUS_LOSING BIT_U32(2)
+#endif
+#ifndef TP_STATUS_CSUMNOTREADY
+#define TP_STATUS_CSUMNOTREADY BIT_U32(3)
#endif
-
#ifndef TP_STATUS_VLAN_VALID
-#define TP_STATUS_VLAN_VALID (1 << 4)
+#define TP_STATUS_VLAN_VALID BIT_U32(4)
+#endif
+#ifndef TP_STATUS_BLK_TMO
+#define TP_STATUS_BLK_TMO BIT_U32(5)
+#endif
+#ifndef TP_STATUS_VLAN_TPID_VALID
+#define TP_STATUS_VLAN_TPID_VALID BIT_U32(6)
+#endif
+#ifndef TP_STATUS_CSUM_VALID
+#define TP_STATUS_CSUM_VALID BIT_U32(7)
+#endif
+
+#ifndef TP_STATUS_TS_SOFTWARE
+#define TP_STATUS_TS_SOFTWARE BIT_U32(29)
+#endif
+#ifndef TP_STATUS_TS_SYS_HARDWARE
+#define TP_STATUS_TS_SYS_HARDWARE BIT_U32(30) /* kernel comment says: "deprecated, never set" */
+#endif
+#ifndef TP_STATUS_TS_RAW_HARDWARE
+#define TP_STATUS_TS_RAW_HARDWARE BIT_U32(31)
#endif
+#ifndef TP_STATUS_USER_BUSY
+/* HACK special setting in the tp_status field for frames we are
+ * still working on. This can happen in autofp mode where the
+ * capture thread goes around the ring and finds a frame that still
+ * hasn't been released by a worker thread.
+ *
+ * We use bits 29, 30, 31. 29 and 31 are software and hardware
+ * timestamps. 30 should not be set by the kernel at all. Combined
+ * they should never be set on the rx-ring together.
+ *
+ * FRAME_BUSY() only reports a frame as busy when all three bits are
+ * set at once, so a frame that merely carries one of the timestamp
+ * flags does not match.
+ *
+ * The excessive casting is for handling the fact that the kernel
+ * defines almost all of these as int flags, not unsigned ints. */
+#define TP_STATUS_USER_BUSY \
+ (uint32_t)((uint32_t)TP_STATUS_TS_SOFTWARE | (uint32_t)TP_STATUS_TS_SYS_HARDWARE | \
+ (uint32_t)TP_STATUS_TS_RAW_HARDWARE)
+#endif
+#define FRAME_BUSY(tp_status) \
+ (((uint32_t)(tp_status) & (uint32_t)TP_STATUS_USER_BUSY) == (uint32_t)TP_STATUS_USER_BUSY)
+
enum {
AFP_READ_OK,
AFP_READ_FAILURE,
void *raw;
};
+#ifdef HAVE_PACKET_EBPF
static int AFPBypassCallback(Packet *p);
static int AFPXDPBypassCallback(Packet *p);
+#endif
#define MAX_MAPS 32
/**
uint16_t capture_kernel_packets;
uint16_t capture_kernel_drops;
uint16_t capture_errors;
+ uint16_t afpacket_spin;
+ uint16_t capture_afp_poll;
+ uint16_t capture_afp_poll_signal;
+ uint16_t capture_afp_poll_timeout;
+ uint16_t capture_afp_poll_data;
+ uint16_t capture_afp_poll_err;
/* handle state */
uint8_t afp_state;
/* IPS peer */
AFPPeer *mpeer;
- /* no mmap mode */
- uint8_t *data; /** Per function and thread data */
- int datalen; /** Length of per function and thread data */
- int cooked;
-
/*
* Init related members
*/
int buffer_size;
/* Filter */
const char *bpf_filter;
- int ebpf_lb_fd;
- int ebpf_filter_fd;
int promisc;
int down_count;
- int cluster_id;
+ uint16_t cluster_id;
int cluster_type;
int threads;
unsigned int ring_buflen;
uint8_t *ring_buf;
- uint8_t xdp_mode;
-
#ifdef HAVE_PACKET_EBPF
+ uint8_t xdp_mode;
+ int ebpf_lb_fd;
+ int ebpf_filter_fd;
struct ebpf_timeout_config ebpf_t_config;
#endif
} AFPThreadVars;
-TmEcode ReceiveAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
-TmEcode ReceiveAFPThreadInit(ThreadVars *, const void *, void **);
-void ReceiveAFPThreadExitStats(ThreadVars *, void *);
-TmEcode ReceiveAFPThreadDeinit(ThreadVars *, void *);
-TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot);
+static TmEcode ReceiveAFPThreadInit(ThreadVars *, const void *, void **);
+static void ReceiveAFPThreadExitStats(ThreadVars *, void *);
+static TmEcode ReceiveAFPThreadDeinit(ThreadVars *, void *);
+static TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot);
-TmEcode DecodeAFPThreadInit(ThreadVars *, const void *, void **);
-TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data);
-TmEcode DecodeAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
+static TmEcode DecodeAFPThreadInit(ThreadVars *, const void *, void **);
+static TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data);
+static TmEcode DecodeAFP(ThreadVars *, Packet *, void *);
-TmEcode AFPSetBPFFilter(AFPThreadVars *ptv);
+static TmEcode AFPSetBPFFilter(AFPThreadVars *ptv);
static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose);
static int AFPGetDevFlags(int fd, const char *ifname);
static int AFPDerefSocket(AFPPeer* peer);
tmm_modules[TMM_RECEIVEAFP].PktAcqBreakLoop = NULL;
tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = ReceiveAFPThreadExitStats;
tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = ReceiveAFPThreadDeinit;
- tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
tmm_modules[TMM_RECEIVEAFP].cap_flags = SC_CAP_NET_RAW;
tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
}
-
/**
* \defgroup afppeers AFP peers list
*
(void)SC_ATOMIC_SET(ptv->mpeer->if_idx, AFPGetIfnumByDev(ptv->socket, ptv->iface, 0));
(void)SC_ATOMIC_SET(ptv->mpeer->socket, ptv->socket);
(void)SC_ATOMIC_SET(ptv->mpeer->state, ptv->afp_state);
+ (void)SC_ATOMIC_SET(ptv->mpeer->send_errors, 0);
}
/**
{
if (peer->flags & AFP_SOCK_PROTECT)
SCMutexDestroy(&peer->sock_protect);
- SC_ATOMIC_DESTROY(peer->socket);
- SC_ATOMIC_DESTROY(peer->if_idx);
- SC_ATOMIC_DESTROY(peer->state);
SCFree(peer);
}
if (peerslist.turn == 0)
return;
- if (SC_ATOMIC_ADD(peerslist.reached, 1) == peerslist.turn) {
+ if ((SC_ATOMIC_ADD(peerslist.reached, 1) + 1) == peerslist.turn) {
SCLogInfo("All AFP capture threads are running.");
(void)SC_ATOMIC_SET(peerslist.reached, 0);
/* Set turn to 0 to skip syncrhonization when ReceiveAFPLoop is
tmm_modules[TMM_DECODEAFP].Func = DecodeAFP;
tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
tmm_modules[TMM_DECODEAFP].ThreadDeinit = DecodeAFPThreadDeinit;
- tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
tmm_modules[TMM_DECODEAFP].cap_flags = 0;
tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
}
#endif
}
-/**
- * \brief AF packet read function.
- *
- * This function fills
- * From here the packets are picked up by the DecodeAFP thread.
- *
- * \param user pointer to AFPThreadVars
- * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
- */
-static int AFPRead(AFPThreadVars *ptv)
-{
- Packet *p = NULL;
- /* XXX should try to use read that get directly to packet */
- int offset = 0;
- int caplen;
- struct sockaddr_ll from;
- struct iovec iov;
- struct msghdr msg;
- struct cmsghdr *cmsg;
- union {
- struct cmsghdr cmsg;
- char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
- } cmsg_buf;
- unsigned char aux_checksum = 0;
-
- msg.msg_name = &from;
- msg.msg_namelen = sizeof(from);
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- msg.msg_control = &cmsg_buf;
- msg.msg_controllen = sizeof(cmsg_buf);
- msg.msg_flags = 0;
-
- if (ptv->cooked)
- offset = SLL_HEADER_LEN;
- else
- offset = 0;
- iov.iov_len = ptv->datalen - offset;
- iov.iov_base = ptv->data + offset;
-
- caplen = recvmsg(ptv->socket, &msg, MSG_TRUNC);
-
- if (caplen < 0) {
- SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
- errno);
- SCReturnInt(AFP_READ_FAILURE);
- }
-
- p = PacketGetFromQueueOrAlloc();
- if (p == NULL) {
- SCReturnInt(AFP_SURI_FAILURE);
- }
- PKT_SET_SRC(p, PKT_SRC_WIRE);
- if (ptv->flags & AFP_BYPASS) {
- p->BypassPacketsFlow = AFPBypassCallback;
-#ifdef HAVE_PACKET_EBPF
- p->afp_v.v4_map_fd = ptv->v4_map_fd;
- p->afp_v.v6_map_fd = ptv->v6_map_fd;
- p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
-#endif
- }
- if (ptv->flags & AFP_XDPBYPASS) {
- p->BypassPacketsFlow = AFPXDPBypassCallback;
-#ifdef HAVE_PACKET_EBPF
- p->afp_v.v4_map_fd = ptv->v4_map_fd;
- p->afp_v.v6_map_fd = ptv->v6_map_fd;
- p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
-#endif
- }
-
- /* get timestamp of packet via ioctl */
- if (ioctl(ptv->socket, SIOCGSTAMP, &p->ts) == -1) {
- SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
- errno);
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_READ_FAILURE);
- }
-
- ptv->pkts++;
- p->livedev = ptv->livedev;
-
- /* add forged header */
- if (ptv->cooked) {
- SllHdr * hdrp = (SllHdr *)ptv->data;
- /* XXX this is minimalist, but this seems enough */
- hdrp->sll_protocol = from.sll_protocol;
- }
-
- p->datalink = ptv->datalink;
- SET_PKT_LEN(p, caplen + offset);
- if (PacketCopyData(p, ptv->data, GET_PKT_LEN(p)) == -1) {
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_SURI_FAILURE);
- }
- SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
- GET_PKT_LEN(p), p, GET_PKT_DATA(p));
-
- /* We only check for checksum disable */
- if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
- p->flags |= PKT_IGNORE_CHECKSUM;
- } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
- if (ptv->livedev->ignore_checksum) {
- p->flags |= PKT_IGNORE_CHECKSUM;
- } else if (ChecksumAutoModeCheck(ptv->pkts,
- SC_ATOMIC_GET(ptv->livedev->pkts),
- SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
- ptv->livedev->ignore_checksum = 1;
- p->flags |= PKT_IGNORE_CHECKSUM;
- }
- } else {
- aux_checksum = 1;
- }
-
- /* List is NULL if we don't have activated auxiliary data */
- for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
- struct tpacket_auxdata *aux;
-
- if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata)) ||
- cmsg->cmsg_level != SOL_PACKET ||
- cmsg->cmsg_type != PACKET_AUXDATA)
- continue;
-
- aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
-
- if (aux_checksum && (aux->tp_status & TP_STATUS_CSUMNOTREADY)) {
- p->flags |= PKT_IGNORE_CHECKSUM;
- }
- break;
- }
-
- if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_SURI_FAILURE);
- }
- SCReturnInt(AFP_READ_OK);
-}
-
/**
* \brief AF packet write function.
*
* \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
*
*/
-static TmEcode AFPWritePacket(Packet *p, int version)
+static void AFPWritePacket(Packet *p, int version)
{
struct sockaddr_ll socket_address;
int socket;
- uint8_t *pstart;
- size_t plen;
- union thdr h;
- uint16_t vlan_tci = 0;
if (p->afp_v.copy_mode == AFP_COPY_MODE_IPS) {
- if (PACKET_TEST_ACTION(p, ACTION_DROP)) {
- return TM_ECODE_OK;
+ if (PacketTestAction(p, ACTION_DROP)) {
+ return;
}
}
- if (SC_ATOMIC_GET(p->afp_v.peer->state) == AFP_STATE_DOWN)
- return TM_ECODE_OK;
-
if (p->ethh == NULL) {
SCLogWarning(SC_ERR_INVALID_VALUE, "Should have an Ethernet header");
- return TM_ECODE_FAILED;
+ return;
}
+
/* Index of the network device */
socket_address.sll_ifindex = SC_ATOMIC_GET(p->afp_v.peer->if_idx);
/* Address length*/
SCMutexLock(&p->afp_v.peer->sock_protect);
socket = SC_ATOMIC_GET(p->afp_v.peer->socket);
- h.raw = p->afp_v.relptr;
-
- if (version == TPACKET_V2) {
- /* Copy VLAN header from ring memory. For post june 2011 kernel we test
- * the flag. It is not defined for older kernel so we go best effort
- * and test for non zero value of the TCI header. */
- if (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci) {
- vlan_tci = h.h2->tp_vlan_tci;
- }
- } else {
-#ifdef HAVE_TPACKET_V3
- if (h.h3->tp_status & TP_STATUS_VLAN_VALID || h.h3->hv1.tp_vlan_tci) {
- vlan_tci = h.h3->hv1.tp_vlan_tci;
+ if (sendto(socket, GET_PKT_DATA(p), GET_PKT_LEN(p), 0, (struct sockaddr *)&socket_address,
+ sizeof(struct sockaddr_ll)) < 0) {
+ if (SC_ATOMIC_ADD(p->afp_v.peer->send_errors, 1) == 0) {
+ SCLogWarning(SC_ERR_SOCKET, "sending packet failed on socket %d: %s", socket,
+ strerror(errno));
}
-#else
- /* Should not get here */
- BUG_ON(1);
-#endif
- }
-
- if (vlan_tci != 0) {
- pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
- plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
- /* move ethernet addresses */
- memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
- /* write vlan info */
- *(uint16_t *)(pstart + 2 * ETH_ALEN) = htons(0x8100);
- *(uint16_t *)(pstart + 2 * ETH_ALEN + 2) = htons(vlan_tci);
- } else {
- pstart = GET_PKT_DATA(p);
- plen = GET_PKT_LEN(p);
- }
-
- if (sendto(socket, pstart, plen, 0,
- (struct sockaddr*) &socket_address,
- sizeof(struct sockaddr_ll)) < 0) {
- SCLogWarning(SC_ERR_SOCKET, "Sending packet failed on socket %d: %s",
- socket,
- strerror(errno));
- if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
- SCMutexUnlock(&p->afp_v.peer->sock_protect);
- return TM_ECODE_FAILED;
}
if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
SCMutexUnlock(&p->afp_v.peer->sock_protect);
-
- return TM_ECODE_OK;
}
static void AFPReleaseDataFromRing(Packet *p)
{
+ DEBUG_VALIDATE_BUG_ON(PKT_IS_PSEUDOPKT(p));
+
/* Need to be in copy mode and need to detect early release
where Ethernet header could not be set (and pseudo packet) */
- if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
+ if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
AFPWritePacket(p, TPACKET_V2);
}
- if (AFPDerefSocket(p->afp_v.mpeer) == 0)
- goto cleanup;
+ BUG_ON(p->afp_v.relptr == NULL);
- if (p->afp_v.relptr) {
- union thdr h;
- h.raw = p->afp_v.relptr;
- h.h2->tp_status = TP_STATUS_KERNEL;
- }
+ union thdr h;
+ h.raw = p->afp_v.relptr;
+ h.h2->tp_status = TP_STATUS_KERNEL;
+
+ (void)AFPDerefSocket(p->afp_v.mpeer);
-cleanup:
AFPV_CLEANUP(&p->afp_v);
}
#ifdef HAVE_TPACKET_V3
static void AFPReleasePacketV3(Packet *p)
{
+ DEBUG_VALIDATE_BUG_ON(PKT_IS_PSEUDOPKT(p));
+
/* Need to be in copy mode and need to detect early release
where Ethernet header could not be set (and pseudo packet) */
- if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
+ if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
AFPWritePacket(p, TPACKET_V3);
}
PacketFreeOrRelease(p);
PacketFreeOrRelease(p);
}
+/** \internal
+ *  \brief give up on the current v2 ring frame after an internal failure
+ *
+ *  Sets the frame's tp_status to TP_STATUS_KERNEL, steps
+ *  ptv->frame_offset to the next slot (wrapping to 0 once it reaches
+ *  req.v2.tp_frame_nr) and leaves through SCReturnInt(AFP_SURI_FAILURE).
+ *
+ *  \param ptv capture thread state holding the ring position
+ *  \param h   header of the frame that was being handled
+ */
+static inline int AFPSuriFailure(AFPThreadVars *ptv, union thdr h)
+{
+    h.h2->tp_status = TP_STATUS_KERNEL;
+
+    /* advance to the following slot, wrapping at the end of the ring */
+    ptv->frame_offset++;
+    if (ptv->frame_offset >= ptv->req.v2.tp_frame_nr)
+        ptv->frame_offset = 0;
+
+    SCReturnInt(AFP_SURI_FAILURE);
+}
+
+/** \internal
+ *  \brief attach the bypass callback and eBPF map info to a packet
+ *
+ *  Does nothing unless built with HAVE_PACKET_EBPF. When AFP_BYPASS or
+ *  AFP_XDPBYPASS is set in ptv->flags, the packet gets the matching
+ *  BypassPacketsFlow callback together with the v4/v6 map fds and the
+ *  CPU count taken from \a ptv. If both flags are set, the XDP callback
+ *  is the one that ends up installed.
+ */
+static inline void AFPReadApplyBypass(const AFPThreadVars *ptv, Packet *p)
+{
+#ifdef HAVE_PACKET_EBPF
+    const bool want_xdp = (ptv->flags & AFP_XDPBYPASS) != 0;
+    const bool want_ebpf = (ptv->flags & AFP_BYPASS) != 0;
+
+    if (!want_xdp && !want_ebpf) {
+        return;
+    }
+
+    /* XDP takes precedence when both modes are flagged */
+    p->BypassPacketsFlow = want_xdp ? AFPXDPBypassCallback : AFPBypassCallback;
+    p->afp_v.v4_map_fd = ptv->v4_map_fd;
+    p->afp_v.v6_map_fd = ptv->v6_map_fd;
+    p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
+#endif
+}
+
+/** \internal
+ * \brief setup packet for AFPReadFromRing
+ *
+ * Marks the ring frame as busy, hands the frame payload to the packet
+ * through PacketSetData(), installs AFPReleasePacket as the release
+ * callback and fills in vlan, peer, timestamp and checksum handling.
+ *
+ * \param ptv capture thread state
+ * \param h header of the ring frame backing this packet
+ * \param tp_status frame status value passed in by the caller; the vlan
+ * and checksum tests use this value rather than re-reading the header
+ * \param p packet to initialize
+ */
+static void AFPReadFromRingSetupPacket(
+ AFPThreadVars *ptv, union thdr h, const unsigned int tp_status, Packet *p)
+{
+ PKT_SET_SRC(p, PKT_SRC_WIRE);
+
+ /* flag the packet as TP_STATUS_USER_BUSY, which is ignored by the kernel, but
+ * acts as an indicator that we've reached a frame that is not yet released by
+ * us in autofp mode. It will be cleared when the frame gets released to the kernel. */
+ h.h2->tp_status |= TP_STATUS_USER_BUSY;
+ p->livedev = ptv->livedev;
+ p->datalink = ptv->datalink;
+ ptv->pkts++;
+
+ AFPReadApplyBypass(ptv, p);
+
+ if (h.h2->tp_len > h.h2->tp_snaplen) {
+ SCLogDebug("Packet length (%d) > snaplen (%d), truncating", h.h2->tp_len, h.h2->tp_snaplen);
+ }
+
+ /* get vlan id from header */
+ if ((ptv->flags & AFP_VLAN_IN_HEADER) &&
+ (tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci)) {
+ p->vlan_id[0] = h.h2->tp_vlan_tci & 0x0fff; /* keep the low 12 bits of the TCI */
+ p->vlan_idx = 1;
+ p->afp_v.vlan_tci = h.h2->tp_vlan_tci; /* full TCI kept alongside the id */
+ }
+
+ (void)PacketSetData(p, (unsigned char *)h.raw + h.h2->tp_mac, h.h2->tp_snaplen); /* return value deliberately discarded */
+
+ p->ReleasePacket = AFPReleasePacket;
+ p->afp_v.relptr = h.raw; /* remembers which frame this packet is backed by */
+ if (ptv->flags & AFP_NEED_PEER) {
+ p->afp_v.mpeer = ptv->mpeer;
+ AFPRefSocket(ptv->mpeer);
+ } else {
+ p->afp_v.mpeer = NULL;
+ }
+ p->afp_v.copy_mode = ptv->copy_mode;
+ p->afp_v.peer = (p->afp_v.copy_mode == AFP_COPY_MODE_NONE) ? NULL : ptv->mpeer->peer;
+
+ /* Timestamp */
+ p->ts.tv_sec = h.h2->tp_sec;
+ p->ts.tv_usec = h.h2->tp_nsec / 1000; /* nanoseconds to microseconds */
+ SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)", GET_PKT_LEN(p), p, GET_PKT_DATA(p));
+
+ /* We only check for checksum disable */
+ if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
+ p->flags |= PKT_IGNORE_CHECKSUM;
+ } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
+ if (ChecksumAutoModeCheck(ptv->pkts, SC_ATOMIC_GET(ptv->livedev->pkts),
+ SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
+ ptv->checksum_mode = CHECKSUM_VALIDATION_DISABLE; /* later packets on this thread take the DISABLE branch */
+ p->flags |= PKT_IGNORE_CHECKSUM;
+ }
+ } else {
+ if (tp_status & TP_STATUS_CSUMNOTREADY) {
+ p->flags |= PKT_IGNORE_CHECKSUM;
+ }
+ }
+}
+
+/** \internal
+ *  \brief busy-wait until the frame at the current ring position is
+ *         no longer owned by the kernel
+ *
+ *  Spins on the tp_status of the frame at ptv->frame_offset while it
+ *  equals TP_STATUS_KERNEL. The wait ends when the status changes, when
+ *  suricata_ctl_flags becomes non-zero, or after roughly one second.
+ *  The number of idle iterations is added to the afpacket_spin counter.
+ *
+ *  \param ptv capture thread state
+ *  \retval AFP_READ_FAILURE the ring slot pointer is NULL
+ *  \retval AFP_READ_OK in all other cases, including timeout
+ */
+static inline int AFPReadFromRingWaitForPacket(AFPThreadVars *ptv)
+{
+    /* upper bound for the spin, in milliseconds */
+    const int64_t max_wait_ms = 1000;
+    union thdr h;
+    struct timeval start_time;
+    gettimeofday(&start_time, NULL);
+    uint64_t busy_loop_iter = 0;
+
+    /* busy wait loop until we have packets available */
+    while (1) {
+        if (unlikely(suricata_ctl_flags != 0)) {
+            break;
+        }
+        h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
+        if (unlikely(h.raw == NULL)) {
+            return AFP_READ_FAILURE;
+        }
+        const unsigned int tp_status = h.h2->tp_status;
+        if (tp_status != TP_STATUS_KERNEL) {
+            break;
+        }
+        busy_loop_iter++;
+
+        /* Read the clock on every idle pass. The time used to be left
+         * zeroed, which made the elapsed value wrap to a huge unsigned
+         * number and ended the wait on the very first iteration. */
+        struct timeval cur_time;
+        gettimeofday(&cur_time, NULL);
+        const int64_t elapsed_ms =
+                (((int64_t)cur_time.tv_sec - (int64_t)start_time.tv_sec) * 1000) +
+                (((1000000 + (int64_t)cur_time.tv_usec - (int64_t)start_time.tv_usec) / 1000) -
+                        1000);
+        /* a negative value means the wall clock stepped back: stop waiting
+         * instead of spinning for an unbounded time */
+        if (elapsed_ms < 0 || elapsed_ms > max_wait_ms) {
+            break;
+        }
+    }
+    if (busy_loop_iter) {
+        StatsAddUI64(ptv->tv, ptv->afpacket_spin, busy_loop_iter);
+    }
+    return AFP_READ_OK;
+}
+
/**
* \brief AF packet read function for ring
*
*/
static int AFPReadFromRing(AFPThreadVars *ptv)
{
- Packet *p = NULL;
union thdr h;
- uint8_t emergency_flush = 0;
- int read_pkts = 0;
- int loop_start = -1;
+ bool emergency_flush = false;
+ const unsigned int start_pos = ptv->frame_offset;
+ /* poll() told us there are frames, so lets wait for at least
+ * one frame to become available. */
+ if (AFPReadFromRingWaitForPacket(ptv) != AFP_READ_OK)
+ return AFP_READ_FAILURE;
- /* Loop till we have packets available */
+ /* process the frames in the ring */
while (1) {
if (unlikely(suricata_ctl_flags != 0)) {
break;
}
-
- /* Read packet from ring */
h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
if (unlikely(h.raw == NULL)) {
- /* Impossible we reach this point in normal condition, so trigger
- * a failure in reading */
- SCReturnInt(AFP_READ_FAILURE);
+ return AFP_READ_FAILURE;
}
-
- if ((! h.h2->tp_status) || (h.h2->tp_status & TP_STATUS_USER_BUSY)) {
- if (read_pkts == 0) {
- if (loop_start == -1) {
- loop_start = ptv->frame_offset;
- } else if (unlikely(loop_start == (int)ptv->frame_offset)) {
- SCReturnInt(AFP_READ_OK);
- }
- if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
- ptv->frame_offset = 0;
- }
- continue;
- }
- if ((emergency_flush) && (ptv->flags & AFP_EMERGENCY_MODE)) {
- SCReturnInt(AFP_KERNEL_DROP);
- } else {
- SCReturnInt(AFP_READ_OK);
- }
+ const unsigned int tp_status = h.h2->tp_status;
+ /* if we find a kernel frame we are done */
+ if (unlikely(tp_status == TP_STATUS_KERNEL)) {
+ break;
}
-
- read_pkts++;
- loop_start = -1;
-
- /* Our packet is still used by suricata, we exit read loop to
- * gain some time */
- if (h.h2->tp_status & TP_STATUS_USER_BUSY) {
- SCReturnInt(AFP_READ_OK);
+ /* if in autofp mode the frame is still busy, return to poll */
+ if (unlikely(FRAME_BUSY(tp_status))) {
+ break;
}
+ emergency_flush |= ((tp_status & TP_STATUS_LOSING) != 0);
- if ((ptv->flags & AFP_EMERGENCY_MODE) && (emergency_flush == 1)) {
+ if ((ptv->flags & AFP_EMERGENCY_MODE) && emergency_flush) {
h.h2->tp_status = TP_STATUS_KERNEL;
goto next_frame;
}
- p = PacketGetFromQueueOrAlloc();
+ Packet *p = PacketGetFromQueueOrAlloc();
if (p == NULL) {
- SCReturnInt(AFP_SURI_FAILURE);
- }
- PKT_SET_SRC(p, PKT_SRC_WIRE);
- if (ptv->flags & AFP_BYPASS) {
- p->BypassPacketsFlow = AFPBypassCallback;
-#ifdef HAVE_PACKET_EBPF
- p->afp_v.v4_map_fd = ptv->v4_map_fd;
- p->afp_v.v6_map_fd = ptv->v6_map_fd;
- p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
-#endif
- }
- if (ptv->flags & AFP_XDPBYPASS) {
- p->BypassPacketsFlow = AFPXDPBypassCallback;
-#ifdef HAVE_PACKET_EBPF
- p->afp_v.v4_map_fd = ptv->v4_map_fd;
- p->afp_v.v6_map_fd = ptv->v6_map_fd;
- p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
-#endif
- }
-
- /* Suricata will treat packet so telling it is busy, this
- * status will be reset to 0 (ie TP_STATUS_KERNEL) in the release
- * function. */
- h.h2->tp_status |= TP_STATUS_USER_BUSY;
-
- ptv->pkts++;
- p->livedev = ptv->livedev;
- p->datalink = ptv->datalink;
-
- if (h.h2->tp_len > h.h2->tp_snaplen) {
- SCLogDebug("Packet length (%d) > snaplen (%d), truncating",
- h.h2->tp_len, h.h2->tp_snaplen);
- }
-
- /* get vlan id from header */
- if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
- (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci)) {
- p->vlan_id[0] = h.h2->tp_vlan_tci & 0x0fff;
- p->vlan_idx = 1;
- p->vlanh[0] = NULL;
- }
-
- if (ptv->flags & AFP_ZERO_COPY) {
- if (PacketSetData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_SURI_FAILURE);
- } else {
- p->afp_v.relptr = h.raw;
- p->ReleasePacket = AFPReleasePacket;
- p->afp_v.mpeer = ptv->mpeer;
- AFPRefSocket(ptv->mpeer);
-
- p->afp_v.copy_mode = ptv->copy_mode;
- if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
- p->afp_v.peer = ptv->mpeer->peer;
- } else {
- p->afp_v.peer = NULL;
- }
- }
- } else {
- if (PacketCopyData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
- /* As we can possibly fail to copy the data due to invalid data, let's
- * skip this packet and switch to the next one.
- */
- h.h2->tp_status = TP_STATUS_KERNEL;
- if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
- ptv->frame_offset = 0;
- }
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_SURI_FAILURE);
- }
- }
-
- /* Timestamp */
- p->ts.tv_sec = h.h2->tp_sec;
- p->ts.tv_usec = h.h2->tp_nsec/1000;
- SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
- GET_PKT_LEN(p), p, GET_PKT_DATA(p));
-
- /* We only check for checksum disable */
- if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
- p->flags |= PKT_IGNORE_CHECKSUM;
- } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
- if (ptv->livedev->ignore_checksum) {
- p->flags |= PKT_IGNORE_CHECKSUM;
- } else if (ChecksumAutoModeCheck(ptv->pkts,
- SC_ATOMIC_GET(ptv->livedev->pkts),
- SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
- ptv->livedev->ignore_checksum = 1;
- p->flags |= PKT_IGNORE_CHECKSUM;
- }
- } else {
- if (h.h2->tp_status & TP_STATUS_CSUMNOTREADY) {
- p->flags |= PKT_IGNORE_CHECKSUM;
- }
- }
- if (h.h2->tp_status & TP_STATUS_LOSING) {
- emergency_flush = 1;
- AFPDumpCounters(ptv);
- }
-
- /* release frame if not in zero copy mode */
- if (!(ptv->flags & AFP_ZERO_COPY)) {
- h.h2->tp_status = TP_STATUS_KERNEL;
+ return AFPSuriFailure(ptv, h);
}
+ AFPReadFromRingSetupPacket(ptv, h, tp_status, p);
if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
- h.h2->tp_status = TP_STATUS_KERNEL;
- if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
- ptv->frame_offset = 0;
- }
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_SURI_FAILURE);
+ return AFPSuriFailure(ptv, h);
}
-
next_frame:
if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
ptv->frame_offset = 0;
/* Get out of loop to be sure we will reach maintenance tasks */
- SCReturnInt(AFP_READ_OK);
+ if (ptv->frame_offset == start_pos)
+ break;
}
}
-
+ if (emergency_flush) {
+ AFPDumpCounters(ptv);
+ }
SCReturnInt(AFP_READ_OK);
}
SCReturnInt(AFP_SURI_FAILURE);
}
PKT_SET_SRC(p, PKT_SRC_WIRE);
- if (ptv->flags & AFP_BYPASS) {
- p->BypassPacketsFlow = AFPBypassCallback;
-#ifdef HAVE_PACKET_EBPF
- p->afp_v.v4_map_fd = ptv->v4_map_fd;
- p->afp_v.v6_map_fd = ptv->v6_map_fd;
- p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
-#endif
- } else if (ptv->flags & AFP_XDPBYPASS) {
- p->BypassPacketsFlow = AFPXDPBypassCallback;
-#ifdef HAVE_PACKET_EBPF
- p->afp_v.v4_map_fd = ptv->v4_map_fd;
- p->afp_v.v6_map_fd = ptv->v6_map_fd;
- p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
-#endif
- }
+
+ AFPReadApplyBypass(ptv, p);
ptv->pkts++;
p->livedev = ptv->livedev;
p->datalink = ptv->datalink;
- if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
+ if ((ptv->flags & AFP_VLAN_IN_HEADER) &&
(ppd->tp_status & TP_STATUS_VLAN_VALID || ppd->hv1.tp_vlan_tci)) {
p->vlan_id[0] = ppd->hv1.tp_vlan_tci & 0x0fff;
p->vlan_idx = 1;
- p->vlanh[0] = NULL;
+ p->afp_v.vlan_tci = ppd->hv1.tp_vlan_tci;
}
- if (ptv->flags & AFP_ZERO_COPY) {
- if (PacketSetData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_SURI_FAILURE);
- }
- p->afp_v.relptr = ppd;
- p->ReleasePacket = AFPReleasePacketV3;
- p->afp_v.mpeer = ptv->mpeer;
- AFPRefSocket(ptv->mpeer);
+ (void)PacketSetData(p, (unsigned char *)ppd + ppd->tp_mac, ppd->tp_snaplen);
+
+ p->ReleasePacket = AFPReleasePacketV3;
+ p->afp_v.relptr = NULL;
+ p->afp_v.mpeer = NULL;
+ p->afp_v.copy_mode = ptv->copy_mode;
+ p->afp_v.peer = (p->afp_v.copy_mode == AFP_COPY_MODE_NONE) ? NULL : ptv->mpeer->peer;
- p->afp_v.copy_mode = ptv->copy_mode;
- if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
- p->afp_v.peer = ptv->mpeer->peer;
- } else {
- p->afp_v.peer = NULL;
- }
- } else {
- if (PacketCopyData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_SURI_FAILURE);
- }
- }
/* Timestamp */
p->ts.tv_sec = ppd->tp_sec;
p->ts.tv_usec = ppd->tp_nsec/1000;
if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
p->flags |= PKT_IGNORE_CHECKSUM;
} else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
- if (ptv->livedev->ignore_checksum) {
- p->flags |= PKT_IGNORE_CHECKSUM;
- } else if (ChecksumAutoModeCheck(ptv->pkts,
+ if (ChecksumAutoModeCheck(ptv->pkts,
SC_ATOMIC_GET(ptv->livedev->pkts),
SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
- ptv->livedev->ignore_checksum = 1;
+ ptv->checksum_mode = CHECKSUM_VALIDATION_DISABLE;
p->flags |= PKT_IGNORE_CHECKSUM;
}
} else {
}
if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
- TmqhOutputPacketpool(ptv->tv, p);
SCReturnInt(AFP_SURI_FAILURE);
}
static inline int AFPWalkBlock(AFPThreadVars *ptv, struct tpacket_block_desc *pbd)
{
- int num_pkts = pbd->hdr.bh1.num_pkts, i;
- uint8_t *ppd;
- int ret = 0;
-
- ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;
- for (i = 0; i < num_pkts; ++i) {
- ret = AFPParsePacketV3(ptv, pbd,
- (struct tpacket3_hdr *)ppd);
+ const int num_pkts = pbd->hdr.bh1.num_pkts;
+ uint8_t *ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;
+
+ for (int i = 0; i < num_pkts; ++i) {
+ int ret = AFPParsePacketV3(ptv, pbd, (struct tpacket3_hdr *)ppd);
switch (ret) {
case AFP_READ_OK:
break;
static int AFPReadFromRingV3(AFPThreadVars *ptv)
{
#ifdef HAVE_TPACKET_V3
- struct tpacket_block_desc *pbd;
- int ret = 0;
-
/* Loop till we have packets available */
while (1) {
if (unlikely(suricata_ctl_flags != 0)) {
break;
}
- pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
+ struct tpacket_block_desc *pbd =
+ (struct tpacket_block_desc *)ptv->ring.v3[ptv->frame_offset].iov_base;
/* block is not ready to be read */
if ((pbd->hdr.bh1.block_status & TP_STATUS_USER) == 0) {
SCReturnInt(AFP_READ_OK);
}
- ret = AFPWalkBlock(ptv, pbd);
+ int ret = AFPWalkBlock(ptv, pbd);
if (unlikely(ret != AFP_READ_OK)) {
AFPFlushBlock(pbd);
SCReturnInt(ret);
if (peer == NULL)
return 1;
- if (SC_ATOMIC_SUB(peer->sock_usage, 1) == 0) {
- if (SC_ATOMIC_GET(peer->state) == AFP_STATE_DOWN) {
- SCLogInfo("Cleaning socket connected to '%s'", peer->iface);
- close(SC_ATOMIC_GET(peer->socket));
- return 0;
- }
+ if (SC_ATOMIC_SUB(peer->sock_usage, 1) == 1) {
+ return 0;
}
return 1;
}
-static void AFPSwitchState(AFPThreadVars *ptv, int state)
+static void AFPCloseSocket(AFPThreadVars *ptv)
{
- ptv->afp_state = state;
- ptv->down_count = 0;
-
- AFPPeerUpdate(ptv);
+ if (ptv->mpeer != NULL)
+ BUG_ON(SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0);
- /* Do cleaning if switching to down state */
- if (state == AFP_STATE_DOWN) {
-#ifdef HAVE_TPACKET_V3
- if (ptv->flags & AFP_TPACKET_V3) {
- if (!ptv->ring.v3) {
- SCFree(ptv->ring.v3);
- ptv->ring.v3 = NULL;
- }
- } else {
-#endif
- if (ptv->ring.v2) {
- /* only used in reading phase, we can free it */
- SCFree(ptv->ring.v2);
- ptv->ring.v2 = NULL;
- }
+ if (ptv->flags & AFP_TPACKET_V3) {
#ifdef HAVE_TPACKET_V3
+ if (ptv->ring.v3) {
+ SCFree(ptv->ring.v3);
+ ptv->ring.v3 = NULL;
}
#endif
- if (ptv->socket != -1) {
- /* we need to wait for all packets to return data */
- if (SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1) == 0) {
- SCLogDebug("Cleaning socket connected to '%s'", ptv->iface);
- munmap(ptv->ring_buf, ptv->ring_buflen);
- close(ptv->socket);
- ptv->socket = -1;
- }
+ } else {
+ if (ptv->ring.v2) {
+ /* only used in reading phase, we can free it */
+ SCFree(ptv->ring.v2);
+ ptv->ring.v2 = NULL;
}
}
- if (state == AFP_STATE_UP) {
- (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
+ if (ptv->socket != -1) {
+ SCLogDebug("Cleaning socket connected to '%s'", ptv->iface);
+ munmap(ptv->ring_buf, ptv->ring_buflen);
+ close(ptv->socket);
+ ptv->socket = -1;
}
}
-static int AFPReadAndDiscard(AFPThreadVars *ptv, struct timeval *synctv,
- uint64_t *discarded_pkts)
+static void AFPSwitchState(AFPThreadVars *ptv, int state)
{
- struct sockaddr_ll from;
- struct iovec iov;
- struct msghdr msg;
- struct timeval ts;
- union {
- struct cmsghdr cmsg;
- char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
- } cmsg_buf;
-
-
- if (unlikely(suricata_ctl_flags != 0)) {
- return 1;
- }
-
- msg.msg_name = &from;
- msg.msg_namelen = sizeof(from);
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- msg.msg_control = &cmsg_buf;
- msg.msg_controllen = sizeof(cmsg_buf);
- msg.msg_flags = 0;
-
- iov.iov_len = ptv->datalen;
- iov.iov_base = ptv->data;
-
- (void)recvmsg(ptv->socket, &msg, MSG_TRUNC);
+ ptv->afp_state = state;
+ ptv->down_count = 0;
- if (ioctl(ptv->socket, SIOCGSTAMP, &ts) == -1) {
- /* FIXME */
- return -1;
+ if (state == AFP_STATE_DOWN) {
+ /* cleanup is done on thread cleanup or try reopen
+ * as there may still be packets in autofp that
+ * are referencing us */
+ (void)SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1);
}
-
- if ((ts.tv_sec > synctv->tv_sec) ||
- (ts.tv_sec >= synctv->tv_sec &&
- ts.tv_usec > synctv->tv_usec)) {
- return 1;
+ if (state == AFP_STATE_UP) {
+ AFPPeerUpdate(ptv);
+ (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
}
- return 0;
}
static int AFPReadAndDiscardFromRing(AFPThreadVars *ptv, struct timeval *synctv,
uint64_t *discarded_pkts)
{
- union thdr h;
-
if (unlikely(suricata_ctl_flags != 0)) {
return 1;
}
#ifdef HAVE_TPACKET_V3
if (ptv->flags & AFP_TPACKET_V3) {
int ret = 0;
- struct tpacket_block_desc *pbd;
- pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
+ struct tpacket_block_desc *pbd =
+ (struct tpacket_block_desc *)ptv->ring.v3[ptv->frame_offset].iov_base;
*discarded_pkts += pbd->hdr.bh1.num_pkts;
struct tpacket3_hdr *ppd =
(struct tpacket3_hdr *)((uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt);
#endif
{
/* Read packet from ring */
+ union thdr h;
h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
if (h.raw == NULL) {
return -1;
}
- (*discarded_pkts)++;
+ if (h.h2->tp_status == TP_STATUS_KERNEL)
+ return 0;
+
if (((time_t)h.h2->tp_sec > synctv->tv_sec) ||
((time_t)h.h2->tp_sec == synctv->tv_sec &&
(suseconds_t) (h.h2->tp_nsec / 1000) > synctv->tv_usec)) {
return 1;
}
+ (*discarded_pkts)++;
h.h2->tp_status = TP_STATUS_KERNEL;
if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
ptv->frame_offset = 0;
}
}
-
return 0;
}
if (AFPPeersListStarted() && synctv.tv_sec == (time_t) 0xffffffff) {
gettimeofday(&synctv, NULL);
}
- if (ptv->flags & AFP_RING_MODE) {
- r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
- } else {
- r = AFPReadAndDiscard(ptv, &synctv, discarded_pkts);
- }
+ r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
SCLogDebug("Discarding on %s", ptv->tv->name);
switch (r) {
case 1:
return -1;
}
+ /* ref cnt 0, we can close the old socket */
+ AFPCloseSocket(ptv);
+
int afp_activate_r = AFPCreateSocket(ptv, ptv->iface, 0);
if (afp_activate_r != 0) {
if (ptv->down_count % AFP_DOWN_COUNTER_INTERVAL == 0) {
ptv->slot = s->slot_next;
- if (ptv->flags & AFP_RING_MODE) {
- if (ptv->flags & AFP_TPACKET_V3) {
- AFPReadFunc = AFPReadFromRingV3;
- } else {
- AFPReadFunc = AFPReadFromRing;
- }
+ if (ptv->flags & AFP_TPACKET_V3) {
+ AFPReadFunc = AFPReadFromRingV3;
} else {
- AFPReadFunc = AFPRead;
+ AFPReadFunc = AFPReadFromRing;
}
if (ptv->afp_state == AFP_STATE_DOWN) {
* us from alloc'ing packets at line rate */
PacketPoolWait();
+ StatsIncr(ptv->tv, ptv->capture_afp_poll);
+
r = poll(&fds, 1, POLL_TIMEOUT);
if (suricata_ctl_flags != 0) {
if (r > 0 &&
(fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
+ StatsIncr(ptv->tv, ptv->capture_afp_poll_signal);
if (fds.revents & (POLLHUP | POLLRDHUP)) {
AFPSwitchState(ptv, AFP_STATE_DOWN);
continue;
continue;
}
} else if (r > 0) {
+ StatsIncr(ptv->tv, ptv->capture_afp_poll_data);
r = AFPReadFunc(ptv);
switch (r) {
case AFP_READ_OK:
break;
}
} else if (unlikely(r == 0)) {
+ StatsIncr(ptv->tv, ptv->capture_afp_poll_timeout);
/* Trigger one dump of stats every second */
current_time = time(NULL);
if (current_time != last_dump) {
last_dump = current_time;
}
/* poll timed out, lets see handle our timeout path */
- TmThreadsCaptureHandleTimeout(tv, ptv->slot, NULL);
+ TmThreadsCaptureHandleTimeout(tv, NULL);
} else if ((r < 0) && (errno != EINTR)) {
+ StatsIncr(ptv->tv, ptv->capture_afp_poll_err);
SCLogError(SC_ERR_AFP_READ, "Error reading data from iface '%s': (%d) %s",
ptv->iface,
errno, strerror(errno));
}
#endif
- /* Let's reserve head room so we can add the VLAN header in IPS
- * or TAP mode before write the packet */
- if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
- /* Only one vlan is extracted from AFP header so
- * one VLAN header length is enough. */
- int reserve = VLAN_HEADER_LEN;
- if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *) &reserve,
- sizeof(reserve)) < 0) {
- SCLogError(SC_ERR_AFP_CREATE,
- "Can't activate reserve on packet socket: %s",
- strerror(errno));
- return AFP_FATAL_ERROR;
- }
+ /* Reserve head room for a VLAN header. One vlan is extracted from AFP header
+ * so one VLAN header length is enough. */
+ int reserve = VLAN_HEADER_LEN;
+ if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *)&reserve, sizeof(reserve)) <
+ 0) {
+ SCLogError(
+ SC_ERR_AFP_CREATE, "Can't activate reserve on packet socket: %s", strerror(errno));
+ return AFP_FATAL_ERROR;
}
/* Allocate RX ring */
/** \brief test if we can use FANOUT. Older kernels like those in
* CentOS6 have HAVE_PACKET_FANOUT defined but fail to work
*/
-int AFPIsFanoutSupported(void)
+int AFPIsFanoutSupported(uint16_t cluster_id)
{
#ifdef HAVE_PACKET_FANOUT
int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
if (fd < 0)
return 0;
- uint16_t mode = PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG;
- uint16_t id = 1;
- uint32_t option = (mode << 16) | (id & 0xffff);
+ uint32_t mode = PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG;
+ uint32_t option = (mode << 16) | cluster_id;
int r = setsockopt(fd, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
close(fd);
if (r < 0) {
- SCLogPerf("fanout not supported by kernel: %s", strerror(errno));
+ SCLogError(SC_ERR_INVALID_VALUE, "fanout not supported by kernel: "
+ "Kernel too old or cluster-id %d already in use.", cluster_id);
return 0;
}
return 1;
#ifdef HAVE_PACKET_FANOUT
/* add binded socket to fanout group */
if (ptv->threads > 1) {
- uint16_t mode = ptv->cluster_type;
+ uint32_t mode = ptv->cluster_type;
uint16_t id = ptv->cluster_id;
uint32_t option = (mode << 16) | (id & 0xffff);
r = setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
}
#endif
- if (ptv->flags & AFP_RING_MODE) {
- ret = AFPSetupRing(ptv, devname);
- if (ret != 0)
- goto socket_err;
- }
+ ret = AFPSetupRing(ptv, devname);
+ if (ret != 0)
+ goto socket_err;
SCLogDebug("Using interface '%s' via socket %d", (char *)devname, ptv->socket);
ptv->datalink = AFPGetDevLinktype(ptv->socket, ptv->iface);
- switch (ptv->datalink) {
- case ARPHRD_PPP:
- case ARPHRD_ATM:
- ptv->cooked = 1;
- break;
- }
TmEcode rc = AFPSetBPFFilter(ptv);
if (rc == TM_ECODE_FAILED) {
*
* \param mapfd file descriptor of the protocol bypass table
* \param key data to use as key in the table
- * \param pkts_cnt packet count for the half flow
- * \param bytes_cnt bytes count for the half flow
* \return 0 in case of error, 1 if success
*/
-static int AFPInsertHalfFlow(int mapd, void *key, uint32_t hash,
- uint64_t pkts_cnt, uint64_t bytes_cnt,
- unsigned int nr_cpus)
+static int AFPInsertHalfFlow(int mapd, void *key, unsigned int nr_cpus)
{
BPF_DECLARE_PERCPU(struct pair, value, nr_cpus);
unsigned int i;
for (i = 0; i < nr_cpus; i++) {
BPF_PERCPU(value, i).packets = 0;
BPF_PERCPU(value, i).bytes = 0;
- BPF_PERCPU(value, i).hash = hash;
}
if (bpf_map_update_elem(mapd, key, value, BPF_NOEXIST) != 0) {
switch (errno) {
/* no more place in the hash */
case E2BIG:
return 0;
+ /* no more place in the hash for some hardware bypass */
+ case EAGAIN:
+ return 0;
/* if we already have the key then bypass is a success */
case EEXIST:
return 1;
}
return 1;
}
-#endif
+
+/** \internal
+ * \brief Hand the two eBPF map keys of a bypassed flow over to the flow's
+ *        bypass storage.
+ *
+ * On success the keys are referenced from a newly allocated EBPFBypassData
+ * stored in the flow's FlowBypassInfo, and the bypass stats/success counters
+ * of the live device are incremented.
+ *
+ * On failure (no FlowBypassInfo storage on the flow, or allocation failure)
+ * both keys are deleted from the map and freed, and the bypass failure
+ * counter of the live device is incremented.
+ *
+ * \param p      packet carrying the flow and the live device
+ * \param map_fd file descriptor of the eBPF flow table
+ * \param key0   heap allocated key of the first half flow
+ * \param key1   heap allocated key of the second half flow
+ * \param family AF_INET or AF_INET6, used for the counters
+ * \return 1 on success, 0 on failure
+ */
+static int AFPSetFlowStorage(Packet *p, int map_fd, void *key0, void* key1,
+        int family)
+{
+    EBPFBypassData *bypass = NULL;
+    FlowBypassInfo *info = FlowGetStorageById(p->flow, GetFlowBypassInfoID());
+
+    /* only allocate when there is a storage slot to attach the data to */
+    if (info != NULL) {
+        bypass = SCCalloc(1, sizeof(EBPFBypassData));
+    }
+
+    if (bypass == NULL) {
+        /* roll back: drop both half flows from the map and release the keys */
+        EBPFDeleteKey(map_fd, key0);
+        EBPFDeleteKey(map_fd, key1);
+        LiveDevAddBypassFail(p->livedev, 1, family);
+        SCFree(key0);
+        SCFree(key1);
+        return 0;
+    }
+
+    bypass->key[0] = key0;
+    bypass->key[1] = key1;
+    bypass->mapfd = map_fd;
+    bypass->cpus_count = p->afp_v.nr_cpus;
+
+    info->BypassUpdate = EBPFBypassUpdate;
+    info->BypassFree = EBPFBypassFree;
+    info->bypass_data = bypass;
+
+    LiveDevAddBypassStats(p->livedev, 1, family);
+    LiveDevAddBypassSuccess(p->livedev, 1, family);
+    return 1;
+}
/**
* Bypass function for AF_PACKET capture in eBPF mode
*/
static int AFPBypassCallback(Packet *p)
{
-#ifdef HAVE_PACKET_EBPF
SCLogDebug("Calling af_packet callback function");
/* Only bypass TCP and UDP */
if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
return 0;
}
+ /* If we don't have a flow attached to packet the eBPF map entries
+ * will be destroyed at first flow bypass manager pass as we won't
+ * find any associated entry */
+ if (p->flow == NULL) {
+ return 0;
+ }
/* Bypassing tunneled packets is currently not supported
* because we can't discard the inner packet only due to
* primitive parsing in eBPF */
if (p->afp_v.v4_map_fd == -1) {
return 0;
}
- struct flowv4_keys key = {};
- key.src = htonl(GET_IPV4_SRC_ADDR_U32(p));
- key.dst = htonl(GET_IPV4_DST_ADDR_U32(p));
- key.port16[0] = GET_TCP_SRC_PORT(p);
- key.port16[1] = GET_TCP_DST_PORT(p);
- key.vlan_id[0] = p->vlan_id[0];
- key.vlan_id[1] = p->vlan_id[1];
-
- key.ip_proto = IPV4_GET_IPPROTO(p);
- if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, p->flow_hash, p->flow->todstpktcnt,
- p->flow->todstbytecnt, p->afp_v.nr_cpus) == 0) {
+ struct flowv4_keys *keys[2];
+ keys[0] = SCCalloc(1, sizeof(struct flowv4_keys));
+ if (keys[0] == NULL) {
+ return 0;
+ }
+ keys[0]->src = htonl(GET_IPV4_SRC_ADDR_U32(p));
+ keys[0]->dst = htonl(GET_IPV4_DST_ADDR_U32(p));
+ keys[0]->port16[0] = GET_TCP_SRC_PORT(p);
+ keys[0]->port16[1] = GET_TCP_DST_PORT(p);
+ keys[0]->vlan0 = p->vlan_id[0];
+ keys[0]->vlan1 = p->vlan_id[1];
+
+ if (IPV4_GET_IPPROTO(p) == IPPROTO_TCP) {
+ keys[0]->ip_proto = 1;
+ } else {
+ keys[0]->ip_proto = 0;
+ }
+ if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[0],
+ p->afp_v.nr_cpus) == 0) {
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET);
+ SCFree(keys[0]);
+ return 0;
+ }
+ keys[1]= SCCalloc(1, sizeof(struct flowv4_keys));
+ if (keys[1] == NULL) {
+ EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET);
+ SCFree(keys[0]);
return 0;
}
- key.src = htonl(GET_IPV4_DST_ADDR_U32(p));
- key.dst = htonl(GET_IPV4_SRC_ADDR_U32(p));
- key.port16[0] = GET_TCP_DST_PORT(p);
- key.port16[1] = GET_TCP_SRC_PORT(p);
- if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, p->flow_hash, p->flow->tosrcpktcnt,
- p->flow->tosrcbytecnt, p->afp_v.nr_cpus) == 0) {
+ keys[1]->src = htonl(GET_IPV4_DST_ADDR_U32(p));
+ keys[1]->dst = htonl(GET_IPV4_SRC_ADDR_U32(p));
+ keys[1]->port16[0] = GET_TCP_DST_PORT(p);
+ keys[1]->port16[1] = GET_TCP_SRC_PORT(p);
+ keys[1]->vlan0 = p->vlan_id[0];
+ keys[1]->vlan1 = p->vlan_id[1];
+
+ keys[1]->ip_proto = keys[0]->ip_proto;
+ if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1],
+ p->afp_v.nr_cpus) == 0) {
+ EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET);
+ SCFree(keys[0]);
+ SCFree(keys[1]);
return 0;
}
EBPFUpdateFlow(p->flow, p, NULL);
- return 1;
+ return AFPSetFlowStorage(p, p->afp_v.v4_map_fd, keys[0], keys[1], AF_INET);
}
/* For IPv6 case we don't handle extended header in eBPF */
if (PKT_IS_IPV6(p) &&
return 0;
}
SCLogDebug("add an IPv6");
- struct flowv6_keys key = {};
- for (i = 0; i < 4; i++) {
- key.src[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
- key.dst[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
- }
- key.port16[0] = GET_TCP_SRC_PORT(p);
- key.port16[1] = GET_TCP_DST_PORT(p);
- key.vlan_id[0] = p->vlan_id[0];
- key.vlan_id[1] = p->vlan_id[1];
- key.ip_proto = IPV6_GET_NH(p);
- if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, p->flow_hash, p->flow->todstpktcnt,
- p->flow->todstbytecnt, p->afp_v.nr_cpus) == 0) {
+ struct flowv6_keys *keys[2];
+ keys[0] = SCCalloc(1, sizeof(struct flowv6_keys));
+ if (keys[0] == NULL) {
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
return 0;
}
for (i = 0; i < 4; i++) {
- key.src[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
- key.dst[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
+ keys[0]->src[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
+ keys[0]->dst[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
+ }
+ keys[0]->port16[0] = GET_TCP_SRC_PORT(p);
+ keys[0]->port16[1] = GET_TCP_DST_PORT(p);
+ keys[0]->vlan0 = p->vlan_id[0];
+ keys[0]->vlan1 = p->vlan_id[1];
+
+ if (IPV6_GET_NH(p) == IPPROTO_TCP) {
+ keys[0]->ip_proto = 1;
+ } else {
+ keys[0]->ip_proto = 0;
}
- key.port16[0] = GET_TCP_DST_PORT(p);
- key.port16[1] = GET_TCP_SRC_PORT(p);
- if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, p->flow_hash, p->flow->tosrcpktcnt,
- p->flow->tosrcbytecnt, p->afp_v.nr_cpus) == 0) {
+ if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[0],
+ p->afp_v.nr_cpus) == 0) {
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
+ SCFree(keys[0]);
return 0;
}
- EBPFUpdateFlow(p->flow, p, NULL);
- return 1;
+ keys[1]= SCCalloc(1, sizeof(struct flowv6_keys));
+ if (keys[1] == NULL) {
+ EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
+ SCFree(keys[0]);
+ return 0;
+ }
+ for (i = 0; i < 4; i++) {
+ keys[1]->src[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
+ keys[1]->dst[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
+ }
+ keys[1]->port16[0] = GET_TCP_DST_PORT(p);
+ keys[1]->port16[1] = GET_TCP_SRC_PORT(p);
+ keys[1]->vlan0 = p->vlan_id[0];
+ keys[1]->vlan1 = p->vlan_id[1];
+
+ keys[1]->ip_proto = keys[0]->ip_proto;
+ if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1],
+ p->afp_v.nr_cpus) == 0) {
+ EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
+ SCFree(keys[0]);
+ SCFree(keys[1]);
+ return 0;
+ }
+ if (p->flow)
+ EBPFUpdateFlow(p->flow, p, NULL);
+ return AFPSetFlowStorage(p, p->afp_v.v6_map_fd, keys[0], keys[1], AF_INET6);
}
-#endif
return 0;
}
*/
static int AFPXDPBypassCallback(Packet *p)
{
-#ifdef HAVE_PACKET_XDP
SCLogDebug("Calling af_packet callback function");
/* Only bypass TCP and UDP */
if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
return 0;
}
+ /* If we don't have a flow attached to packet the eBPF map entries
+ * will be destroyed at first flow bypass manager pass as we won't
+ * find any associated entry */
+ if (p->flow == NULL) {
+ return 0;
+ }
/* Bypassing tunneled packets is currently not supported
* because we can't discard the inner packet only due to
* primitive parsing in eBPF */
return 0;
}
if (PKT_IS_IPV4(p)) {
- struct flowv4_keys key = {};
+ struct flowv4_keys *keys[2];
+ keys[0]= SCCalloc(1, sizeof(struct flowv4_keys));
+ if (keys[0] == NULL) {
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET);
+ return 0;
+ }
if (p->afp_v.v4_map_fd == -1) {
+ SCFree(keys[0]);
return 0;
}
- key.src = p->flow->src.addr_data32[0];
- key.dst = p->flow->dst.addr_data32[0];
+ keys[0]->src = p->src.addr_data32[0];
+ keys[0]->dst = p->dst.addr_data32[0];
/* In the XDP filter we get port from parsing of packet and not from skb
* (as in eBPF filter) so we need to pass from host to network order */
- key.port16[0] = htons(p->flow->sp);
- key.port16[1] = htons(p->flow->dp);
- key.vlan_id[0] = p->vlan_id[0];
- key.vlan_id[1] = p->vlan_id[1];
- key.ip_proto = IPV4_GET_IPPROTO(p);
- if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, p->flow_hash, p->flow->todstpktcnt,
- p->flow->todstbytecnt, p->afp_v.nr_cpus) == 0) {
+ keys[0]->port16[0] = htons(p->sp);
+ keys[0]->port16[1] = htons(p->dp);
+ keys[0]->vlan0 = p->vlan_id[0];
+ keys[0]->vlan1 = p->vlan_id[1];
+ if (IPV4_GET_IPPROTO(p) == IPPROTO_TCP) {
+ keys[0]->ip_proto = 1;
+ } else {
+ keys[0]->ip_proto = 0;
+ }
+ if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[0],
+ p->afp_v.nr_cpus) == 0) {
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET);
+ SCFree(keys[0]);
return 0;
}
- key.src = p->flow->dst.addr_data32[0];
- key.dst = p->flow->src.addr_data32[0];
- key.port16[0] = htons(p->flow->dp);
- key.port16[1] = htons(p->flow->sp);
- if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, p->flow_hash, p->flow->tosrcpktcnt,
- p->flow->tosrcbytecnt, p->afp_v.nr_cpus) == 0) {
+ keys[1]= SCCalloc(1, sizeof(struct flowv4_keys));
+ if (keys[1] == NULL) {
+ EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET);
+ SCFree(keys[0]);
return 0;
}
- return 1;
+ keys[1]->src = p->dst.addr_data32[0];
+ keys[1]->dst = p->src.addr_data32[0];
+ keys[1]->port16[0] = htons(p->dp);
+ keys[1]->port16[1] = htons(p->sp);
+ keys[1]->vlan0 = p->vlan_id[0];
+ keys[1]->vlan1 = p->vlan_id[1];
+ keys[1]->ip_proto = keys[0]->ip_proto;
+ if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1],
+ p->afp_v.nr_cpus) == 0) {
+ EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET);
+ SCFree(keys[0]);
+ SCFree(keys[1]);
+ return 0;
+ }
+ return AFPSetFlowStorage(p, p->afp_v.v4_map_fd, keys[0], keys[1], AF_INET);
}
/* For IPv6 case we don't handle extended header in eBPF */
if (PKT_IS_IPV6(p) &&
return 0;
}
int i;
- struct flowv6_keys key = {};
- for (i = 0; i < 4; i++) {
- key.src[i] = GET_IPV6_SRC_ADDR(p)[i];
- key.dst[i] = GET_IPV6_DST_ADDR(p)[i];
- }
- key.port16[0] = htons(GET_TCP_SRC_PORT(p));
- key.port16[1] = htons(GET_TCP_DST_PORT(p));
- key.vlan_id[0] = p->vlan_id[0];
- key.vlan_id[1] = p->vlan_id[1];
- key.ip_proto = IPV6_GET_NH(p);
- if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, p->flow_hash, p->flow->todstpktcnt,
- p->flow->todstbytecnt, p->afp_v.nr_cpus) == 0) {
+ struct flowv6_keys *keys[2];
+ keys[0] = SCCalloc(1, sizeof(struct flowv6_keys));
+ if (keys[0] == NULL) {
return 0;
}
+
for (i = 0; i < 4; i++) {
- key.src[i] = GET_IPV6_DST_ADDR(p)[i];
- key.dst[i] = GET_IPV6_SRC_ADDR(p)[i];
+ keys[0]->src[i] = GET_IPV6_SRC_ADDR(p)[i];
+ keys[0]->dst[i] = GET_IPV6_DST_ADDR(p)[i];
+ }
+ keys[0]->port16[0] = htons(GET_TCP_SRC_PORT(p));
+ keys[0]->port16[1] = htons(GET_TCP_DST_PORT(p));
+ keys[0]->vlan0 = p->vlan_id[0];
+ keys[0]->vlan1 = p->vlan_id[1];
+ if (IPV6_GET_NH(p) == IPPROTO_TCP) {
+ keys[0]->ip_proto = 1;
+ } else {
+ keys[0]->ip_proto = 0;
}
- key.port16[0] = htons(GET_TCP_DST_PORT(p));
- key.port16[1] = htons(GET_TCP_SRC_PORT(p));
- if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, p->flow_hash, p->flow->tosrcpktcnt,
- p->flow->tosrcbytecnt, p->afp_v.nr_cpus) == 0) {
+ if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[0],
+ p->afp_v.nr_cpus) == 0) {
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
+ SCFree(keys[0]);
return 0;
}
- return 1;
+ keys[1]= SCCalloc(1, sizeof(struct flowv6_keys));
+ if (keys[1] == NULL) {
+ EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
+ SCFree(keys[0]);
+ return 0;
+ }
+ for (i = 0; i < 4; i++) {
+ keys[1]->src[i] = GET_IPV6_DST_ADDR(p)[i];
+ keys[1]->dst[i] = GET_IPV6_SRC_ADDR(p)[i];
+ }
+ keys[1]->port16[0] = htons(GET_TCP_DST_PORT(p));
+ keys[1]->port16[1] = htons(GET_TCP_SRC_PORT(p));
+ keys[1]->vlan0 = p->vlan_id[0];
+ keys[1]->vlan1 = p->vlan_id[1];
+ keys[1]->ip_proto = keys[0]->ip_proto;
+ if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1],
+ p->afp_v.nr_cpus) == 0) {
+ EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
+ SCFree(keys[0]);
+ SCFree(keys[1]);
+ return 0;
+ }
+ return AFPSetFlowStorage(p, p->afp_v.v6_map_fd, keys[0], keys[1], AF_INET6);
}
-#endif
return 0;
}
+bool g_flowv4_ok = true;
+bool g_flowv6_ok = true;
+
+#endif /* HAVE_PACKET_EBPF */
+
/**
* \brief Init function for ReceiveAFP.
*
memset(ptv, 0, sizeof(AFPThreadVars));
ptv->tv = tv;
- ptv->cooked = 0;
strlcpy(ptv->iface, afpconfig->iface, AFP_IFACE_NAME_LENGTH);
ptv->iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
if (afpconfig->bpf_filter) {
ptv->bpf_filter = afpconfig->bpf_filter;
}
+#ifdef HAVE_PACKET_EBPF
ptv->ebpf_lb_fd = afpconfig->ebpf_lb_fd;
ptv->ebpf_filter_fd = afpconfig->ebpf_filter_fd;
ptv->xdp_mode = afpconfig->xdp_mode;
-#ifdef HAVE_PACKET_EBPF
ptv->ebpf_t_config.cpus_count = UtilCpuGetNumProcessorsConfigured();
if (ptv->flags & (AFP_BYPASS|AFP_XDPBYPASS)) {
ptv->v4_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v4");
if (ptv->v4_map_fd == -1) {
- SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'", "flow_table_v4");
+ /* report the missing IPv4 flow table only once across threads: the flag
+ * starts out true and is cleared after the first report, same as done
+ * for flow_table_v6 below */
+ if (g_flowv4_ok) {
+ SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'",
+ "flow_table_v4");
+ g_flowv4_ok = false;
+ }
}
ptv->v6_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v6");
if (ptv->v6_map_fd == -1) {
- SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'", "flow_table_v6");
+ if (g_flowv6_ok) {
+ SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'",
+ "flow_table_v6");
+ g_flowv6_ok = false;
+ }
}
}
ptv->ebpf_t_config = afpconfig->ebpf_t_config;
ptv->tv);
ptv->capture_errors = StatsRegisterCounter("capture.errors",
ptv->tv);
+
+ ptv->afpacket_spin = StatsRegisterAvgCounter("capture.afpacket.busy_loop_avg", ptv->tv);
+
+ ptv->capture_afp_poll = StatsRegisterCounter("capture.afpacket.polls", ptv->tv);
+ ptv->capture_afp_poll_signal = StatsRegisterCounter("capture.afpacket.poll_signal", ptv->tv);
+ ptv->capture_afp_poll_timeout = StatsRegisterCounter("capture.afpacket.poll_timeout", ptv->tv);
+ ptv->capture_afp_poll_data = StatsRegisterCounter("capture.afpacket.poll_data", ptv->tv);
+ ptv->capture_afp_poll_err = StatsRegisterCounter("capture.afpacket.poll_errors", ptv->tv);
#endif
ptv->copy_mode = afpconfig->copy_mode;
SCReturnInt(TM_ECODE_FAILED);
}
-#define T_DATA_SIZE 70000
- ptv->data = SCMalloc(T_DATA_SIZE);
- if (ptv->data == NULL) {
- afpconfig->DerefFunc(afpconfig);
- SCFree(ptv);
- SCReturnInt(TM_ECODE_FAILED);
- }
- ptv->datalen = T_DATA_SIZE;
-#undef T_DATA_SIZE
-
*data = (void *)ptv;
afpconfig->DerefFunc(afpconfig);
- /* A bit strange to have this here but we only have vlan information
- * during reading so we need to know if we want to keep vlan during
- * the capture phase */
- int vlanbool = 0;
- if ((ConfGetBool("vlan.use-for-tracking", &vlanbool)) == 1 && vlanbool == 0) {
- ptv->flags |= AFP_VLAN_DISABLED;
- }
-
/* If kernel is older than 3.0, VLAN is not stripped so we don't
* get the info from packet extended header but we will use a standard
* parsing of packet data (See Linux commit bcc6d47903612c3861201cc3a866fb604f26b8b2) */
- if (! SCKernelVersionIsAtLeast(3, 0)) {
- ptv->flags |= AFP_VLAN_DISABLED;
+ if (SCKernelVersionIsAtLeast(3, 0)) {
+ ptv->flags |= AFP_VLAN_IN_HEADER;
}
SCReturnInt(TM_ECODE_OK);
EBPFSetupXDP(ptv->iface, -1, ptv->xdp_mode);
}
#endif
- if (ptv->data != NULL) {
- SCFree(ptv->data);
- ptv->data = NULL;
- }
- ptv->datalen = 0;
ptv->bpf_filter = NULL;
if ((ptv->flags & AFP_TPACKET_V3) && ptv->ring.v3) {
SCReturnInt(TM_ECODE_OK);
}
+/** \internal
+ * \brief add a VLAN header into the raw data for inspection, logging
+ *        and sending out in IPS mode
+ *
+ * The kernel doesn't provide the first VLAN header in the raw packet data,
+ * but instead feeds it to us through meta data. For logging and IPS
+ * we need to put it back into the raw data. Luckily there is some head
+ * room in the original data so it's enough to move the ethernet header
+ * a bit to make space for the VLAN header.
+ *
+ * Nothing is done when p->afp_v.vlan_tci is 0.
+ *
+ * \param p packet whose raw data pointer, length and ethernet header pointer
+ *          are updated to include the re-inserted VLAN header
+ */
+static void UpdateRawDataForVLANHdr(Packet *p)
+{
+    if (p->afp_v.vlan_tci == 0) {
+        return;
+    }
+
+    uint8_t *pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
+    const size_t plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
+    /* both fields are written in network byte order */
+    const uint16_t tpid = htons(0x8100);
+    const uint16_t tci = htons(p->afp_v.vlan_tci);
+
+    /* move ethernet addresses; regions overlap, hence memmove */
+    memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
+    /* write vlan info; memcpy avoids storing through a casted, possibly
+     * misaligned uint16_t pointer into the byte buffer */
+    memcpy(pstart + 2 * ETH_ALEN, &tpid, sizeof(tpid));
+    memcpy(pstart + 2 * ETH_ALEN + sizeof(tpid), &tci, sizeof(tci));
+
+    /* update the packet raw data pointer to start at the new offset */
+    (void)PacketSetData(p, pstart, plen);
+    /* update ethernet header pointer to point to the new start of the data */
+    p->ethh = (void *)pstart;
+}
+
/**
* \brief This function passes off to link type decoders.
*
- * DecodeAFP reads packets from the PacketQueue and passes
+ * DecodeAFP decodes packets from AF_PACKET and passes
* them off to the proper link type decoder.
*
* \param t pointer to ThreadVars
* \param p pointer to the current packet
* \param data pointer that gets cast into AFPThreadVars for ptv
- * \param pq pointer to the current PacketQueue
*/
-TmEcode DecodeAFP(ThreadVars *tv, Packet *p, void *data, PacketQueue *pq, PacketQueue *postpq)
+TmEcode DecodeAFP(ThreadVars *tv, Packet *p, void *data)
{
SCEnter();
+
+ const bool afp_vlan_hdr = p->vlan_idx != 0;
DecodeThreadVars *dtv = (DecodeThreadVars *)data;
- /* XXX HACK: flow timeout can call us for injected pseudo packets
- * see bug: https://redmine.openinfosecfoundation.org/issues/1107 */
- if (p->flags & PKT_PSEUDO_STREAM_END)
- return TM_ECODE_OK;
+ DEBUG_VALIDATE_BUG_ON(PKT_IS_PSEUDOPKT(p));
/* update counters */
DecodeUpdatePacketCounters(tv, dtv, p);
- /* If suri has set vlan during reading, we increase vlan counter */
- if (p->vlan_idx) {
- StatsIncr(tv, dtv->counter_vlan);
- }
-
/* call the decoder */
- switch (p->datalink) {
- case LINKTYPE_ETHERNET:
- DecodeEthernet(tv, dtv, p,GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
- break;
- case LINKTYPE_LINUX_SLL:
- DecodeSll(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
- break;
- case LINKTYPE_PPP:
- DecodePPP(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
- break;
- case LINKTYPE_RAW:
- case LINKTYPE_GRE_OVER_IP:
- DecodeRaw(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
- break;
- case LINKTYPE_NULL:
- DecodeNull(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
- break;
- default:
- SCLogError(SC_ERR_DATALINK_UNIMPLEMENTED, "Error: datalink type %" PRId32 " not yet supported in module DecodeAFP", p->datalink);
- break;
+ DecodeLinkLayer(tv, dtv, p->datalink, p, GET_PKT_DATA(p), GET_PKT_LEN(p));
+ /* post-decoding put vlan hdr back into the raw data */
+ if (afp_vlan_hdr) {
+ StatsIncr(tv, dtv->counter_vlan);
+ UpdateRawDataForVLANHdr(p);
}
PacketDecodeFinalize(tv, dtv, p);
TmEcode DecodeAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
{
SCEnter();
- DecodeThreadVars *dtv = NULL;
-
- dtv = DecodeThreadVarsAlloc(tv);
-
+ DecodeThreadVars *dtv = DecodeThreadVarsAlloc(tv);
if (dtv == NULL)
SCReturnInt(TM_ECODE_FAILED);