-/* Copyright (C) 2011-2017 Open Information Security Foundation
+/* Copyright (C) 2011-2021 Open Information Security Foundation
*
* You can copy, redistribute or modify this Program under the terms of
* the GNU General Public License version 2 as published by the Free
*
* AF_PACKET socket acquisition support
*
- * \todo watch other interface event to detect suppression of the monitored
- * interface
*/
+#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
+#define SC_PCAP_DONT_INCLUDE_PCAP_H 1
#include "suricata-common.h"
-#include "config.h"
#include "suricata.h"
#include "decode.h"
#include "packet-queue.h"
#include "tm-threads.h"
#include "tm-threads-common.h"
#include "conf.h"
+#include "util-cpu.h"
#include "util-debug.h"
#include "util-device.h"
+#include "util-ebpf.h"
#include "util-error.h"
#include "util-privs.h"
#include "util-optimize.h"
#include "tmqh-packetpool.h"
#include "source-af-packet.h"
#include "runmodes.h"
+#include "flow-storage.h"
+#include "util-validate.h"
#ifdef HAVE_AF_PACKET
#include <sys/ioctl.h>
#endif
+#if HAVE_LINUX_SOCKIOS_H
+#include <linux/sockios.h>
+#endif
+
+#ifdef HAVE_PACKET_EBPF
+#include "util-ebpf.h"
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+#endif
+
+/* Local declaration of struct bpf_program, placed ahead of the pcap includes.
+ * NOTE(review): presumably this stands in for the definition from pcap's BPF
+ * header, which PCAP_DONT_INCLUDE_PCAP_BPF_H (defined at the top of the file)
+ * appears to suppress -- confirm against the pcap headers in use. */
+struct bpf_program {
+ unsigned int bf_len; /* presumably the number of entries in bf_insns -- confirm */
+ struct bpf_insn *bf_insns; /* presumably the filter instruction array -- confirm */
+};
+
+#ifdef HAVE_PCAP_H
+#include <pcap.h>
+#endif
+
+#ifdef HAVE_PCAP_PCAP_H
+#include <pcap/pcap.h>
+#endif
+
+#include "util-bpf.h"
+
#if HAVE_LINUX_IF_ETHER_H
#include <linux/if_ether.h>
#endif
tmm_modules[TMM_RECEIVEAFP].Func = NULL;
tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = NULL;
tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = NULL;
- tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
tmm_modules[TMM_RECEIVEAFP].cap_flags = 0;
tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
}
/**
* \brief Registration Function for DecodeAFP.
- * \todo Unit tests are needed for this module.
*/
void TmModuleDecodeAFPRegister (void)
{
tmm_modules[TMM_DECODEAFP].Func = NULL;
tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
tmm_modules[TMM_DECODEAFP].ThreadDeinit = NULL;
- tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
tmm_modules[TMM_DECODEAFP].cap_flags = 0;
tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
}
#define POLL_TIMEOUT 100
-#ifndef TP_STATUS_USER_BUSY
-/* for new use latest bit available in tp_status */
-#define TP_STATUS_USER_BUSY (1 << 31)
+/* kernel flags defined for RX ring tp_status */
+#ifndef TP_STATUS_KERNEL
+#define TP_STATUS_KERNEL 0
+#endif
+#ifndef TP_STATUS_USER
+#define TP_STATUS_USER BIT_U32(0)
+#endif
+#ifndef TP_STATUS_COPY
+#define TP_STATUS_COPY BIT_U32(1)
+#endif
+#ifndef TP_STATUS_LOSING
+#define TP_STATUS_LOSING BIT_U32(2)
+#endif
+#ifndef TP_STATUS_CSUMNOTREADY
+#define TP_STATUS_CSUMNOTREADY BIT_U32(3)
#endif
-
#ifndef TP_STATUS_VLAN_VALID
-#define TP_STATUS_VLAN_VALID (1 << 4)
+#define TP_STATUS_VLAN_VALID BIT_U32(4)
+#endif
+#ifndef TP_STATUS_BLK_TMO
+#define TP_STATUS_BLK_TMO BIT_U32(5)
+#endif
+#ifndef TP_STATUS_VLAN_TPID_VALID
+#define TP_STATUS_VLAN_TPID_VALID BIT_U32(6)
+#endif
+#ifndef TP_STATUS_CSUM_VALID
+#define TP_STATUS_CSUM_VALID BIT_U32(7)
#endif
-/** protect pfring_set_bpf_filter, as it is not thread safe */
-static SCMutex afpacket_bpf_set_filter_lock = SCMUTEX_INITIALIZER;
+#ifndef TP_STATUS_TS_SOFTWARE
+#define TP_STATUS_TS_SOFTWARE BIT_U32(29)
+#endif
+#ifndef TP_STATUS_TS_SYS_HARDWARE
+#define TP_STATUS_TS_SYS_HARDWARE BIT_U32(30) /* kernel comment says: "deprecated, never set" */
+#endif
+#ifndef TP_STATUS_TS_RAW_HARDWARE
+#define TP_STATUS_TS_RAW_HARDWARE BIT_U32(31)
+#endif
+
+#ifndef TP_STATUS_USER_BUSY
+/* HACK special setting in the tp_status field for frames we are
+ * still working on. This can happen in autofp mode where the
+ * capture thread goes around the ring and finds a frame that still
+ * hasn't been released by a worker thread.
+ *
+ * We use bits 29, 30, 31. 29 and 31 are software and hardware
+ * timestamps. 30 should not be set by the kernel at all. Combined
+ * they should never be set on the rx-ring together.
+ *
+ * The excessive casting is for handling the fact that the kernel
+ * defines almost all of these as int flags, not unsigned ints. */
+#define TP_STATUS_USER_BUSY \
+ (uint32_t)((uint32_t)TP_STATUS_TS_SOFTWARE | (uint32_t)TP_STATUS_TS_SYS_HARDWARE | \
+ (uint32_t)TP_STATUS_TS_RAW_HARDWARE)
+#endif
+#define FRAME_BUSY(tp_status) \
+ (((uint32_t)(tp_status) & (uint32_t)TP_STATUS_USER_BUSY) == (uint32_t)TP_STATUS_USER_BUSY)
enum {
AFP_READ_OK,
AFP_READ_FAILURE,
- AFP_FAILURE,
+ /** Error during treatment by other functions of Suricata */
+ AFP_SURI_FAILURE,
AFP_KERNEL_DROP,
};
void *raw;
};
+#ifdef HAVE_PACKET_EBPF
+static int AFPBypassCallback(Packet *p);
+static int AFPXDPBypassCallback(Packet *p);
+#endif
+
+#define MAX_MAPS 32
/**
* \brief Structure to hold thread specific variables.
*/
typedef struct AFPThreadVars_
{
- union {
- char *ring_v2;
- struct iovec *ring_v3;
- };
+ union AFPRing {
+ char *v2;
+ struct iovec *v3;
+ } ring;
/* counters */
uint64_t pkts;
/* data link type for the thread */
uint32_t datalink;
+#ifdef HAVE_PACKET_EBPF
+ /* File descriptor of the IPv4 flow bypass table maps */
+ int v4_map_fd;
+ /* File descriptor of the IPv6 flow bypass table maps */
+ int v6_map_fd;
+#endif
+
unsigned int frame_offset;
ChecksumValidationMode checksum_mode;
/* references to packet and drop counters */
uint16_t capture_kernel_packets;
uint16_t capture_kernel_drops;
+ uint16_t capture_errors;
+ uint16_t afpacket_spin;
+ uint16_t capture_afp_poll;
+ uint16_t capture_afp_poll_signal;
+ uint16_t capture_afp_poll_timeout;
+ uint16_t capture_afp_poll_data;
+ uint16_t capture_afp_poll_err;
/* handle state */
uint8_t afp_state;
uint8_t copy_mode;
- uint8_t flags;
+ unsigned int flags;
/* IPS peer */
AFPPeer *mpeer;
- /* no mmap mode */
- uint8_t *data; /** Per function and thread data */
- int datalen; /** Length of per function and thread data */
- int cooked;
-
/*
* Init related members
*/
int down_count;
- int cluster_id;
+ uint16_t cluster_id;
int cluster_type;
int threads;
- union {
- struct tpacket_req req;
+ union AFPTpacketReq {
+ struct tpacket_req v2;
#ifdef HAVE_TPACKET_V3
- struct tpacket_req3 req3;
+ struct tpacket_req3 v3;
#endif
- };
+ } req;
char iface[AFP_IFACE_NAME_LENGTH];
/* IPS output iface */
char out_iface[AFP_IFACE_NAME_LENGTH];
+ /* mmap'ed ring buffer */
+ unsigned int ring_buflen;
+ uint8_t *ring_buf;
+
+#ifdef HAVE_PACKET_EBPF
+ uint8_t xdp_mode;
+ int ebpf_lb_fd;
+ int ebpf_filter_fd;
+ struct ebpf_timeout_config ebpf_t_config;
+#endif
+
} AFPThreadVars;
-TmEcode ReceiveAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
-TmEcode ReceiveAFPThreadInit(ThreadVars *, const void *, void **);
-void ReceiveAFPThreadExitStats(ThreadVars *, void *);
-TmEcode ReceiveAFPThreadDeinit(ThreadVars *, void *);
-TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot);
+static TmEcode ReceiveAFPThreadInit(ThreadVars *, const void *, void **);
+static void ReceiveAFPThreadExitStats(ThreadVars *, void *);
+static TmEcode ReceiveAFPThreadDeinit(ThreadVars *, void *);
+static TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot);
-TmEcode DecodeAFPThreadInit(ThreadVars *, const void *, void **);
-TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data);
-TmEcode DecodeAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
+static TmEcode DecodeAFPThreadInit(ThreadVars *, const void *, void **);
+static TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data);
+static TmEcode DecodeAFP(ThreadVars *, Packet *, void *);
-TmEcode AFPSetBPFFilter(AFPThreadVars *ptv);
+static TmEcode AFPSetBPFFilter(AFPThreadVars *ptv);
static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose);
static int AFPGetDevFlags(int fd, const char *ifname);
static int AFPDerefSocket(AFPPeer* peer);
static int AFPRefSocket(AFPPeer* peer);
+
/**
* \brief Registration Function for RecieveAFP.
* \todo Unit tests are needed for this module.
tmm_modules[TMM_RECEIVEAFP].PktAcqBreakLoop = NULL;
tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = ReceiveAFPThreadExitStats;
tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = ReceiveAFPThreadDeinit;
- tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
tmm_modules[TMM_RECEIVEAFP].cap_flags = SC_CAP_NET_RAW;
tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
-}
+}
/**
* \defgroup afppeers AFP peers list
(void)SC_ATOMIC_SET(ptv->mpeer->if_idx, AFPGetIfnumByDev(ptv->socket, ptv->iface, 0));
(void)SC_ATOMIC_SET(ptv->mpeer->socket, ptv->socket);
(void)SC_ATOMIC_SET(ptv->mpeer->state, ptv->afp_state);
+ (void)SC_ATOMIC_SET(ptv->mpeer->send_errors, 0);
}
/**
{
if (peer->flags & AFP_SOCK_PROTECT)
SCMutexDestroy(&peer->sock_protect);
- SC_ATOMIC_DESTROY(peer->socket);
- SC_ATOMIC_DESTROY(peer->if_idx);
- SC_ATOMIC_DESTROY(peer->state);
SCFree(peer);
}
if (peerslist.turn == 0)
return;
- if (SC_ATOMIC_ADD(peerslist.reached, 1) == peerslist.turn) {
+ if ((SC_ATOMIC_ADD(peerslist.reached, 1) + 1) == peerslist.turn) {
SCLogInfo("All AFP capture threads are running.");
(void)SC_ATOMIC_SET(peerslist.reached, 0);
/* Set turn to 0 to skip syncrhonization when ReceiveAFPLoop is
tmm_modules[TMM_DECODEAFP].Func = DecodeAFP;
tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
tmm_modules[TMM_DECODEAFP].ThreadDeinit = DecodeAFPThreadDeinit;
- tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
tmm_modules[TMM_DECODEAFP].cap_flags = 0;
tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
}
#endif
}
-/**
- * \brief AF packet read function.
- *
- * This function fills
- * From here the packets are picked up by the DecodeAFP thread.
- *
- * \param user pointer to AFPThreadVars
- * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
- */
-static int AFPRead(AFPThreadVars *ptv)
-{
- Packet *p = NULL;
- /* XXX should try to use read that get directly to packet */
- int offset = 0;
- int caplen;
- struct sockaddr_ll from;
- struct iovec iov;
- struct msghdr msg;
- struct cmsghdr *cmsg;
- union {
- struct cmsghdr cmsg;
- char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
- } cmsg_buf;
- unsigned char aux_checksum = 0;
-
- msg.msg_name = &from;
- msg.msg_namelen = sizeof(from);
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- msg.msg_control = &cmsg_buf;
- msg.msg_controllen = sizeof(cmsg_buf);
- msg.msg_flags = 0;
-
- if (ptv->cooked)
- offset = SLL_HEADER_LEN;
- else
- offset = 0;
- iov.iov_len = ptv->datalen - offset;
- iov.iov_base = ptv->data + offset;
-
- caplen = recvmsg(ptv->socket, &msg, MSG_TRUNC);
-
- if (caplen < 0) {
- SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
- errno);
- SCReturnInt(AFP_READ_FAILURE);
- }
-
- p = PacketGetFromQueueOrAlloc();
- if (p == NULL) {
- SCReturnInt(AFP_FAILURE);
- }
- PKT_SET_SRC(p, PKT_SRC_WIRE);
-
- /* get timestamp of packet via ioctl */
- if (ioctl(ptv->socket, SIOCGSTAMP, &p->ts) == -1) {
- SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
- errno);
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_READ_FAILURE);
- }
-
- ptv->pkts++;
- p->livedev = ptv->livedev;
-
- /* add forged header */
- if (ptv->cooked) {
- SllHdr * hdrp = (SllHdr *)ptv->data;
- /* XXX this is minimalist, but this seems enough */
- hdrp->sll_protocol = from.sll_protocol;
- }
-
- p->datalink = ptv->datalink;
- SET_PKT_LEN(p, caplen + offset);
- if (PacketCopyData(p, ptv->data, GET_PKT_LEN(p)) == -1) {
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_FAILURE);
- }
- SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
- GET_PKT_LEN(p), p, GET_PKT_DATA(p));
-
- /* We only check for checksum disable */
- if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
- p->flags |= PKT_IGNORE_CHECKSUM;
- } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
- if (ptv->livedev->ignore_checksum) {
- p->flags |= PKT_IGNORE_CHECKSUM;
- } else if (ChecksumAutoModeCheck(ptv->pkts,
- SC_ATOMIC_GET(ptv->livedev->pkts),
- SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
- ptv->livedev->ignore_checksum = 1;
- p->flags |= PKT_IGNORE_CHECKSUM;
- }
- } else {
- aux_checksum = 1;
- }
-
- /* List is NULL if we don't have activated auxiliary data */
- for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
- struct tpacket_auxdata *aux;
-
- if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata)) ||
- cmsg->cmsg_level != SOL_PACKET ||
- cmsg->cmsg_type != PACKET_AUXDATA)
- continue;
-
- aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
-
- if (aux_checksum && (aux->tp_status & TP_STATUS_CSUMNOTREADY)) {
- p->flags |= PKT_IGNORE_CHECKSUM;
- }
- break;
- }
-
- if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_FAILURE);
- }
- SCReturnInt(AFP_READ_OK);
-}
-
/**
* \brief AF packet write function.
*
* \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
*
*/
-static TmEcode AFPWritePacket(Packet *p, int version)
+static void AFPWritePacket(Packet *p, int version)
{
struct sockaddr_ll socket_address;
int socket;
- uint8_t *pstart;
- size_t plen;
- union thdr h;
- uint16_t vlan_tci = 0;
if (p->afp_v.copy_mode == AFP_COPY_MODE_IPS) {
- if (PACKET_TEST_ACTION(p, ACTION_DROP)) {
- return TM_ECODE_OK;
+ if (PacketTestAction(p, ACTION_DROP)) {
+ return;
}
}
- if (SC_ATOMIC_GET(p->afp_v.peer->state) == AFP_STATE_DOWN)
- return TM_ECODE_OK;
-
if (p->ethh == NULL) {
SCLogWarning(SC_ERR_INVALID_VALUE, "Should have an Ethernet header");
- return TM_ECODE_FAILED;
+ return;
}
+
/* Index of the network device */
socket_address.sll_ifindex = SC_ATOMIC_GET(p->afp_v.peer->if_idx);
/* Address length*/
SCMutexLock(&p->afp_v.peer->sock_protect);
socket = SC_ATOMIC_GET(p->afp_v.peer->socket);
- h.raw = p->afp_v.relptr;
-
- if (version == TPACKET_V2) {
- /* Copy VLAN header from ring memory. For post june 2011 kernel we test
- * the flag. It is not defined for older kernel so we go best effort
- * and test for non zero value of the TCI header. */
- if (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci) {
- vlan_tci = h.h2->tp_vlan_tci;
- }
- } else {
-#ifdef HAVE_TPACKET_V3
- if (h.h3->tp_status & TP_STATUS_VLAN_VALID || h.h3->hv1.tp_vlan_tci) {
- vlan_tci = h.h3->hv1.tp_vlan_tci;
+ if (sendto(socket, GET_PKT_DATA(p), GET_PKT_LEN(p), 0, (struct sockaddr *)&socket_address,
+ sizeof(struct sockaddr_ll)) < 0) {
+ if (SC_ATOMIC_ADD(p->afp_v.peer->send_errors, 1) == 0) {
+ SCLogWarning(SC_ERR_SOCKET, "sending packet failed on socket %d: %s", socket,
+ strerror(errno));
}
-#else
- /* Should not get here */
- BUG_ON(1);
-#endif
- }
-
- if (vlan_tci != 0) {
- pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
- plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
- /* move ethernet addresses */
- memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
- /* write vlan info */
- *(uint16_t *)(pstart + 2 * ETH_ALEN) = htons(0x8100);
- *(uint16_t *)(pstart + 2 * ETH_ALEN + 2) = htons(vlan_tci);
- } else {
- pstart = GET_PKT_DATA(p);
- plen = GET_PKT_LEN(p);
- }
-
- if (sendto(socket, pstart, plen, 0,
- (struct sockaddr*) &socket_address,
- sizeof(struct sockaddr_ll)) < 0) {
- SCLogWarning(SC_ERR_SOCKET, "Sending packet failed on socket %d: %s",
- socket,
- strerror(errno));
- if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
- SCMutexUnlock(&p->afp_v.peer->sock_protect);
- return TM_ECODE_FAILED;
}
if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
SCMutexUnlock(&p->afp_v.peer->sock_protect);
-
- return TM_ECODE_OK;
}
static void AFPReleaseDataFromRing(Packet *p)
{
+ DEBUG_VALIDATE_BUG_ON(PKT_IS_PSEUDOPKT(p));
+
/* Need to be in copy mode and need to detect early release
where Ethernet header could not be set (and pseudo packet) */
- if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
+ if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
AFPWritePacket(p, TPACKET_V2);
}
- if (AFPDerefSocket(p->afp_v.mpeer) == 0)
- goto cleanup;
+ BUG_ON(p->afp_v.relptr == NULL);
- if (p->afp_v.relptr) {
- union thdr h;
- h.raw = p->afp_v.relptr;
- h.h2->tp_status = TP_STATUS_KERNEL;
- }
+ union thdr h;
+ h.raw = p->afp_v.relptr;
+ h.h2->tp_status = TP_STATUS_KERNEL;
+
+ (void)AFPDerefSocket(p->afp_v.mpeer);
-cleanup:
AFPV_CLEANUP(&p->afp_v);
}
#ifdef HAVE_TPACKET_V3
static void AFPReleasePacketV3(Packet *p)
{
+ DEBUG_VALIDATE_BUG_ON(PKT_IS_PSEUDOPKT(p));
+
/* Need to be in copy mode and need to detect early release
where Ethernet header could not be set (and pseudo packet) */
- if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
+ if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
AFPWritePacket(p, TPACKET_V3);
}
PacketFreeOrRelease(p);
PacketFreeOrRelease(p);
}
+/** \internal
+ *  \brief recoverable error inside Suricata: mark the frame as
+ *         TP_STATUS_KERNEL, move on to the next ring slot (wrapping
+ *         back to 0 at tp_frame_nr) and report AFP_SURI_FAILURE
+ */
+static inline int AFPSuriFailure(AFPThreadVars *ptv, union thdr h)
+{
+    const unsigned int next_slot = ptv->frame_offset + 1;
+
+    h.h2->tp_status = TP_STATUS_KERNEL;
+    ptv->frame_offset = (next_slot >= ptv->req.v2.tp_frame_nr) ? 0 : next_slot;
+    SCReturnInt(AFP_SURI_FAILURE);
+}
+
+/** \internal
+ *  \brief set the flow bypass callback and the bypass map handles on a packet
+ *
+ *  No-op unless built with HAVE_PACKET_EBPF and the thread has AFP_BYPASS
+ *  and/or AFP_XDPBYPASS set. If both flags are set, the XDP callback is the
+ *  one left installed.
+ */
+static inline void AFPReadApplyBypass(const AFPThreadVars *ptv, Packet *p)
+{
+#ifdef HAVE_PACKET_EBPF
+    const bool want_ebpf = (ptv->flags & AFP_BYPASS) != 0;
+    const bool want_xdp = (ptv->flags & AFP_XDPBYPASS) != 0;
+
+    if (!want_ebpf && !want_xdp) {
+        return;
+    }
+
+    /* XDP bypass wins when both flags are set */
+    p->BypassPacketsFlow = want_xdp ? AFPXDPBypassCallback : AFPBypassCallback;
+    p->afp_v.v4_map_fd = ptv->v4_map_fd;
+    p->afp_v.v6_map_fd = ptv->v6_map_fd;
+    p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
+#endif
+}
+
+/** \internal
+ * \brief setup packet for AFPReadFromRing
+ *
+ * Fills in a freshly obtained Packet from one TPACKET_V2 ring frame: marks
+ * the frame as busy, sets source/livedev/datalink, bypass callback, VLAN id,
+ * data pointer, release callback, peer info, timestamp and checksum flags.
+ *
+ * \param ptv thread vars; pkts is incremented and checksum_mode may be
+ * switched to CHECKSUM_VALIDATION_DISABLE
+ * \param h header of the ring frame the packet is built from
+ * \param tp_status frame status value supplied by the caller; used for the
+ * VLAN and checksum checks instead of re-reading h.h2->tp_status
+ * \param p packet to set up
+ */
+static void AFPReadFromRingSetupPacket(
+ AFPThreadVars *ptv, union thdr h, const unsigned int tp_status, Packet *p)
+{
+ PKT_SET_SRC(p, PKT_SRC_WIRE);
+
+ /* flag the frame as TP_STATUS_USER_BUSY, which is ignored by the kernel, but
+ * acts as an indicator that we've reached a frame that is not yet released by
+ * us in autofp mode. It will be cleared when the frame gets released to the kernel. */
+ h.h2->tp_status |= TP_STATUS_USER_BUSY;
+ p->livedev = ptv->livedev;
+ p->datalink = ptv->datalink;
+ ptv->pkts++;
+
+ AFPReadApplyBypass(ptv, p);
+
+ /* only logged in debug builds; the packet is still set up with tp_snaplen bytes */
+ if (h.h2->tp_len > h.h2->tp_snaplen) {
+ SCLogDebug("Packet length (%d) > snaplen (%d), truncating", h.h2->tp_len, h.h2->tp_snaplen);
+ }
+
+ /* get vlan id from header: accepted when the kernel flags the TCI as valid
+ * or when the TCI is simply non-zero; the low 12 bits are the vlan id */
+ if ((ptv->flags & AFP_VLAN_IN_HEADER) &&
+ (tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci)) {
+ p->vlan_id[0] = h.h2->tp_vlan_tci & 0x0fff;
+ p->vlan_idx = 1;
+ p->afp_v.vlan_tci = h.h2->tp_vlan_tci;
+ }
+
+ /* packet data is the tp_snaplen bytes found tp_mac bytes into the frame;
+ * the return value is deliberately ignored.
+ * NOTE(review): presumably PacketSetData references the ring memory rather
+ * than copying it -- confirm */
+ (void)PacketSetData(p, (unsigned char *)h.raw + h.h2->tp_mac, h.h2->tp_snaplen);
+
+ /* remember the frame so the release callback can act on it */
+ p->ReleasePacket = AFPReleasePacket;
+ p->afp_v.relptr = h.raw;
+ /* take a socket reference only when the thread needs peer tracking */
+ if (ptv->flags & AFP_NEED_PEER) {
+ p->afp_v.mpeer = ptv->mpeer;
+ AFPRefSocket(ptv->mpeer);
+ } else {
+ p->afp_v.mpeer = NULL;
+ }
+ p->afp_v.copy_mode = ptv->copy_mode;
+ /* NOTE(review): dereferences ptv->mpeer whenever copy mode is enabled;
+ * assumes mpeer is always set in that case -- confirm */
+ p->afp_v.peer = (p->afp_v.copy_mode == AFP_COPY_MODE_NONE) ? NULL : ptv->mpeer->peer;
+
+ /* Timestamp: the frame carries nanoseconds, the packet stores microseconds */
+ p->ts.tv_sec = h.h2->tp_sec;
+ p->ts.tv_usec = h.h2->tp_nsec / 1000;
+ SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)", GET_PKT_LEN(p), p, GET_PKT_DATA(p));
+
+ /* We only check for checksum disable */
+ if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
+ p->flags |= PKT_IGNORE_CHECKSUM;
+ } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
+ /* once the auto check fires, the thread's mode is switched to DISABLE,
+ * so later packets take the first branch above */
+ if (ChecksumAutoModeCheck(ptv->pkts, SC_ATOMIC_GET(ptv->livedev->pkts),
+ SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
+ ptv->checksum_mode = CHECKSUM_VALIDATION_DISABLE;
+ p->flags |= PKT_IGNORE_CHECKSUM;
+ }
+ } else {
+ /* any other mode: skip validation only for frames flagged CSUMNOTREADY */
+ if (tp_status & TP_STATUS_CSUMNOTREADY) {
+ p->flags |= PKT_IGNORE_CHECKSUM;
+ }
+ }
+}
+
+/** \internal
+ *  \brief busy-wait until the frame at the current ring position is no
+ *         longer owned by the kernel
+ *
+ *  Spins while the frame's tp_status equals TP_STATUS_KERNEL. The spin ends
+ *  when the status changes, when suricata_ctl_flags becomes non-zero, or
+ *  when more than 1000 ms of wall clock time have passed since entry. The
+ *  number of spin iterations is added to the afpacket_spin counter.
+ *
+ *  \param ptv thread vars; ring.v2 and frame_offset select the frame
+ *
+ *  \retval AFP_READ_FAILURE the ring slot at frame_offset is NULL
+ *  \retval AFP_READ_OK in every other case, including timeout; the caller
+ *          has to check the frame status itself
+ */
+static inline int AFPReadFromRingWaitForPacket(AFPThreadVars *ptv)
+{
+    union thdr h;
+    struct timeval start_time;
+    gettimeofday(&start_time, NULL);
+    uint64_t busy_loop_iter = 0;
+
+    /* busy wait loop until we have packets available */
+    while (1) {
+        if (unlikely(suricata_ctl_flags != 0)) {
+            break;
+        }
+        h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
+        if (unlikely(h.raw == NULL)) {
+            return AFP_READ_FAILURE;
+        }
+        const unsigned int tp_status = h.h2->tp_status;
+        if (tp_status != TP_STATUS_KERNEL) {
+            /* frame is ours (or still busy): stop spinning */
+            break;
+        }
+        busy_loop_iter++;
+
+        /* Sample the clock on every spin. Previously cur_time was zeroed
+         * instead of sampled, which made the elapsed value underflow and
+         * ended the wait after a single iteration. */
+        struct timeval cur_time;
+        gettimeofday(&cur_time, NULL);
+        const int64_t elapsed_ms =
+                ((int64_t)cur_time.tv_sec - (int64_t)start_time.tv_sec) * 1000 +
+                ((int64_t)cur_time.tv_usec - (int64_t)start_time.tv_usec) / 1000;
+        /* a negative value means the wall clock stepped backwards; give up
+         * rather than spin for an unbounded time */
+        if (elapsed_ms < 0 || elapsed_ms > 1000) {
+            break;
+        }
+    }
+    if (busy_loop_iter) {
+        StatsAddUI64(ptv->tv, ptv->afpacket_spin, busy_loop_iter);
+    }
+    return AFP_READ_OK;
+}
+
/**
* \brief AF packet read function for ring
*
*/
static int AFPReadFromRing(AFPThreadVars *ptv)
{
- Packet *p = NULL;
union thdr h;
- uint8_t emergency_flush = 0;
- int read_pkts = 0;
- int loop_start = -1;
+ bool emergency_flush = false;
+ const unsigned int start_pos = ptv->frame_offset;
+ /* poll() told us there are frames, so lets wait for at least
+ * one frame to become available. */
+ if (AFPReadFromRingWaitForPacket(ptv) != AFP_READ_OK)
+ return AFP_READ_FAILURE;
- /* Loop till we have packets available */
+ /* process the frames in the ring */
while (1) {
if (unlikely(suricata_ctl_flags != 0)) {
break;
}
-
- /* Read packet from ring */
- h.raw = (((union thdr **)ptv->ring_v2)[ptv->frame_offset]);
- if (h.raw == NULL) {
- SCReturnInt(AFP_FAILURE);
+ h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
+ if (unlikely(h.raw == NULL)) {
+ return AFP_READ_FAILURE;
}
-
- if ((! h.h2->tp_status) || (h.h2->tp_status & TP_STATUS_USER_BUSY)) {
- if (read_pkts == 0) {
- if (loop_start == -1) {
- loop_start = ptv->frame_offset;
- } else if (unlikely(loop_start == (int)ptv->frame_offset)) {
- SCReturnInt(AFP_READ_OK);
- }
- if (++ptv->frame_offset >= ptv->req.tp_frame_nr) {
- ptv->frame_offset = 0;
- }
- continue;
- }
- if ((emergency_flush) && (ptv->flags & AFP_EMERGENCY_MODE)) {
- SCReturnInt(AFP_KERNEL_DROP);
- } else {
- SCReturnInt(AFP_READ_OK);
- }
+ const unsigned int tp_status = h.h2->tp_status;
+ /* if we find a kernel frame we are done */
+ if (unlikely(tp_status == TP_STATUS_KERNEL)) {
+ break;
}
-
- read_pkts++;
- loop_start = -1;
-
- /* Our packet is still used by suricata, we exit read loop to
- * gain some time */
- if (h.h2->tp_status & TP_STATUS_USER_BUSY) {
- SCReturnInt(AFP_READ_OK);
+ /* if in autofp mode the frame is still busy, return to poll */
+ if (unlikely(FRAME_BUSY(tp_status))) {
+ break;
}
+ emergency_flush |= ((tp_status & TP_STATUS_LOSING) != 0);
- if ((ptv->flags & AFP_EMERGENCY_MODE) && (emergency_flush == 1)) {
+ if ((ptv->flags & AFP_EMERGENCY_MODE) && emergency_flush) {
h.h2->tp_status = TP_STATUS_KERNEL;
goto next_frame;
}
- p = PacketGetFromQueueOrAlloc();
+ Packet *p = PacketGetFromQueueOrAlloc();
if (p == NULL) {
- SCReturnInt(AFP_FAILURE);
- }
- PKT_SET_SRC(p, PKT_SRC_WIRE);
-
- /* Suricata will treat packet so telling it is busy, this
- * status will be reset to 0 (ie TP_STATUS_KERNEL) in the release
- * function. */
- h.h2->tp_status |= TP_STATUS_USER_BUSY;
-
- ptv->pkts++;
- p->livedev = ptv->livedev;
- p->datalink = ptv->datalink;
-
- if (h.h2->tp_len > h.h2->tp_snaplen) {
- SCLogDebug("Packet length (%d) > snaplen (%d), truncating",
- h.h2->tp_len, h.h2->tp_snaplen);
- }
-
- /* get vlan id from header */
- if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
- (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci)) {
- p->vlan_id[0] = h.h2->tp_vlan_tci & 0x0fff;
- p->vlan_idx = 1;
- p->vlanh[0] = NULL;
- }
-
- if (ptv->flags & AFP_ZERO_COPY) {
- if (PacketSetData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_FAILURE);
- } else {
- p->afp_v.relptr = h.raw;
- p->ReleasePacket = AFPReleasePacket;
- p->afp_v.mpeer = ptv->mpeer;
- AFPRefSocket(ptv->mpeer);
-
- p->afp_v.copy_mode = ptv->copy_mode;
- if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
- p->afp_v.peer = ptv->mpeer->peer;
- } else {
- p->afp_v.peer = NULL;
- }
- }
- } else {
- if (PacketCopyData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_FAILURE);
- }
- }
- /* Timestamp */
- p->ts.tv_sec = h.h2->tp_sec;
- p->ts.tv_usec = h.h2->tp_nsec/1000;
- SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
- GET_PKT_LEN(p), p, GET_PKT_DATA(p));
-
- /* We only check for checksum disable */
- if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
- p->flags |= PKT_IGNORE_CHECKSUM;
- } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
- if (ptv->livedev->ignore_checksum) {
- p->flags |= PKT_IGNORE_CHECKSUM;
- } else if (ChecksumAutoModeCheck(ptv->pkts,
- SC_ATOMIC_GET(ptv->livedev->pkts),
- SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
- ptv->livedev->ignore_checksum = 1;
- p->flags |= PKT_IGNORE_CHECKSUM;
- }
- } else {
- if (h.h2->tp_status & TP_STATUS_CSUMNOTREADY) {
- p->flags |= PKT_IGNORE_CHECKSUM;
- }
- }
- if (h.h2->tp_status & TP_STATUS_LOSING) {
- emergency_flush = 1;
- AFPDumpCounters(ptv);
- }
-
- /* release frame if not in zero copy mode */
- if (!(ptv->flags & AFP_ZERO_COPY)) {
- h.h2->tp_status = TP_STATUS_KERNEL;
+ return AFPSuriFailure(ptv, h);
}
+ AFPReadFromRingSetupPacket(ptv, h, tp_status, p);
if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
- h.h2->tp_status = TP_STATUS_KERNEL;
- if (++ptv->frame_offset >= ptv->req.tp_frame_nr) {
- ptv->frame_offset = 0;
- }
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_FAILURE);
+ return AFPSuriFailure(ptv, h);
}
-
next_frame:
- if (++ptv->frame_offset >= ptv->req.tp_frame_nr) {
+ if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
ptv->frame_offset = 0;
/* Get out of loop to be sure we will reach maintenance tasks */
- SCReturnInt(AFP_READ_OK);
+ if (ptv->frame_offset == start_pos)
+ break;
}
}
-
+ if (emergency_flush) {
+ AFPDumpCounters(ptv);
+ }
SCReturnInt(AFP_READ_OK);
}
{
Packet *p = PacketGetFromQueueOrAlloc();
if (p == NULL) {
- SCReturnInt(AFP_FAILURE);
+ SCReturnInt(AFP_SURI_FAILURE);
}
PKT_SET_SRC(p, PKT_SRC_WIRE);
+ AFPReadApplyBypass(ptv, p);
+
ptv->pkts++;
p->livedev = ptv->livedev;
p->datalink = ptv->datalink;
- if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
+ if ((ptv->flags & AFP_VLAN_IN_HEADER) &&
(ppd->tp_status & TP_STATUS_VLAN_VALID || ppd->hv1.tp_vlan_tci)) {
p->vlan_id[0] = ppd->hv1.tp_vlan_tci & 0x0fff;
p->vlan_idx = 1;
- p->vlanh[0] = NULL;
+ p->afp_v.vlan_tci = ppd->hv1.tp_vlan_tci;
}
- if (ptv->flags & AFP_ZERO_COPY) {
- if (PacketSetData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_FAILURE);
- }
- p->afp_v.relptr = ppd;
- p->ReleasePacket = AFPReleasePacketV3;
- p->afp_v.mpeer = ptv->mpeer;
- AFPRefSocket(ptv->mpeer);
+ (void)PacketSetData(p, (unsigned char *)ppd + ppd->tp_mac, ppd->tp_snaplen);
+
+ p->ReleasePacket = AFPReleasePacketV3;
+ p->afp_v.relptr = NULL;
+ p->afp_v.mpeer = NULL;
+ p->afp_v.copy_mode = ptv->copy_mode;
+ p->afp_v.peer = (p->afp_v.copy_mode == AFP_COPY_MODE_NONE) ? NULL : ptv->mpeer->peer;
- p->afp_v.copy_mode = ptv->copy_mode;
- if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
- p->afp_v.peer = ptv->mpeer->peer;
- } else {
- p->afp_v.peer = NULL;
- }
- } else {
- if (PacketCopyData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_FAILURE);
- }
- }
/* Timestamp */
p->ts.tv_sec = ppd->tp_sec;
p->ts.tv_usec = ppd->tp_nsec/1000;
if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
p->flags |= PKT_IGNORE_CHECKSUM;
} else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
- if (ptv->livedev->ignore_checksum) {
- p->flags |= PKT_IGNORE_CHECKSUM;
- } else if (ChecksumAutoModeCheck(ptv->pkts,
+ if (ChecksumAutoModeCheck(ptv->pkts,
SC_ATOMIC_GET(ptv->livedev->pkts),
SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
- ptv->livedev->ignore_checksum = 1;
+ ptv->checksum_mode = CHECKSUM_VALIDATION_DISABLE;
p->flags |= PKT_IGNORE_CHECKSUM;
}
} else {
}
if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_FAILURE);
+ SCReturnInt(AFP_SURI_FAILURE);
}
SCReturnInt(AFP_READ_OK);
static inline int AFPWalkBlock(AFPThreadVars *ptv, struct tpacket_block_desc *pbd)
{
- int num_pkts = pbd->hdr.bh1.num_pkts, i;
- uint8_t *ppd;
+ const int num_pkts = pbd->hdr.bh1.num_pkts;
+ uint8_t *ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;
- ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;
- for (i = 0; i < num_pkts; ++i) {
- if (unlikely(AFPParsePacketV3(ptv, pbd,
- (struct tpacket3_hdr *)ppd) == AFP_FAILURE)) {
- SCReturnInt(AFP_READ_FAILURE);
+ for (int i = 0; i < num_pkts; ++i) {
+ int ret = AFPParsePacketV3(ptv, pbd, (struct tpacket3_hdr *)ppd);
+ switch (ret) {
+ case AFP_READ_OK:
+ break;
+ case AFP_SURI_FAILURE:
+ /* Internal error but let's just continue and
+ * treat the next packet */
+ break;
+ case AFP_READ_FAILURE:
+ SCReturnInt(AFP_READ_FAILURE);
+ default:
+ SCReturnInt(ret);
}
ppd = ppd + ((struct tpacket3_hdr *)ppd)->tp_next_offset;
}
static int AFPReadFromRingV3(AFPThreadVars *ptv)
{
#ifdef HAVE_TPACKET_V3
- struct tpacket_block_desc *pbd;
-
/* Loop till we have packets available */
while (1) {
if (unlikely(suricata_ctl_flags != 0)) {
break;
}
- pbd = (struct tpacket_block_desc *) ptv->ring_v3[ptv->frame_offset].iov_base;
+ struct tpacket_block_desc *pbd =
+ (struct tpacket_block_desc *)ptv->ring.v3[ptv->frame_offset].iov_base;
/* block is not ready to be read */
if ((pbd->hdr.bh1.block_status & TP_STATUS_USER) == 0) {
SCReturnInt(AFP_READ_OK);
}
- if (unlikely(AFPWalkBlock(ptv, pbd) != AFP_READ_OK)) {
+ int ret = AFPWalkBlock(ptv, pbd);
+ if (unlikely(ret != AFP_READ_OK)) {
AFPFlushBlock(pbd);
- SCReturnInt(AFP_READ_FAILURE);
+ SCReturnInt(ret);
}
AFPFlushBlock(pbd);
- ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req3.tp_block_nr;
+ ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
/* return to maintenance task after one loop on the ring */
if (ptv->frame_offset == 0) {
SCReturnInt(AFP_READ_OK);
if (peer == NULL)
return 1;
- if (SC_ATOMIC_SUB(peer->sock_usage, 1) == 0) {
- if (SC_ATOMIC_GET(peer->state) == AFP_STATE_DOWN) {
- SCLogInfo("Cleaning socket connected to '%s'", peer->iface);
- close(SC_ATOMIC_GET(peer->socket));
- return 0;
- }
+ if (SC_ATOMIC_SUB(peer->sock_usage, 1) == 1) {
+ return 0;
}
return 1;
}
-static void AFPSwitchState(AFPThreadVars *ptv, int state)
+static void AFPCloseSocket(AFPThreadVars *ptv)
{
- ptv->afp_state = state;
- ptv->down_count = 0;
-
- AFPPeerUpdate(ptv);
+ if (ptv->mpeer != NULL)
+ BUG_ON(SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0);
- /* Do cleaning if switching to down state */
- if (state == AFP_STATE_DOWN) {
-#ifdef HAVE_TPACKET_V3
- if (ptv->flags & AFP_TPACKET_V3) {
- if (!ptv->ring_v3) {
- SCFree(ptv->ring_v3);
- ptv->ring_v3 = NULL;
- }
- } else {
-#endif
- if (ptv->ring_v2) {
- /* only used in reading phase, we can free it */
- SCFree(ptv->ring_v2);
- ptv->ring_v2 = NULL;
- }
+ if (ptv->flags & AFP_TPACKET_V3) {
#ifdef HAVE_TPACKET_V3
+ if (ptv->ring.v3) {
+ SCFree(ptv->ring.v3);
+ ptv->ring.v3 = NULL;
}
#endif
- if (ptv->socket != -1) {
- /* we need to wait for all packets to return data */
- if (SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1) == 0) {
- SCLogInfo("Cleaning socket connected to '%s'", ptv->iface);
- close(ptv->socket);
- ptv->socket = -1;
- }
+ } else {
+ if (ptv->ring.v2) {
+ /* only used in reading phase, we can free it */
+ SCFree(ptv->ring.v2);
+ ptv->ring.v2 = NULL;
}
}
- if (state == AFP_STATE_UP) {
- (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
+ if (ptv->socket != -1) {
+ SCLogDebug("Cleaning socket connected to '%s'", ptv->iface);
+ munmap(ptv->ring_buf, ptv->ring_buflen);
+ close(ptv->socket);
+ ptv->socket = -1;
}
}
-static int AFPReadAndDiscard(AFPThreadVars *ptv, struct timeval *synctv,
- uint64_t *discarded_pkts)
+static void AFPSwitchState(AFPThreadVars *ptv, int state)
{
- struct sockaddr_ll from;
- struct iovec iov;
- struct msghdr msg;
- struct timeval ts;
- union {
- struct cmsghdr cmsg;
- char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
- } cmsg_buf;
-
-
- if (unlikely(suricata_ctl_flags != 0)) {
- return 1;
- }
-
- msg.msg_name = &from;
- msg.msg_namelen = sizeof(from);
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- msg.msg_control = &cmsg_buf;
- msg.msg_controllen = sizeof(cmsg_buf);
- msg.msg_flags = 0;
-
- iov.iov_len = ptv->datalen;
- iov.iov_base = ptv->data;
-
- (void)recvmsg(ptv->socket, &msg, MSG_TRUNC);
+ ptv->afp_state = state;
+ ptv->down_count = 0;
- if (ioctl(ptv->socket, SIOCGSTAMP, &ts) == -1) {
- /* FIXME */
- return -1;
+ if (state == AFP_STATE_DOWN) {
+ /* cleanup is done on thread cleanup or try reopen
+ * as there may still be packets in autofp that
+ * are referencing us */
+ (void)SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1);
}
-
- if ((ts.tv_sec > synctv->tv_sec) ||
- (ts.tv_sec >= synctv->tv_sec &&
- ts.tv_usec > synctv->tv_usec)) {
- return 1;
+ if (state == AFP_STATE_UP) {
+ AFPPeerUpdate(ptv);
+ (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
}
- return 0;
}
static int AFPReadAndDiscardFromRing(AFPThreadVars *ptv, struct timeval *synctv,
uint64_t *discarded_pkts)
{
- union thdr h;
-
if (unlikely(suricata_ctl_flags != 0)) {
return 1;
}
#ifdef HAVE_TPACKET_V3
if (ptv->flags & AFP_TPACKET_V3) {
- struct tpacket_block_desc *pbd;
- pbd = (struct tpacket_block_desc *) ptv->ring_v3[ptv->frame_offset].iov_base;
+ int ret = 0;
+ struct tpacket_block_desc *pbd =
+ (struct tpacket_block_desc *)ptv->ring.v3[ptv->frame_offset].iov_base;
*discarded_pkts += pbd->hdr.bh1.num_pkts;
+ struct tpacket3_hdr *ppd =
+ (struct tpacket3_hdr *)((uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt);
+ if (((time_t)ppd->tp_sec > synctv->tv_sec) ||
+ ((time_t)ppd->tp_sec == synctv->tv_sec &&
+ (suseconds_t) (ppd->tp_nsec / 1000) > (suseconds_t)synctv->tv_usec)) {
+ ret = 1;
+ }
AFPFlushBlock(pbd);
- ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req3.tp_block_nr;
- return 1;
+ ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
+ return ret;
} else
#endif
{
/* Read packet from ring */
- h.raw = (((union thdr **)ptv->ring_v2)[ptv->frame_offset]);
+ union thdr h;
+ h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
if (h.raw == NULL) {
return -1;
}
- (*discarded_pkts)++;
+ if (h.h2->tp_status == TP_STATUS_KERNEL)
+ return 0;
+
if (((time_t)h.h2->tp_sec > synctv->tv_sec) ||
((time_t)h.h2->tp_sec == synctv->tv_sec &&
(suseconds_t) (h.h2->tp_nsec / 1000) > synctv->tv_usec)) {
return 1;
}
+ (*discarded_pkts)++;
h.h2->tp_status = TP_STATUS_KERNEL;
- if (++ptv->frame_offset >= ptv->req.tp_frame_nr) {
+ if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
ptv->frame_offset = 0;
}
}
-
return 0;
}
*/
static int AFPSynchronizeStart(AFPThreadVars *ptv, uint64_t *discarded_pkts)
{
- int r;
struct timeval synctv;
struct pollfd fds;
synctv.tv_usec = 0xffffffff;
while (1) {
- r = poll(&fds, 1, POLL_TIMEOUT);
+ int r = poll(&fds, 1, POLL_TIMEOUT);
if (r > 0 &&
(fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
SCLogWarning(SC_ERR_AFP_READ, "poll failed %02x",
if (AFPPeersListStarted() && synctv.tv_sec == (time_t) 0xffffffff) {
gettimeofday(&synctv, NULL);
}
- if (ptv->flags & AFP_RING_MODE) {
- r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
- } else {
- r = AFPReadAndDiscard(ptv, &synctv, discarded_pkts);
- }
+ r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
SCLogDebug("Discarding on %s", ptv->tv->name);
switch (r) {
case 1:
*/
static int AFPTryReopen(AFPThreadVars *ptv)
{
- int afp_activate_r;
-
ptv->down_count++;
-
/* Don't reconnect till we have packet that did not release data */
if (SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0) {
return -1;
}
- afp_activate_r = AFPCreateSocket(ptv, ptv->iface, 0);
+ /* ref cnt 0, we can close the old socket */
+ AFPCloseSocket(ptv);
+
+ int afp_activate_r = AFPCreateSocket(ptv, ptv->iface, 0);
if (afp_activate_r != 0) {
if (ptv->down_count % AFP_DOWN_COUNTER_INTERVAL == 0) {
SCLogWarning(SC_ERR_AFP_CREATE, "Can not open iface '%s'",
ptv->slot = s->slot_next;
- if (ptv->flags & AFP_RING_MODE) {
- if (ptv->flags & AFP_TPACKET_V3) {
- AFPReadFunc = AFPReadFromRingV3;
- } else {
- AFPReadFunc = AFPReadFromRing;
- }
+ if (ptv->flags & AFP_TPACKET_V3) {
+ AFPReadFunc = AFPReadFromRingV3;
} else {
- AFPReadFunc = AFPRead;
+ AFPReadFunc = AFPReadFromRing;
}
if (ptv->afp_state == AFP_STATE_DOWN) {
}
if (ptv->afp_state == AFP_STATE_UP) {
SCLogDebug("Thread %s using socket %d", tv->name, ptv->socket);
- if ((ptv->flags & AFP_TPACKET_V3) != 0) {
- AFPSynchronizeStart(ptv, &discarded_pkts);
- }
+ AFPSynchronizeStart(ptv, &discarded_pkts);
/* let's reset counter as we will start the capture at the
* next function call */
#ifdef PACKET_STATISTICS
* us from alloc'ing packets at line rate */
PacketPoolWait();
+ StatsIncr(ptv->tv, ptv->capture_afp_poll);
+
r = poll(&fds, 1, POLL_TIMEOUT);
if (suricata_ctl_flags != 0) {
if (r > 0 &&
(fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
+ StatsIncr(ptv->tv, ptv->capture_afp_poll_signal);
if (fds.revents & (POLLHUP | POLLRDHUP)) {
AFPSwitchState(ptv, AFP_STATE_DOWN);
continue;
if (recv(ptv->socket, &c, sizeof c, MSG_PEEK) != -1)
continue; /* what, no error? */
SCLogError(SC_ERR_AFP_READ,
- "Error reading data from iface '%s': (%d" PRIu32 ") %s",
+ "Error reading data from iface '%s': (%d) %s",
ptv->iface, errno, strerror(errno));
AFPSwitchState(ptv, AFP_STATE_DOWN);
continue;
continue;
}
} else if (r > 0) {
+ StatsIncr(ptv->tv, ptv->capture_afp_poll_data);
r = AFPReadFunc(ptv);
switch (r) {
case AFP_READ_OK:
case AFP_READ_FAILURE:
/* AFPRead in error: best to reset the socket */
SCLogError(SC_ERR_AFP_READ,
- "AFPRead error reading data from iface '%s': (%d" PRIu32 ") %s",
+ "AFPRead error reading data from iface '%s': (%d) %s",
ptv->iface, errno, strerror(errno));
AFPSwitchState(ptv, AFP_STATE_DOWN);
continue;
- case AFP_FAILURE:
- AFPSwitchState(ptv, AFP_STATE_DOWN);
- SCReturnInt(TM_ECODE_FAILED);
+ case AFP_SURI_FAILURE:
+ StatsIncr(ptv->tv, ptv->capture_errors);
break;
case AFP_KERNEL_DROP:
AFPDumpCounters(ptv);
break;
}
} else if (unlikely(r == 0)) {
- /* poll timed out, lets see if we need to inject a fake packet */
- TmThreadsCaptureInjectPacket(tv, ptv->slot, NULL);
+ StatsIncr(ptv->tv, ptv->capture_afp_poll_timeout);
+ /* Trigger one dump of stats every second */
+ current_time = time(NULL);
+ if (current_time != last_dump) {
+ AFPDumpCounters(ptv);
+ last_dump = current_time;
+ }
+ /* poll timed out, lets see handle our timeout path */
+ TmThreadsCaptureHandleTimeout(tv, NULL);
} else if ((r < 0) && (errno != EINTR)) {
- SCLogError(SC_ERR_AFP_READ, "Error reading data from iface '%s': (%d" PRIu32 ") %s",
+ StatsIncr(ptv->tv, ptv->capture_afp_poll_err);
+ SCLogError(SC_ERR_AFP_READ, "Error reading data from iface '%s': (%d) %s",
ptv->iface,
errno, strerror(errno));
AFPSwitchState(ptv, AFP_STATE_DOWN);
strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
- if (verbose)
- SCLogError(SC_ERR_AFP_CREATE, "Unable to find iface %s: %s",
- ifname, strerror(errno));
+ if (verbose)
+ SCLogError(SC_ERR_AFP_CREATE, "Unable to find iface %s: %s",
+ ifname, strerror(errno));
return -1;
}
}
}
- ptv->req.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
- ptv->req.tp_block_size = getpagesize() << order;
- int frames_per_block = ptv->req.tp_block_size / ptv->req.tp_frame_size;
+ ptv->req.v2.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
+ ptv->req.v2.tp_block_size = getpagesize() << order;
+ int frames_per_block = ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size;
if (frames_per_block == 0) {
SCLogError(SC_ERR_INVALID_VALUE, "Frame size bigger than block size");
return -1;
}
- ptv->req.tp_frame_nr = ptv->ring_size;
- ptv->req.tp_block_nr = ptv->req.tp_frame_nr / frames_per_block + 1;
+ ptv->req.v2.tp_frame_nr = ptv->ring_size;
+ ptv->req.v2.tp_block_nr = ptv->req.v2.tp_frame_nr / frames_per_block + 1;
/* exact division */
- ptv->req.tp_frame_nr = ptv->req.tp_block_nr * frames_per_block;
+ ptv->req.v2.tp_frame_nr = ptv->req.v2.tp_block_nr * frames_per_block;
SCLogPerf("AF_PACKET RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d",
- ptv->req.tp_block_size, ptv->req.tp_block_nr,
- ptv->req.tp_frame_size, ptv->req.tp_frame_nr);
+ ptv->req.v2.tp_block_size, ptv->req.v2.tp_block_nr,
+ ptv->req.v2.tp_frame_size, ptv->req.v2.tp_frame_nr);
return 1;
}
#ifdef HAVE_TPACKET_V3
static int AFPComputeRingParamsV3(AFPThreadVars *ptv)
{
- ptv->req3.tp_block_size = ptv->block_size;
- ptv->req3.tp_frame_size = 2048;
+ ptv->req.v3.tp_block_size = ptv->block_size;
+ ptv->req.v3.tp_frame_size = 2048;
int frames_per_block = 0;
int tp_hdrlen = sizeof(struct tpacket3_hdr);
int snaplen = default_packet_size;
}
}
- ptv->req.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
- frames_per_block = ptv->req3.tp_block_size / ptv->req3.tp_frame_size;
+ ptv->req.v3.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
+ frames_per_block = ptv->req.v3.tp_block_size / ptv->req.v3.tp_frame_size;
if (frames_per_block == 0) {
SCLogError(SC_ERR_INVALID_VALUE,
"Block size is too small, it should be at least %d",
- ptv->req3.tp_frame_size);
+ ptv->req.v3.tp_frame_size);
return -1;
}
- ptv->req3.tp_block_nr = ptv->ring_size / frames_per_block + 1;
+ ptv->req.v3.tp_block_nr = ptv->ring_size / frames_per_block + 1;
/* exact division */
- ptv->req3.tp_frame_nr = ptv->req3.tp_block_nr * frames_per_block;
- ptv->req3.tp_retire_blk_tov = ptv->block_timeout;
- ptv->req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+ ptv->req.v3.tp_frame_nr = ptv->req.v3.tp_block_nr * frames_per_block;
+ ptv->req.v3.tp_retire_blk_tov = ptv->block_timeout;
+ ptv->req.v3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
SCLogPerf("AF_PACKET V3 RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d (mem: %d)",
- ptv->req3.tp_block_size, ptv->req3.tp_block_nr,
- ptv->req3.tp_frame_size, ptv->req3.tp_frame_nr,
- ptv->req3.tp_block_size * ptv->req3.tp_block_nr
+ ptv->req.v3.tp_block_size, ptv->req.v3.tp_block_nr,
+ ptv->req.v3.tp_frame_size, ptv->req.v3.tp_frame_nr,
+ ptv->req.v3.tp_block_size * ptv->req.v3.tp_block_nr
);
return 1;
}
{
int val;
unsigned int len = sizeof(val), i;
- unsigned int ring_buflen;
- uint8_t * ring_buf;
int order;
int r, mmap_flag;
}
#endif
- /* Let's reserve head room so we can add the VLAN header in IPS
- * or TAP mode before write the packet */
- if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
- /* Only one vlan is extracted from AFP header so
- * one VLAN header length is enough. */
- int reserve = VLAN_HEADER_LEN;
- if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *) &reserve,
- sizeof(reserve)) < 0) {
- SCLogError(SC_ERR_AFP_CREATE,
- "Can't activate reserve on packet socket: %s",
- strerror(errno));
- return AFP_FATAL_ERROR;
- }
+ /* Reserve head room for a VLAN header. One vlan is extracted from AFP header
+ * so one VLAN header length is enough. */
+ int reserve = VLAN_HEADER_LEN;
+ if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *)&reserve, sizeof(reserve)) <
+ 0) {
+ SCLogError(
+ SC_ERR_AFP_CREATE, "Can't activate reserve on packet socket: %s", strerror(errno));
+ return AFP_FATAL_ERROR;
}
/* Allocate RX ring */
return AFP_FATAL_ERROR;
}
r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
- (void *) &ptv->req3, sizeof(ptv->req3));
+ (void *) &ptv->req.v3, sizeof(ptv->req.v3));
if (r < 0) {
SCLogError(SC_ERR_MEM_ALLOC,
"Unable to allocate RX Ring for iface %s: (%d) %s",
/* Allocate the Ring */
#ifdef HAVE_TPACKET_V3
if (ptv->flags & AFP_TPACKET_V3) {
- ring_buflen = ptv->req3.tp_block_nr * ptv->req3.tp_block_size;
+ ptv->ring_buflen = ptv->req.v3.tp_block_nr * ptv->req.v3.tp_block_size;
} else {
#endif
- ring_buflen = ptv->req.tp_block_nr * ptv->req.tp_block_size;
+ ptv->ring_buflen = ptv->req.v2.tp_block_nr * ptv->req.v2.tp_block_size;
#ifdef HAVE_TPACKET_V3
}
#endif
mmap_flag = MAP_SHARED;
if (ptv->flags & AFP_MMAP_LOCKED)
mmap_flag |= MAP_LOCKED;
- ring_buf = mmap(0, ring_buflen, PROT_READ|PROT_WRITE,
+ ptv->ring_buf = mmap(0, ptv->ring_buflen, PROT_READ|PROT_WRITE,
mmap_flag, ptv->socket, 0);
- if (ring_buf == MAP_FAILED) {
+ if (ptv->ring_buf == MAP_FAILED) {
SCLogError(SC_ERR_MEM_ALLOC, "Unable to mmap, error %s",
strerror(errno));
goto mmap_err;
}
#ifdef HAVE_TPACKET_V3
if (ptv->flags & AFP_TPACKET_V3) {
- ptv->ring_v3 = SCMalloc(ptv->req3.tp_block_nr * sizeof(*ptv->ring_v3));
- if (!ptv->ring_v3) {
- SCLogError(SC_ERR_MEM_ALLOC, "Unable to malloc ptv ring_v3");
+ ptv->ring.v3 = SCMalloc(ptv->req.v3.tp_block_nr * sizeof(*ptv->ring.v3));
+ if (!ptv->ring.v3) {
+ SCLogError(SC_ERR_MEM_ALLOC, "Unable to malloc ptv ring.v3");
goto postmmap_err;
}
- for (i = 0; i < ptv->req3.tp_block_nr; ++i) {
- ptv->ring_v3[i].iov_base = ring_buf + (i * ptv->req3.tp_block_size);
- ptv->ring_v3[i].iov_len = ptv->req3.tp_block_size;
+ for (i = 0; i < ptv->req.v3.tp_block_nr; ++i) {
+ ptv->ring.v3[i].iov_base = ptv->ring_buf + (i * ptv->req.v3.tp_block_size);
+ ptv->ring.v3[i].iov_len = ptv->req.v3.tp_block_size;
}
} else {
#endif
/* allocate a ring for each frame header pointer*/
- ptv->ring_v2 = SCMalloc(ptv->req.tp_frame_nr * sizeof (union thdr *));
- if (ptv->ring_v2 == NULL) {
+ ptv->ring.v2 = SCMalloc(ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
+ if (ptv->ring.v2 == NULL) {
SCLogError(SC_ERR_MEM_ALLOC, "Unable to allocate frame buf");
goto postmmap_err;
}
- memset(ptv->ring_v2, 0, ptv->req.tp_frame_nr * sizeof (union thdr *));
+ memset(ptv->ring.v2, 0, ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
/* fill the header ring with proper frame ptr*/
ptv->frame_offset = 0;
- for (i = 0; i < ptv->req.tp_block_nr; ++i) {
- void *base = &ring_buf[i * ptv->req.tp_block_size];
+ for (i = 0; i < ptv->req.v2.tp_block_nr; ++i) {
+ void *base = &(ptv->ring_buf[i * ptv->req.v2.tp_block_size]);
unsigned int j;
- for (j = 0; j < ptv->req.tp_block_size / ptv->req.tp_frame_size; ++j, ++ptv->frame_offset) {
- (((union thdr **)ptv->ring_v2)[ptv->frame_offset]) = base;
- base += ptv->req.tp_frame_size;
+ for (j = 0; j < ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size; ++j, ++ptv->frame_offset) {
+ (((union thdr **)ptv->ring.v2)[ptv->frame_offset]) = base;
+ base += ptv->req.v2.tp_frame_size;
}
}
ptv->frame_offset = 0;
return 0;
postmmap_err:
- munmap(ring_buf, ring_buflen);
- if (ptv->ring_v2)
- SCFree(ptv->ring_v2);
- if (ptv->ring_v3)
- SCFree(ptv->ring_v3);
+ munmap(ptv->ring_buf, ptv->ring_buflen);
+ if (ptv->ring.v2)
+ SCFree(ptv->ring.v2);
+ if (ptv->ring.v3)
+ SCFree(ptv->ring.v3);
mmap_err:
/* Packet mmap does the cleaning when socket is closed */
return AFP_FATAL_ERROR;
/** \brief test if we can use FANOUT. Older kernels like those in
* CentOS6 have HAVE_PACKET_FANOUT defined but fail to work
*/
-int AFPIsFanoutSupported(void)
+int AFPIsFanoutSupported(uint16_t cluster_id)
{
#ifdef HAVE_PACKET_FANOUT
int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
if (fd < 0)
return 0;
- uint16_t mode = PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG;
- uint16_t id = 1;
- uint32_t option = (mode << 16) | (id & 0xffff);
+ uint32_t mode = PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG;
+ uint32_t option = (mode << 16) | cluster_id;
int r = setsockopt(fd, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
close(fd);
if (r < 0) {
- SCLogPerf("fanout not supported by kernel: %s", strerror(errno));
+ SCLogError(SC_ERR_INVALID_VALUE, "fanout not supported by kernel: "
+ "Kernel too old or cluster-id %d already in use.", cluster_id);
return 0;
}
return 1;
#endif
}
+#ifdef HAVE_PACKET_EBPF
+
+/**
+ * \brief Hand the eBPF program used for fanout to the kernel.
+ *
+ * Passes ptv->ebpf_lb_fd to ptv->socket through the PACKET_FANOUT_DATA
+ * socket option.
+ *
+ * \param ptv AF_PACKET thread variables holding the socket and the program fd
+ * \retval 0 on success
+ * \retval -1 if the program fd is unset (-1) or setsockopt() fails
+ */
+static int SockFanoutSeteBPF(AFPThreadVars *ptv)
+{
+    int pfd = ptv->ebpf_lb_fd;
+    if (pfd == -1) {
+        SCLogError(SC_ERR_INVALID_VALUE,
+                "Fanout file descriptor is invalid");
+        return -1;
+    }
+
+    if (setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
+        /* report errno here, while it still belongs to the failed call */
+        SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf: %s", strerror(errno));
+        return -1;
+    }
+    SCLogInfo("Activated eBPF on socket");
+
+    return 0;
+}
+
+/**
+ * \brief Attach the eBPF filter program to the capture socket.
+ *
+ * \param ptv thread variables providing the socket and ebpf_filter_fd
+ * \retval 0 the filter was attached with SO_ATTACH_BPF
+ * \retval -1 ebpf_filter_fd is -1 or setsockopt() reported an error
+ */
+static int SetEbpfFilter(AFPThreadVars *ptv)
+{
+    int prog_fd = ptv->ebpf_filter_fd;
+
+    if (prog_fd != -1) {
+        int rc = setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd, sizeof(prog_fd));
+        if (rc == 0) {
+            SCLogInfo("Activated eBPF filter on socket");
+            return 0;
+        }
+        SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf: %s", strerror(errno));
+        return -1;
+    }
+
+    SCLogError(SC_ERR_INVALID_VALUE,
+            "Filter file descriptor is invalid");
+    return -1;
+}
+#endif
+
static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose)
{
int r;
SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
goto error;
}
+
if_idx = AFPGetIfnumByDev(ptv->socket, devname, verbose);
+
+ if (if_idx == -1) {
+ goto socket_err;
+ }
+
/* bind socket */
memset(&bind_address, 0, sizeof(bind_address));
bind_address.sll_family = AF_PACKET;
goto socket_err;
}
+ int if_flags = AFPGetDevFlags(ptv->socket, ptv->iface);
+ if (if_flags == -1) {
+ if (verbose) {
+ SCLogError(SC_ERR_AFP_READ,
+ "Couldn't get flags for interface '%s'",
+ ptv->iface);
+ }
+ ret = AFP_RECOVERABLE_ERROR;
+ goto socket_err;
+ } else if ((if_flags & (IFF_UP | IFF_RUNNING)) == 0) {
+ if (verbose) {
+ SCLogError(SC_ERR_AFP_READ,
+ "Interface '%s' is down",
+ ptv->iface);
+ }
+ ret = AFP_RECOVERABLE_ERROR;
+ goto socket_err;
+ }
+
if (ptv->promisc != 0) {
/* Force promiscuous mode */
memset(&sock_params, 0, sizeof(sock_params));
goto socket_err;
}
+
#ifdef HAVE_PACKET_FANOUT
/* add binded socket to fanout group */
if (ptv->threads > 1) {
- uint16_t mode = ptv->cluster_type;
+ uint32_t mode = ptv->cluster_type;
uint16_t id = ptv->cluster_id;
uint32_t option = (mode << 16) | (id & 0xffff);
r = setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
}
#endif
- int if_flags = AFPGetDevFlags(ptv->socket, ptv->iface);
- if (if_flags == -1) {
- if (verbose) {
- SCLogError(SC_ERR_AFP_READ,
- "Couldn't get flags for interface '%s'",
- ptv->iface);
- }
- ret = AFP_RECOVERABLE_ERROR;
- goto socket_err;
- }
- if ((if_flags & IFF_UP) == 0) {
- if (verbose) {
- SCLogError(SC_ERR_AFP_READ,
- "Interface '%s' is down",
- ptv->iface);
+#ifdef HAVE_PACKET_EBPF
+ if (ptv->cluster_type == PACKET_FANOUT_EBPF) {
+ r = SockFanoutSeteBPF(ptv);
+ if (r < 0) {
+ SCLogError(SC_ERR_AFP_CREATE,
+ "Coudn't set EBPF, error %s",
+ strerror(errno));
+ goto socket_err;
}
- ret = AFP_RECOVERABLE_ERROR;
- goto socket_err;
}
+#endif
- if (ptv->flags & AFP_RING_MODE) {
- ret = AFPSetupRing(ptv, devname);
- if (ret != 0)
- goto socket_err;
- }
+ ret = AFPSetupRing(ptv, devname);
+ if (ret != 0)
+ goto socket_err;
SCLogDebug("Using interface '%s' via socket %d", (char *)devname, ptv->socket);
ptv->datalink = AFPGetDevLinktype(ptv->socket, ptv->iface);
- switch (ptv->datalink) {
- case ARPHRD_PPP:
- case ARPHRD_ATM:
- ptv->cooked = 1;
- break;
- }
TmEcode rc = AFPSetBPFFilter(ptv);
if (rc == TM_ECODE_FAILED) {
- SCLogError(SC_ERR_AFP_CREATE, "Set AF_PACKET bpf filter \"%s\" failed.", ptv->bpf_filter);
ret = AFP_FATAL_ERROR;
goto socket_err;
}
close(ptv->socket);
ptv->socket = -1;
if (ptv->flags & AFP_TPACKET_V3) {
- if (ptv->ring_v3) {
- SCFree(ptv->ring_v3);
- ptv->ring_v3 = NULL;
+ if (ptv->ring.v3) {
+ SCFree(ptv->ring.v3);
+ ptv->ring.v3 = NULL;
}
} else {
- if (ptv->ring_v2) {
- SCFree(ptv->ring_v2);
- ptv->ring_v2 = NULL;
+ if (ptv->ring.v2) {
+ SCFree(ptv->ring.v2);
+ ptv->ring.v2 = NULL;
}
}
struct sock_fprog fcode;
int rc;
+#ifdef HAVE_PACKET_EBPF
+    if (ptv->ebpf_filter_fd != -1) {
+        /* SetEbpfFilter() reports 0 / -1; translate that into the TmEcode
+         * values this function returns, so that a failed attach reaches
+         * the caller as TM_ECODE_FAILED instead of an unrelated value. */
+        return (SetEbpfFilter(ptv) == 0) ? TM_ECODE_OK : TM_ECODE_FAILED;
+    }
+#endif
+
if (!ptv->bpf_filter)
return TM_ECODE_OK;
- SCMutexLock(&afpacket_bpf_set_filter_lock);
-
SCLogInfo("Using BPF '%s' on iface '%s'",
ptv->bpf_filter,
ptv->iface);
- if (pcap_compile_nopcap(default_packet_size, /* snaplen_arg */
+
+ char errbuf[PCAP_ERRBUF_SIZE];
+ if (SCBPFCompile(default_packet_size, /* snaplen_arg */
ptv->datalink, /* linktype_arg */
&filter, /* program */
ptv->bpf_filter, /* const char *buf */
1, /* optimize */
- 0 /* mask */
- ) == -1) {
- SCLogError(SC_ERR_AFP_CREATE, "Filter compilation failed.");
- SCMutexUnlock(&afpacket_bpf_set_filter_lock);
- return TM_ECODE_FAILED;
- }
- SCMutexUnlock(&afpacket_bpf_set_filter_lock);
-
- if (filter.bf_insns == NULL) {
- SCLogError(SC_ERR_AFP_CREATE, "Filter badly setup.");
- pcap_freecode(&filter);
+ 0, /* mask */
+ errbuf,
+ sizeof(errbuf)) == -1) {
+ SCLogError(SC_ERR_AFP_CREATE, "Failed to compile BPF \"%s\": %s",
+ ptv->bpf_filter,
+ errbuf);
return TM_ECODE_FAILED;
}
rc = setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_FILTER, &fcode, sizeof(fcode));
- pcap_freecode(&filter);
+ SCBPFFree(&filter);
if(rc == -1) {
SCLogError(SC_ERR_AFP_CREATE, "Failed to attach filter: %s", strerror(errno));
return TM_ECODE_FAILED;
return TM_ECODE_OK;
}
+#ifdef HAVE_PACKET_EBPF
+/**
+ * Insert a half flow in the kernel bypass table
+ *
+ * The entry is created with zeroed packet and byte counters for every CPU.
+ *
+ * \param mapd file descriptor of the protocol bypass table
+ * \param key data to use as key in the table
+ * \param nr_cpus number of per CPU values to initialise
+ * \return 0 in case of error, 1 if success (an already existing key
+ *         counts as success)
+ */
+static int AFPInsertHalfFlow(int mapd, void *key, unsigned int nr_cpus)
+{
+    BPF_DECLARE_PERCPU(struct pair, value, nr_cpus);
+
+    if (mapd == -1) {
+        return 0;
+    }
+
+    /* The value is per CPU and the kernel does not replicate it for us,
+     * so every CPU slot has to be filled in explicitly. */
+    for (unsigned int cpu = 0; cpu < nr_cpus; cpu++) {
+        BPF_PERCPU(value, cpu).packets = 0;
+        BPF_PERCPU(value, cpu).bytes = 0;
+    }
+
+    if (bpf_map_update_elem(mapd, key, value, BPF_NOEXIST) == 0) {
+        return 1;
+    }
+
+    if (errno == EEXIST) {
+        /* the key is already there: the bypass is in place */
+        return 1;
+    }
+    if (errno == E2BIG || errno == EAGAIN) {
+        /* the hash is full (EAGAIN: for some hardware bypass) */
+        return 0;
+    }
+
+    /* anything else is unexpected, so report it */
+    SCLogError(SC_ERR_BPF, "Can't update eBPF map: %s (%d)",
+            strerror(errno),
+            errno);
+    return 0;
+}
+
+/**
+ * \brief Attach the two half-flow keys to the bypass storage of the flow.
+ *
+ * On success the keys are kept in a new EBPFBypassData hooked into the
+ * FlowBypassInfo of p->flow, and the bypass counters of p->livedev are
+ * incremented. If the flow has no bypass storage or the allocation fails,
+ * both keys are deleted from the map and freed, and a bypass failure is
+ * counted.
+ *
+ * \param p packet whose flow and livedev are used
+ * \param map_fd fd of the eBPF map the keys refer to
+ * \param key0 first half-flow key
+ * \param key1 second half-flow key
+ * \param family address family passed on to the LiveDevAddBypass*() counters
+ * \retval 1 on success
+ * \retval 0 on failure
+ */
+static int AFPSetFlowStorage(Packet *p, int map_fd, void *key0, void* key1,
+        int family)
+{
+    EBPFBypassData *eb = NULL;
+    FlowBypassInfo *fc = FlowGetStorageById(p->flow, GetFlowBypassInfoID());
+
+    if (fc != NULL) {
+        eb = SCCalloc(1, sizeof(EBPFBypassData));
+    }
+    if (eb == NULL) {
+        /* no storage slot or no memory: undo both map insertions */
+        EBPFDeleteKey(map_fd, key0);
+        EBPFDeleteKey(map_fd, key1);
+        LiveDevAddBypassFail(p->livedev, 1, family);
+        SCFree(key0);
+        SCFree(key1);
+        return 0;
+    }
+
+    eb->key[0] = key0;
+    eb->key[1] = key1;
+    eb->mapfd = map_fd;
+    eb->cpus_count = p->afp_v.nr_cpus;
+
+    fc->BypassUpdate = EBPFBypassUpdate;
+    fc->BypassFree = EBPFBypassFree;
+    fc->bypass_data = eb;
+
+    LiveDevAddBypassStats(p->livedev, 1, family);
+    LiveDevAddBypassSuccess(p->livedev, 1, family);
+    return 1;
+}
+
+/**
+ * Bypass function for AF_PACKET capture in eBPF mode
+ *
+ * This function creates two half flows in the map shared with the kernel
+ * to trigger bypass.
+ *
+ * The implementation of bypass is done via an IPv4 and an IPv6 flow table.
+ * Each table contains the list of half flows to bypass. The in-kernel filter
+ * will skip/drop a packet if it belongs to a flow in one of the flow
+ * tables.
+ *
+ * Each key holds the addresses, the ports, both VLAN ids and an ip_proto
+ * flag (1 for TCP, 0 otherwise); the second key is the first one with
+ * source and destination swapped. IPv4 addresses go through htonl() and
+ * IPv6 address words through ntohl(); ports are stored as returned by
+ * GET_TCP_SRC_PORT()/GET_TCP_DST_PORT().
+ *
+ * If the second key cannot be allocated or inserted, the first half flow
+ * is removed from the map again and the keys are freed. Once both half
+ * flows are inserted the keys are passed to AFPSetFlowStorage(), which
+ * frees them itself when it fails.
+ *
+ * NOTE(review): the IPv4 branch does not call LiveDevAddBypassFail() when
+ * the allocation of the first key fails, while the IPv6 branch does --
+ * confirm whether that difference is intended.
+ *
+ * \param p the packet belonging to the flow to bypass
+ * \return 0 if unable to bypass, 1 if success
+ */
+static int AFPBypassCallback(Packet *p)
+{
+ SCLogDebug("Calling af_packet callback function");
+ /* Only bypass TCP and UDP */
+ if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
+ return 0;
+ }
+
+ /* If we don't have a flow attached to packet the eBPF map entries
+ * will be destroyed at first flow bypass manager pass as we won't
+ * find any associated entry */
+ if (p->flow == NULL) {
+ return 0;
+ }
+ /* Bypassing tunneled packets is currently not supported
+ * because we can't discard the inner packet only due to
+ * primitive parsing in eBPF */
+ if (IS_TUNNEL_PKT(p)) {
+ return 0;
+ }
+ if (PKT_IS_IPV4(p)) {
+ SCLogDebug("add an IPv4");
+ if (p->afp_v.v4_map_fd == -1) {
+ return 0;
+ }
+ struct flowv4_keys *keys[2];
+ keys[0] = SCCalloc(1, sizeof(struct flowv4_keys));
+ if (keys[0] == NULL) {
+ return 0;
+ }
+ keys[0]->src = htonl(GET_IPV4_SRC_ADDR_U32(p));
+ keys[0]->dst = htonl(GET_IPV4_DST_ADDR_U32(p));
+ keys[0]->port16[0] = GET_TCP_SRC_PORT(p);
+ keys[0]->port16[1] = GET_TCP_DST_PORT(p);
+ keys[0]->vlan0 = p->vlan_id[0];
+ keys[0]->vlan1 = p->vlan_id[1];
+
+ if (IPV4_GET_IPPROTO(p) == IPPROTO_TCP) {
+ keys[0]->ip_proto = 1;
+ } else {
+ keys[0]->ip_proto = 0;
+ }
+ if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[0],
+ p->afp_v.nr_cpus) == 0) {
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET);
+ SCFree(keys[0]);
+ return 0;
+ }
+ keys[1]= SCCalloc(1, sizeof(struct flowv4_keys));
+ if (keys[1] == NULL) {
+ EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET);
+ SCFree(keys[0]);
+ return 0;
+ }
+ keys[1]->src = htonl(GET_IPV4_DST_ADDR_U32(p));
+ keys[1]->dst = htonl(GET_IPV4_SRC_ADDR_U32(p));
+ keys[1]->port16[0] = GET_TCP_DST_PORT(p);
+ keys[1]->port16[1] = GET_TCP_SRC_PORT(p);
+ keys[1]->vlan0 = p->vlan_id[0];
+ keys[1]->vlan1 = p->vlan_id[1];
+
+ keys[1]->ip_proto = keys[0]->ip_proto;
+ if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1],
+ p->afp_v.nr_cpus) == 0) {
+ EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET);
+ SCFree(keys[0]);
+ SCFree(keys[1]);
+ return 0;
+ }
+ EBPFUpdateFlow(p->flow, p, NULL);
+ return AFPSetFlowStorage(p, p->afp_v.v4_map_fd, keys[0], keys[1], AF_INET);
+ }
+ /* For IPv6 case we don't handle extended header in eBPF, so only
+ * packets whose next header is directly TCP or UDP are bypassed */
+ if (PKT_IS_IPV6(p) &&
+ ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
+ int i;
+ if (p->afp_v.v6_map_fd == -1) {
+ return 0;
+ }
+ SCLogDebug("add an IPv6");
+ struct flowv6_keys *keys[2];
+ keys[0] = SCCalloc(1, sizeof(struct flowv6_keys));
+ if (keys[0] == NULL) {
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
+ return 0;
+ }
+ for (i = 0; i < 4; i++) {
+ keys[0]->src[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
+ keys[0]->dst[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
+ }
+ keys[0]->port16[0] = GET_TCP_SRC_PORT(p);
+ keys[0]->port16[1] = GET_TCP_DST_PORT(p);
+ keys[0]->vlan0 = p->vlan_id[0];
+ keys[0]->vlan1 = p->vlan_id[1];
+
+ if (IPV6_GET_NH(p) == IPPROTO_TCP) {
+ keys[0]->ip_proto = 1;
+ } else {
+ keys[0]->ip_proto = 0;
+ }
+ if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[0],
+ p->afp_v.nr_cpus) == 0) {
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
+ SCFree(keys[0]);
+ return 0;
+ }
+ keys[1]= SCCalloc(1, sizeof(struct flowv6_keys));
+ if (keys[1] == NULL) {
+ EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
+ SCFree(keys[0]);
+ return 0;
+ }
+ for (i = 0; i < 4; i++) {
+ keys[1]->src[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
+ keys[1]->dst[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
+ }
+ keys[1]->port16[0] = GET_TCP_DST_PORT(p);
+ keys[1]->port16[1] = GET_TCP_SRC_PORT(p);
+ keys[1]->vlan0 = p->vlan_id[0];
+ keys[1]->vlan1 = p->vlan_id[1];
+
+ keys[1]->ip_proto = keys[0]->ip_proto;
+ if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1],
+ p->afp_v.nr_cpus) == 0) {
+ EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
+ SCFree(keys[0]);
+ SCFree(keys[1]);
+ return 0;
+ }
+ if (p->flow)
+ EBPFUpdateFlow(p->flow, p, NULL);
+ return AFPSetFlowStorage(p, p->afp_v.v6_map_fd, keys[0], keys[1], AF_INET6);
+ }
+ return 0;
+}
+
+/**
+ * Bypass function for AF_PACKET capture in XDP mode
+ *
+ * This function creates two half flows in the map shared with the kernel
+ * to trigger bypass. This function is similar to AFPBypassCallback() but
+ * the byte order of some fields differs because of the way the data is
+ * obtained in the XDP case: addresses are copied as stored in the packet
+ * structure without conversion, and ports go through htons().
+ *
+ * If the second key cannot be allocated or inserted, the first half flow
+ * is removed from the map again and the keys are freed. Once both half
+ * flows are inserted the keys are passed to AFPSetFlowStorage(), which
+ * frees them itself when it fails.
+ *
+ * NOTE(review): the IPv4 branch allocates the first key before checking
+ * v4_map_fd (and counts a bypass failure if that allocation fails), while
+ * the IPv6 branch checks v6_map_fd first and does not count an allocation
+ * failure of the first key -- confirm whether this is intended.
+ *
+ * \param p the packet belonging to the flow to bypass
+ * \return 0 if unable to bypass, 1 if success
+ */
+static int AFPXDPBypassCallback(Packet *p)
+{
+ SCLogDebug("Calling af_packet callback function");
+ /* Only bypass TCP and UDP */
+ if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
+ return 0;
+ }
+
+ /* If we don't have a flow attached to packet the eBPF map entries
+ * will be destroyed at first flow bypass manager pass as we won't
+ * find any associated entry */
+ if (p->flow == NULL) {
+ return 0;
+ }
+ /* Bypassing tunneled packets is currently not supported
+ * because we can't discard the inner packet only due to
+ * primitive parsing in eBPF */
+ if (IS_TUNNEL_PKT(p)) {
+ return 0;
+ }
+ if (PKT_IS_IPV4(p)) {
+ struct flowv4_keys *keys[2];
+ keys[0]= SCCalloc(1, sizeof(struct flowv4_keys));
+ if (keys[0] == NULL) {
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET);
+ return 0;
+ }
+ if (p->afp_v.v4_map_fd == -1) {
+ SCFree(keys[0]);
+ return 0;
+ }
+ keys[0]->src = p->src.addr_data32[0];
+ keys[0]->dst = p->dst.addr_data32[0];
+ /* In the XDP filter we get port from parsing of packet and not from skb
+ * (as in eBPF filter) so we need to pass from host to network order */
+ keys[0]->port16[0] = htons(p->sp);
+ keys[0]->port16[1] = htons(p->dp);
+ keys[0]->vlan0 = p->vlan_id[0];
+ keys[0]->vlan1 = p->vlan_id[1];
+ if (IPV4_GET_IPPROTO(p) == IPPROTO_TCP) {
+ keys[0]->ip_proto = 1;
+ } else {
+ keys[0]->ip_proto = 0;
+ }
+ if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[0],
+ p->afp_v.nr_cpus) == 0) {
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET);
+ SCFree(keys[0]);
+ return 0;
+ }
+ keys[1]= SCCalloc(1, sizeof(struct flowv4_keys));
+ if (keys[1] == NULL) {
+ EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET);
+ SCFree(keys[0]);
+ return 0;
+ }
+ keys[1]->src = p->dst.addr_data32[0];
+ keys[1]->dst = p->src.addr_data32[0];
+ keys[1]->port16[0] = htons(p->dp);
+ keys[1]->port16[1] = htons(p->sp);
+ keys[1]->vlan0 = p->vlan_id[0];
+ keys[1]->vlan1 = p->vlan_id[1];
+ keys[1]->ip_proto = keys[0]->ip_proto;
+ if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1],
+ p->afp_v.nr_cpus) == 0) {
+ EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET);
+ SCFree(keys[0]);
+ SCFree(keys[1]);
+ return 0;
+ }
+ return AFPSetFlowStorage(p, p->afp_v.v4_map_fd, keys[0], keys[1], AF_INET);
+ }
+ /* For IPv6 case we don't handle extended header in eBPF, so only
+ * packets whose next header is directly TCP or UDP are bypassed */
+ if (PKT_IS_IPV6(p) &&
+ ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
+ SCLogDebug("add an IPv6");
+ if (p->afp_v.v6_map_fd == -1) {
+ return 0;
+ }
+ int i;
+ struct flowv6_keys *keys[2];
+ keys[0] = SCCalloc(1, sizeof(struct flowv6_keys));
+ if (keys[0] == NULL) {
+ return 0;
+ }
+
+ for (i = 0; i < 4; i++) {
+ keys[0]->src[i] = GET_IPV6_SRC_ADDR(p)[i];
+ keys[0]->dst[i] = GET_IPV6_DST_ADDR(p)[i];
+ }
+ keys[0]->port16[0] = htons(GET_TCP_SRC_PORT(p));
+ keys[0]->port16[1] = htons(GET_TCP_DST_PORT(p));
+ keys[0]->vlan0 = p->vlan_id[0];
+ keys[0]->vlan1 = p->vlan_id[1];
+ if (IPV6_GET_NH(p) == IPPROTO_TCP) {
+ keys[0]->ip_proto = 1;
+ } else {
+ keys[0]->ip_proto = 0;
+ }
+ if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[0],
+ p->afp_v.nr_cpus) == 0) {
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
+ SCFree(keys[0]);
+ return 0;
+ }
+ keys[1]= SCCalloc(1, sizeof(struct flowv6_keys));
+ if (keys[1] == NULL) {
+ EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
+ SCFree(keys[0]);
+ return 0;
+ }
+ for (i = 0; i < 4; i++) {
+ keys[1]->src[i] = GET_IPV6_DST_ADDR(p)[i];
+ keys[1]->dst[i] = GET_IPV6_SRC_ADDR(p)[i];
+ }
+ keys[1]->port16[0] = htons(GET_TCP_DST_PORT(p));
+ keys[1]->port16[1] = htons(GET_TCP_SRC_PORT(p));
+ keys[1]->vlan0 = p->vlan_id[0];
+ keys[1]->vlan1 = p->vlan_id[1];
+ keys[1]->ip_proto = keys[0]->ip_proto;
+ if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1],
+ p->afp_v.nr_cpus) == 0) {
+ EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
+ LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
+ SCFree(keys[0]);
+ SCFree(keys[1]);
+ return 0;
+ }
+ return AFPSetFlowStorage(p, p->afp_v.v6_map_fd, keys[0], keys[1], AF_INET6);
+ }
+ return 0;
+}
+
+bool g_flowv4_ok = true;
+bool g_flowv6_ok = true;
+
+#endif /* HAVE_PACKET_EBPF */
+
/**
* \brief Init function for ReceiveAFP.
*
memset(ptv, 0, sizeof(AFPThreadVars));
ptv->tv = tv;
- ptv->cooked = 0;
strlcpy(ptv->iface, afpconfig->iface, AFP_IFACE_NAME_LENGTH);
ptv->iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
ptv->buffer_size = afpconfig->buffer_size;
ptv->ring_size = afpconfig->ring_size;
ptv->block_size = afpconfig->block_size;
+ ptv->block_timeout = afpconfig->block_timeout;
ptv->promisc = afpconfig->promisc;
ptv->checksum_mode = afpconfig->checksum_mode;
if (afpconfig->bpf_filter) {
ptv->bpf_filter = afpconfig->bpf_filter;
}
+#ifdef HAVE_PACKET_EBPF
+    /* copy the eBPF related settings from the interface configuration */
+    ptv->ebpf_lb_fd = afpconfig->ebpf_lb_fd;
+    ptv->ebpf_filter_fd = afpconfig->ebpf_filter_fd;
+    ptv->xdp_mode = afpconfig->xdp_mode;
+    /* NOTE(review): this value is replaced by the struct copy of
+     * afpconfig->ebpf_t_config at the end of this section -- confirm that
+     * the configuration already carries the intended cpus_count. */
+    ptv->ebpf_t_config.cpus_count = UtilCpuGetNumProcessorsConfigured();
+
+    if (ptv->flags & (AFP_BYPASS|AFP_XDPBYPASS)) {
+        ptv->v4_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v4");
+        if (ptv->v4_map_fd == -1) {
+            /* g_flowv4_ok starts out true: report the missing map once,
+             * then clear the flag so the error is not logged again
+             * (same scheme as the IPv6 table below). */
+            if (g_flowv4_ok) {
+                SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'",
+                        "flow_table_v4");
+                g_flowv4_ok = false;
+            }
+        }
+        ptv->v6_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v6");
+        if (ptv->v6_map_fd == -1) {
+            if (g_flowv6_ok) {
+                SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'",
+                        "flow_table_v6");
+                g_flowv6_ok = false;
+            }
+        }
+    }
+    ptv->ebpf_t_config = afpconfig->ebpf_t_config;
+#endif
#ifdef PACKET_STATISTICS
ptv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets",
ptv->tv);
ptv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops",
ptv->tv);
+ ptv->capture_errors = StatsRegisterCounter("capture.errors",
+ ptv->tv);
+
+ ptv->afpacket_spin = StatsRegisterAvgCounter("capture.afpacket.busy_loop_avg", ptv->tv);
+
+ ptv->capture_afp_poll = StatsRegisterCounter("capture.afpacket.polls", ptv->tv);
+ ptv->capture_afp_poll_signal = StatsRegisterCounter("capture.afpacket.poll_signal", ptv->tv);
+ ptv->capture_afp_poll_timeout = StatsRegisterCounter("capture.afpacket.poll_timeout", ptv->tv);
+ ptv->capture_afp_poll_data = StatsRegisterCounter("capture.afpacket.poll_data", ptv->tv);
+ ptv->capture_afp_poll_err = StatsRegisterCounter("capture.afpacket.poll_errors", ptv->tv);
#endif
ptv->copy_mode = afpconfig->copy_mode;
SCReturnInt(TM_ECODE_FAILED);
}
-#define T_DATA_SIZE 70000
- ptv->data = SCMalloc(T_DATA_SIZE);
- if (ptv->data == NULL) {
- afpconfig->DerefFunc(afpconfig);
- SCFree(ptv);
- SCReturnInt(TM_ECODE_FAILED);
- }
- ptv->datalen = T_DATA_SIZE;
-#undef T_DATA_SIZE
-
*data = (void *)ptv;
afpconfig->DerefFunc(afpconfig);
- /* A bit strange to have this here but we only have vlan information
- * during reading so we need to know if we want to keep vlan during
- * the capture phase */
- int vlanbool = 0;
- if ((ConfGetBool("vlan.use-for-tracking", &vlanbool)) == 1 && vlanbool == 0) {
- ptv->flags |= AFP_VLAN_DISABLED;
- }
-
/* If kernel is older than 3.0, VLAN is not stripped so we don't
* get the info from packet extended header but we will use a standard
* parsing of packet data (See Linux commit bcc6d47903612c3861201cc3a866fb604f26b8b2) */
- if (! SCKernelVersionIsAtLeast(3, 0)) {
- ptv->flags |= AFP_VLAN_DISABLED;
+ if (SCKernelVersionIsAtLeast(3, 0)) {
+ ptv->flags |= AFP_VLAN_IN_HEADER;
}
SCReturnInt(TM_ECODE_OK);
AFPSwitchState(ptv, AFP_STATE_DOWN);
- if (ptv->data != NULL) {
- SCFree(ptv->data);
- ptv->data = NULL;
+#ifdef HAVE_PACKET_XDP
+ if ((ptv->ebpf_t_config.flags & EBPF_XDP_CODE) &&
+ (!(ptv->ebpf_t_config.flags & EBPF_PINNED_MAPS))) {
+ EBPFSetupXDP(ptv->iface, -1, ptv->xdp_mode);
}
- ptv->datalen = 0;
+#endif
ptv->bpf_filter = NULL;
- if ((ptv->flags & AFP_TPACKET_V3) && ptv->ring_v3) {
- SCFree(ptv->ring_v3);
+ if ((ptv->flags & AFP_TPACKET_V3) && ptv->ring.v3) {
+ SCFree(ptv->ring.v3);
} else {
- if (ptv->ring_v2)
- SCFree(ptv->ring_v2);
+ if (ptv->ring.v2)
+ SCFree(ptv->ring.v2);
}
SCFree(ptv);
SCReturnInt(TM_ECODE_OK);
}
+/** \internal
+ *  \brief add a VLAN header into the raw data for inspection, logging
+ *         and sending out in IPS mode
+ *
+ *  The kernel doesn't provide the first VLAN header in the raw packet data,
+ *  but instead feeds it to us through meta data. For logging and IPS
+ *  we need to put it back into the raw data. Luckily there is some head
+ *  room in the original data so it's enough to move the ethernet header
+ *  a bit to make space for the VLAN header.
+ *
+ *  Nothing is done when p->afp_v.vlan_tci is 0.
+ *
+ *  \param p packet to update; its raw data pointer is moved back by
+ *           VLAN_HEADER_LEN bytes, its length grows by the same amount
+ *           and p->ethh is set to the new start of the data
+ */
+static void UpdateRawDataForVLANHdr(Packet *p)
+{
+    if (p->afp_v.vlan_tci == 0) {
+        return;
+    }
+
+    uint8_t *pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
+    const size_t plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
+    /* both fields go on the wire in network byte order */
+    const uint16_t tpid = htons(0x8100); /* 802.1Q ethertype */
+    const uint16_t tci = htons(p->afp_v.vlan_tci);
+
+    /* move ethernet addresses */
+    memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
+    /* write vlan info right after the addresses; memcpy is used instead of
+     * a store through a casted uint16_t pointer as nothing here guarantees
+     * that the packet buffer is suitably aligned for that */
+    memcpy(pstart + 2 * ETH_ALEN, &tpid, sizeof(tpid));
+    memcpy(pstart + 2 * ETH_ALEN + sizeof(tpid), &tci, sizeof(tci));
+
+    /* update the packet raw data pointer to start at the new offset */
+    (void)PacketSetData(p, pstart, plen);
+    /* update ethernet header pointer to point to the new start of the data */
+    p->ethh = (void *)pstart;
+}
+
+
/**
* \brief This function passes off to link type decoders.
*
- * DecodeAFP reads packets from the PacketQueue and passes
+ * DecodeAFP decodes packets from AF_PACKET and passes
* them off to the proper link type decoder.
*
* \param t pointer to ThreadVars
* \param p pointer to the current packet
* \param data pointer that gets cast into AFPThreadVars for ptv
- * \param pq pointer to the current PacketQueue
*/
-TmEcode DecodeAFP(ThreadVars *tv, Packet *p, void *data, PacketQueue *pq, PacketQueue *postpq)
+TmEcode DecodeAFP(ThreadVars *tv, Packet *p, void *data)
{
SCEnter();
+
+ const bool afp_vlan_hdr = p->vlan_idx != 0;
DecodeThreadVars *dtv = (DecodeThreadVars *)data;
- /* XXX HACK: flow timeout can call us for injected pseudo packets
- * see bug: https://redmine.openinfosecfoundation.org/issues/1107 */
- if (p->flags & PKT_PSEUDO_STREAM_END)
- return TM_ECODE_OK;
+ DEBUG_VALIDATE_BUG_ON(PKT_IS_PSEUDOPKT(p));
/* update counters */
DecodeUpdatePacketCounters(tv, dtv, p);
- /* If suri has set vlan during reading, we increase vlan counter */
- if (p->vlan_idx) {
- StatsIncr(tv, dtv->counter_vlan);
- }
-
/* call the decoder */
- switch (p->datalink) {
- case LINKTYPE_ETHERNET:
- DecodeEthernet(tv, dtv, p,GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
- break;
- case LINKTYPE_LINUX_SLL:
- DecodeSll(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
- break;
- case LINKTYPE_PPP:
- DecodePPP(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
- break;
- case LINKTYPE_RAW:
- DecodeRaw(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
- break;
- case LINKTYPE_NULL:
- DecodeNull(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
- break;
- default:
- SCLogError(SC_ERR_DATALINK_UNIMPLEMENTED, "Error: datalink type %" PRId32 " not yet supported in module DecodeAFP", p->datalink);
- break;
+ DecodeLinkLayer(tv, dtv, p->datalink, p, GET_PKT_DATA(p), GET_PKT_LEN(p));
+ /* post-decoding: put the VLAN header back into the raw data */
+ if (afp_vlan_hdr) {
+ StatsIncr(tv, dtv->counter_vlan);
+ UpdateRawDataForVLANHdr(p);
}
PacketDecodeFinalize(tv, dtv, p);
TmEcode DecodeAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
{
SCEnter();
- DecodeThreadVars *dtv = NULL;
-
- dtv = DecodeThreadVarsAlloc(tv);
-
+ DecodeThreadVars *dtv = DecodeThreadVarsAlloc(tv);
if (dtv == NULL)
SCReturnInt(TM_ECODE_FAILED);