#include "source-af-packet.h"
#include "runmodes.h"
#include "flow-storage.h"
+#include "util-validate.h"
#ifdef HAVE_AF_PACKET
#endif
#ifndef TP_STATUS_USER_BUSY
-/* for new use latest bit available in tp_status */
-#define TP_STATUS_USER_BUSY BIT_U32(31)
+/* HACK special setting in the tp_status field for frames we are
+ * still working on. This can happen in autofp mode where the
+ * capture thread goes around the ring and finds a frame that still
+ * hasn't been released by a worker thread.
+ *
+ * We use bits 29, 30, 31. 29 and 31 are software and hardware
+ * timestamps. 30 should not be set by the kernel at all. Combined
+ * they should never be set on the rx-ring together.
+ *
+ * The excessive casting is for handling the fact that the kernel
+ * defines almost all of these as int flags, not unsigned ints. */
+#define TP_STATUS_USER_BUSY \
+ (uint32_t)((uint32_t)TP_STATUS_TS_SOFTWARE | (uint32_t)TP_STATUS_TS_SYS_HARDWARE | \
+ (uint32_t)TP_STATUS_TS_RAW_HARDWARE)
#endif
+#define FRAME_BUSY(tp_status) \
+ (((uint32_t)(tp_status) & (uint32_t)TP_STATUS_USER_BUSY) == (uint32_t)TP_STATUS_USER_BUSY) /* true only when every TP_STATUS_USER_BUSY bit is set; a single timestamp bit does not count as busy */
enum {
AFP_READ_OK,
void *raw;
};
+#ifdef HAVE_PACKET_EBPF
static int AFPBypassCallback(Packet *p);
static int AFPXDPBypassCallback(Packet *p);
+#endif
#define MAX_MAPS 32
/**
uint16_t capture_kernel_drops;
uint16_t capture_errors;
uint16_t afpacket_spin;
+ uint16_t capture_afp_poll;
+ uint16_t capture_afp_poll_signal;
+ uint16_t capture_afp_poll_timeout;
+ uint16_t capture_afp_poll_data;
+ uint16_t capture_afp_poll_err;
/* handle state */
uint8_t afp_state;
/* IPS peer */
AFPPeer *mpeer;
- /* no mmap mode */
- uint8_t *data; /** Per function and thread data */
- int datalen; /** Length of per function and thread data */
- int cooked;
-
/*
* Init related members
*/
int buffer_size;
/* Filter */
const char *bpf_filter;
- int ebpf_lb_fd;
- int ebpf_filter_fd;
int promisc;
unsigned int ring_buflen;
uint8_t *ring_buf;
- uint8_t xdp_mode;
-
#ifdef HAVE_PACKET_EBPF
+ uint8_t xdp_mode;
+ int ebpf_lb_fd;
+ int ebpf_filter_fd;
struct ebpf_timeout_config ebpf_t_config;
#endif
}
-
/**
* \defgroup afppeers AFP peers list
*
(void)SC_ATOMIC_SET(ptv->mpeer->if_idx, AFPGetIfnumByDev(ptv->socket, ptv->iface, 0));
(void)SC_ATOMIC_SET(ptv->mpeer->socket, ptv->socket);
(void)SC_ATOMIC_SET(ptv->mpeer->state, ptv->afp_state);
+ (void)SC_ATOMIC_SET(ptv->mpeer->send_errors, 0);
}
/**
#endif
}
-/**
- * \brief AF packet read function.
- *
- * This function fills
- * From here the packets are picked up by the DecodeAFP thread.
- *
- * \param user pointer to AFPThreadVars
- * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
- */
-static int AFPRead(AFPThreadVars *ptv)
-{
- Packet *p = NULL;
- /* XXX should try to use read that get directly to packet */
- int offset = 0;
- int caplen;
- struct sockaddr_ll from;
- struct iovec iov;
- struct msghdr msg;
- struct cmsghdr *cmsg;
- union {
- struct cmsghdr cmsg;
- char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
- } cmsg_buf;
- unsigned char aux_checksum = 0;
-
- msg.msg_name = &from;
- msg.msg_namelen = sizeof(from);
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- msg.msg_control = &cmsg_buf;
- msg.msg_controllen = sizeof(cmsg_buf);
- msg.msg_flags = 0;
-
- if (ptv->cooked)
- offset = SLL_HEADER_LEN;
- else
- offset = 0;
- iov.iov_len = ptv->datalen - offset;
- iov.iov_base = ptv->data + offset;
-
- caplen = recvmsg(ptv->socket, &msg, MSG_TRUNC);
-
- if (caplen < 0) {
- SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
- errno);
- SCReturnInt(AFP_READ_FAILURE);
- }
-
- p = PacketGetFromQueueOrAlloc();
- if (p == NULL) {
- SCReturnInt(AFP_SURI_FAILURE);
- }
- PKT_SET_SRC(p, PKT_SRC_WIRE);
- if (ptv->flags & AFP_BYPASS) {
- p->BypassPacketsFlow = AFPBypassCallback;
-#ifdef HAVE_PACKET_EBPF
- p->afp_v.v4_map_fd = ptv->v4_map_fd;
- p->afp_v.v6_map_fd = ptv->v6_map_fd;
- p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
-#endif
- }
- if (ptv->flags & AFP_XDPBYPASS) {
- p->BypassPacketsFlow = AFPXDPBypassCallback;
-#ifdef HAVE_PACKET_EBPF
- p->afp_v.v4_map_fd = ptv->v4_map_fd;
- p->afp_v.v6_map_fd = ptv->v6_map_fd;
- p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
-#endif
- }
-
- /* get timestamp of packet via ioctl */
- if (ioctl(ptv->socket, SIOCGSTAMP, &p->ts) == -1) {
- SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
- errno);
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_READ_FAILURE);
- }
-
- ptv->pkts++;
- p->livedev = ptv->livedev;
-
- /* add forged header */
- if (ptv->cooked) {
- SllHdr * hdrp = (SllHdr *)ptv->data;
- /* XXX this is minimalist, but this seems enough */
- hdrp->sll_protocol = from.sll_protocol;
- }
-
- p->datalink = ptv->datalink;
- SET_PKT_LEN(p, caplen + offset);
- if (PacketCopyData(p, ptv->data, GET_PKT_LEN(p)) == -1) {
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_SURI_FAILURE);
- }
- SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
- GET_PKT_LEN(p), p, GET_PKT_DATA(p));
-
- /* We only check for checksum disable */
- if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
- p->flags |= PKT_IGNORE_CHECKSUM;
- } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
- if (ChecksumAutoModeCheck(ptv->pkts,
- SC_ATOMIC_GET(ptv->livedev->pkts),
- SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
- ptv->checksum_mode = CHECKSUM_VALIDATION_DISABLE;
- p->flags |= PKT_IGNORE_CHECKSUM;
- }
- } else {
- aux_checksum = 1;
- }
-
- /* List is NULL if we don't have activated auxiliary data */
- for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
- struct tpacket_auxdata *aux;
-
- if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata)) ||
- cmsg->cmsg_level != SOL_PACKET ||
- cmsg->cmsg_type != PACKET_AUXDATA)
- continue;
-
- aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
-
- if (aux_checksum && (aux->tp_status & TP_STATUS_CSUMNOTREADY)) {
- p->flags |= PKT_IGNORE_CHECKSUM;
- }
- break;
- }
-
- if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
- SCReturnInt(AFP_SURI_FAILURE);
- }
- SCReturnInt(AFP_READ_OK);
-}
-
/**
* \brief AF packet write function.
*
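- * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
+ * Returns nothing: a missing Ethernet header or a failed sendto() is at
+ * most logged, so callers get no error indication.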
*
*/
-static TmEcode AFPWritePacket(Packet *p, int version)
+static void AFPWritePacket(Packet *p, int version)
{
struct sockaddr_ll socket_address;
int socket;
- uint8_t *pstart;
- size_t plen;
- union thdr h;
- uint16_t vlan_tci = 0;
if (p->afp_v.copy_mode == AFP_COPY_MODE_IPS) {
if (PacketTestAction(p, ACTION_DROP)) {
- return TM_ECODE_OK;
+ return;
}
}
- if (SC_ATOMIC_GET(p->afp_v.peer->state) == AFP_STATE_DOWN)
- return TM_ECODE_OK;
-
if (p->ethh == NULL) {
SCLogWarning(SC_ERR_INVALID_VALUE, "Should have an Ethernet header");
- return TM_ECODE_FAILED;
+ return;
}
+
/* Index of the network device */
socket_address.sll_ifindex = SC_ATOMIC_GET(p->afp_v.peer->if_idx);
/* Address length*/
SCMutexLock(&p->afp_v.peer->sock_protect);
socket = SC_ATOMIC_GET(p->afp_v.peer->socket);
- h.raw = p->afp_v.relptr;
-
- if (version == TPACKET_V2) {
- /* Copy VLAN header from ring memory. For post june 2011 kernel we test
- * the flag. It is not defined for older kernel so we go best effort
- * and test for non zero value of the TCI header. */
- if (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci) {
- vlan_tci = h.h2->tp_vlan_tci;
- }
- } else {
-#ifdef HAVE_TPACKET_V3
- if (h.h3->tp_status & TP_STATUS_VLAN_VALID || h.h3->hv1.tp_vlan_tci) {
- vlan_tci = h.h3->hv1.tp_vlan_tci;
+ if (sendto(socket, GET_PKT_DATA(p), GET_PKT_LEN(p), 0, (struct sockaddr *)&socket_address,
+ sizeof(struct sockaddr_ll)) < 0) {
+ if (SC_ATOMIC_ADD(p->afp_v.peer->send_errors, 1) == 0) {
+ SCLogWarning(SC_ERR_SOCKET, "sending packet failed on socket %d: %s", socket,
+ strerror(errno));
}
-#else
- /* Should not get here */
- BUG_ON(1);
-#endif
- }
-
- if (vlan_tci != 0) {
- pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
- plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
- /* move ethernet addresses */
- memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
- /* write vlan info */
- *(uint16_t *)(pstart + 2 * ETH_ALEN) = htons(0x8100);
- *(uint16_t *)(pstart + 2 * ETH_ALEN + 2) = htons(vlan_tci);
- } else {
- pstart = GET_PKT_DATA(p);
- plen = GET_PKT_LEN(p);
- }
-
- if (sendto(socket, pstart, plen, 0,
- (struct sockaddr*) &socket_address,
- sizeof(struct sockaddr_ll)) < 0) {
- SCLogWarning(SC_ERR_SOCKET, "Sending packet failed on socket %d: %s",
- socket,
- strerror(errno));
- if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
- SCMutexUnlock(&p->afp_v.peer->sock_protect);
- return TM_ECODE_FAILED;
}
if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
SCMutexUnlock(&p->afp_v.peer->sock_protect);
-
- return TM_ECODE_OK;
}
static void AFPReleaseDataFromRing(Packet *p)
{
+ DEBUG_VALIDATE_BUG_ON(PKT_IS_PSEUDOPKT(p)); /* pseudo packets are no longer filtered below, so they must never get here */
+
/* Need to be in copy mode and need to detect early release
where Ethernet header could not be set (and pseudo packet) */
- if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
+ if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
AFPWritePacket(p, TPACKET_V2);
}
- if (AFPDerefSocket(p->afp_v.mpeer) == 0)
- goto cleanup;
+ BUG_ON(p->afp_v.relptr == NULL); /* a frame pointer is now mandatory: the status write below is unconditional */
- if (p->afp_v.relptr) {
- union thdr h;
- h.raw = p->afp_v.relptr;
- h.h2->tp_status = TP_STATUS_KERNEL;
- }
+ union thdr h;
+ h.raw = p->afp_v.relptr;
+ h.h2->tp_status = TP_STATUS_KERNEL; /* hand the frame back to the kernel; this also clears the busy marker */
+
+ (void)AFPDerefSocket(p->afp_v.mpeer); /* result deliberately ignored: the frame is released above regardless of the remaining reference count */
-cleanup:
AFPV_CLEANUP(&p->afp_v);
}
#ifdef HAVE_TPACKET_V3
static void AFPReleasePacketV3(Packet *p)
{
+ DEBUG_VALIDATE_BUG_ON(PKT_IS_PSEUDOPKT(p));
+
/* Need to be in copy mode and need to detect early release
where Ethernet header could not be set (and pseudo packet) */
- if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
+ if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
AFPWritePacket(p, TPACKET_V3);
}
PacketFreeOrRelease(p);
static inline void AFPReadApplyBypass(const AFPThreadVars *ptv, Packet *p)
{
+#ifdef HAVE_PACKET_EBPF /* without eBPF support the whole body is compiled out and this function does nothing */
if (ptv->flags & AFP_BYPASS) {
p->BypassPacketsFlow = AFPBypassCallback;
-#ifdef HAVE_PACKET_EBPF
p->afp_v.v4_map_fd = ptv->v4_map_fd;
p->afp_v.v6_map_fd = ptv->v6_map_fd;
p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
-#endif
}
if (ptv->flags & AFP_XDPBYPASS) {
p->BypassPacketsFlow = AFPXDPBypassCallback;
-#ifdef HAVE_PACKET_EBPF
p->afp_v.v4_map_fd = ptv->v4_map_fd;
p->afp_v.v6_map_fd = ptv->v6_map_fd;
p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
-#endif
}
+#endif /* HAVE_PACKET_EBPF; note the XDP branch runs last, so its callback wins when both flags are set */
}
/** \internal
* \brief setup packet for AFPReadFromRing
*/
-static bool AFPReadFromRingSetupPacket(
+static void AFPReadFromRingSetupPacket(
AFPThreadVars *ptv, union thdr h, const unsigned int tp_status, Packet *p)
{
PKT_SET_SRC(p, PKT_SRC_WIRE);
- /* Suricata will treat packet so telling it is busy, this
- * status will be reset to 0 (ie TP_STATUS_KERNEL) in the release
- * function. */
+ /* flag the packet as TP_STATUS_USER_BUSY, which is ignored by the kernel, but
+ * acts as an indicator that we've reached a frame that is not yet released by
+ * us in autofp mode. It will be cleared when the frame gets released to the kernel. */
h.h2->tp_status |= TP_STATUS_USER_BUSY;
p->livedev = ptv->livedev;
p->datalink = ptv->datalink;
(tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci)) {
p->vlan_id[0] = h.h2->tp_vlan_tci & 0x0fff;
p->vlan_idx = 1;
+ p->afp_v.vlan_tci = h.h2->tp_vlan_tci;
}
- if (ptv->flags & AFP_ZERO_COPY) {
- if (PacketSetData(p, (unsigned char *)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
- return false;
- }
+ (void)PacketSetData(p, (unsigned char *)h.raw + h.h2->tp_mac, h.h2->tp_snaplen);
- p->afp_v.relptr = h.raw;
- p->ReleasePacket = AFPReleasePacket;
+ p->ReleasePacket = AFPReleasePacket;
+ p->afp_v.relptr = h.raw;
+ if (ptv->flags & AFP_NEED_PEER) {
p->afp_v.mpeer = ptv->mpeer;
AFPRefSocket(ptv->mpeer);
-
- p->afp_v.copy_mode = ptv->copy_mode;
- if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
- p->afp_v.peer = ptv->mpeer->peer;
- } else {
- p->afp_v.peer = NULL;
- }
} else {
- if (PacketCopyData(p, (unsigned char *)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
- return false;
- }
+ p->afp_v.mpeer = NULL;
}
+ p->afp_v.copy_mode = ptv->copy_mode;
+ p->afp_v.peer = (p->afp_v.copy_mode == AFP_COPY_MODE_NONE) ? NULL : ptv->mpeer->peer;
+
/* Timestamp */
p->ts.tv_sec = h.h2->tp_sec;
p->ts.tv_usec = h.h2->tp_nsec / 1000;
p->flags |= PKT_IGNORE_CHECKSUM;
}
}
- return true;
}
static inline int AFPReadFromRingWaitForPacket(AFPThreadVars *ptv)
break;
}
/* if in autofp mode the frame is still busy, return to poll */
- if (unlikely(tp_status & TP_STATUS_USER_BUSY)) {
+ if (unlikely(FRAME_BUSY(tp_status))) {
break;
}
emergency_flush |= ((tp_status & TP_STATUS_LOSING) != 0);
if (p == NULL) {
return AFPSuriFailure(ptv, h);
}
- if (AFPReadFromRingSetupPacket(ptv, h, tp_status, p) == false) {
- TmqhOutputPacketpool(ptv->tv, p);
- return AFPSuriFailure(ptv, h);
- }
- /* release frame if not in zero copy mode */
- if (!(ptv->flags & AFP_ZERO_COPY)) {
- h.h2->tp_status = TP_STATUS_KERNEL;
- }
+ AFPReadFromRingSetupPacket(ptv, h, tp_status, p);
if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
return AFPSuriFailure(ptv, h);
(ppd->tp_status & TP_STATUS_VLAN_VALID || ppd->hv1.tp_vlan_tci)) {
p->vlan_id[0] = ppd->hv1.tp_vlan_tci & 0x0fff;
p->vlan_idx = 1;
+ p->afp_v.vlan_tci = ppd->hv1.tp_vlan_tci;
}
- if (ptv->flags & AFP_ZERO_COPY) {
- if (PacketSetData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_SURI_FAILURE);
- }
- p->afp_v.relptr = ppd;
- p->ReleasePacket = AFPReleasePacketV3;
- p->afp_v.mpeer = ptv->mpeer;
- AFPRefSocket(ptv->mpeer);
+ (void)PacketSetData(p, (unsigned char *)ppd + ppd->tp_mac, ppd->tp_snaplen);
+
+ p->ReleasePacket = AFPReleasePacketV3;
+ p->afp_v.relptr = NULL;
+ p->afp_v.mpeer = NULL;
+ p->afp_v.copy_mode = ptv->copy_mode;
+ p->afp_v.peer = (p->afp_v.copy_mode == AFP_COPY_MODE_NONE) ? NULL : ptv->mpeer->peer;
- p->afp_v.copy_mode = ptv->copy_mode;
- if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
- p->afp_v.peer = ptv->mpeer->peer;
- } else {
- p->afp_v.peer = NULL;
- }
- } else {
- if (PacketCopyData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
- TmqhOutputPacketpool(ptv->tv, p);
- SCReturnInt(AFP_SURI_FAILURE);
- }
- }
/* Timestamp */
p->ts.tv_sec = ppd->tp_sec;
p->ts.tv_usec = ppd->tp_nsec/1000;
static inline int AFPWalkBlock(AFPThreadVars *ptv, struct tpacket_block_desc *pbd)
{
- int num_pkts = pbd->hdr.bh1.num_pkts, i;
- uint8_t *ppd;
- int ret = 0;
-
- ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;
- for (i = 0; i < num_pkts; ++i) {
- ret = AFPParsePacketV3(ptv, pbd,
- (struct tpacket3_hdr *)ppd);
+ const int num_pkts = pbd->hdr.bh1.num_pkts;
+ uint8_t *ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;
+
+ for (int i = 0; i < num_pkts; ++i) {
+ int ret = AFPParsePacketV3(ptv, pbd, (struct tpacket3_hdr *)ppd);
switch (ret) {
case AFP_READ_OK:
break;
static int AFPReadFromRingV3(AFPThreadVars *ptv)
{
#ifdef HAVE_TPACKET_V3
- struct tpacket_block_desc *pbd;
- int ret = 0;
-
/* Loop till we have packets available */
while (1) {
if (unlikely(suricata_ctl_flags != 0)) {
break;
}
- pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
+ struct tpacket_block_desc *pbd =
+ (struct tpacket_block_desc *)ptv->ring.v3[ptv->frame_offset].iov_base;
/* block is not ready to be read */
if ((pbd->hdr.bh1.block_status & TP_STATUS_USER) == 0) {
SCReturnInt(AFP_READ_OK);
}
- ret = AFPWalkBlock(ptv, pbd);
+ int ret = AFPWalkBlock(ptv, pbd);
if (unlikely(ret != AFP_READ_OK)) {
AFPFlushBlock(pbd);
SCReturnInt(ret);
return 1;
if (SC_ATOMIC_SUB(peer->sock_usage, 1) == 1) {
- if (SC_ATOMIC_GET(peer->state) == AFP_STATE_DOWN) {
- SCLogInfo("Cleaning socket connected to '%s'", peer->iface);
- close(SC_ATOMIC_GET(peer->socket));
- return 0;
- }
+ return 0;
}
return 1;
}
-static void AFPSwitchState(AFPThreadVars *ptv, int state)
+static void AFPCloseSocket(AFPThreadVars *ptv) /* frees the ring index array, unmaps the ring buffer and closes the capture socket */
{
- ptv->afp_state = state;
- ptv->down_count = 0;
-
- AFPPeerUpdate(ptv);
+ if (ptv->mpeer != NULL)
+ BUG_ON(SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0); /* nothing may still hold a socket reference when we tear it down */
- /* Do cleaning if switching to down state */
- if (state == AFP_STATE_DOWN) {
-#ifdef HAVE_TPACKET_V3
- if (ptv->flags & AFP_TPACKET_V3) {
- if (!ptv->ring.v3) {
- SCFree(ptv->ring.v3);
- ptv->ring.v3 = NULL;
- }
- } else {
-#endif
- if (ptv->ring.v2) {
- /* only used in reading phase, we can free it */
- SCFree(ptv->ring.v2);
- ptv->ring.v2 = NULL;
- }
+ if (ptv->flags & AFP_TPACKET_V3) {
#ifdef HAVE_TPACKET_V3
+ if (ptv->ring.v3) { /* the removed code tested !ptv->ring.v3 here and so never freed a live array */
+ SCFree(ptv->ring.v3);
+ ptv->ring.v3 = NULL;
}
#endif
- if (ptv->socket != -1) {
- /* we need to wait for all packets to return data */
- if (SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1) == 1) {
- SCLogDebug("Cleaning socket connected to '%s'", ptv->iface);
- munmap(ptv->ring_buf, ptv->ring_buflen);
- close(ptv->socket);
- ptv->socket = -1;
- }
+ } else {
+ if (ptv->ring.v2) {
+ /* only used in reading phase, we can free it */
+ SCFree(ptv->ring.v2);
+ ptv->ring.v2 = NULL;
}
}
- if (state == AFP_STATE_UP) {
- (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
+ if (ptv->socket != -1) {
+ SCLogDebug("Cleaning socket connected to '%s'", ptv->iface);
+ munmap(ptv->ring_buf, ptv->ring_buflen);
+ close(ptv->socket);
+ ptv->socket = -1; /* mark as closed so a repeated call skips the unmap/close */
}
}
-static int AFPReadAndDiscard(AFPThreadVars *ptv, struct timeval *synctv,
- uint64_t *discarded_pkts)
+static void AFPSwitchState(AFPThreadVars *ptv, int state) /* records the new capture state and adjusts the peer's socket use count; no longer frees or closes anything itself */
{
- struct sockaddr_ll from;
- struct iovec iov;
- struct msghdr msg;
- struct timeval ts;
- union {
- struct cmsghdr cmsg;
- char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
- } cmsg_buf;
-
-
- if (unlikely(suricata_ctl_flags != 0)) {
- return 1;
- }
-
- msg.msg_name = &from;
- msg.msg_namelen = sizeof(from);
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- msg.msg_control = &cmsg_buf;
- msg.msg_controllen = sizeof(cmsg_buf);
- msg.msg_flags = 0;
-
- iov.iov_len = ptv->datalen;
- iov.iov_base = ptv->data;
-
- (void)recvmsg(ptv->socket, &msg, MSG_TRUNC);
+ ptv->afp_state = state;
+ ptv->down_count = 0; /* restarted on every state switch */
- if (ioctl(ptv->socket, SIOCGSTAMP, &ts) == -1) {
- /* FIXME */
- return -1;
+ if (state == AFP_STATE_DOWN) {
+ /* cleanup is done on thread cleanup or try reopen
+ * as there may still be packets in autofp that
+ * are referencing us */
+ (void)SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1); /* drops the count of 1 that the AFP_STATE_UP branch below installs */
}
-
- if ((ts.tv_sec > synctv->tv_sec) ||
- (ts.tv_sec >= synctv->tv_sec &&
- ts.tv_usec > synctv->tv_usec)) {
- return 1;
+ if (state == AFP_STATE_UP) {
+ AFPPeerUpdate(ptv); /* now only called when going up, not on every switch */
+ (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
}
- return 0;
}
static int AFPReadAndDiscardFromRing(AFPThreadVars *ptv, struct timeval *synctv,
uint64_t *discarded_pkts)
{
- union thdr h;
-
if (unlikely(suricata_ctl_flags != 0)) {
return 1;
}
#ifdef HAVE_TPACKET_V3
if (ptv->flags & AFP_TPACKET_V3) {
int ret = 0;
- struct tpacket_block_desc *pbd;
- pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
+ struct tpacket_block_desc *pbd =
+ (struct tpacket_block_desc *)ptv->ring.v3[ptv->frame_offset].iov_base;
*discarded_pkts += pbd->hdr.bh1.num_pkts;
struct tpacket3_hdr *ppd =
(struct tpacket3_hdr *)((uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt);
#endif
{
/* Read packet from ring */
+ union thdr h;
h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
if (h.raw == NULL) {
return -1;
}
}
-
return 0;
}
if (AFPPeersListStarted() && synctv.tv_sec == (time_t) 0xffffffff) {
gettimeofday(&synctv, NULL);
}
- if (ptv->flags & AFP_RING_MODE) {
- r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
- } else {
- r = AFPReadAndDiscard(ptv, &synctv, discarded_pkts);
- }
+ r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
SCLogDebug("Discarding on %s", ptv->tv->name);
switch (r) {
case 1:
return -1;
}
+ /* ref cnt 0, we can close the old socket */
+ AFPCloseSocket(ptv);
+
int afp_activate_r = AFPCreateSocket(ptv, ptv->iface, 0);
if (afp_activate_r != 0) {
if (ptv->down_count % AFP_DOWN_COUNTER_INTERVAL == 0) {
ptv->slot = s->slot_next;
- if (ptv->flags & AFP_RING_MODE) {
- if (ptv->flags & AFP_TPACKET_V3) {
- AFPReadFunc = AFPReadFromRingV3;
- } else {
- AFPReadFunc = AFPReadFromRing;
- }
+ if (ptv->flags & AFP_TPACKET_V3) {
+ AFPReadFunc = AFPReadFromRingV3;
} else {
- AFPReadFunc = AFPRead;
+ AFPReadFunc = AFPReadFromRing;
}
if (ptv->afp_state == AFP_STATE_DOWN) {
* us from alloc'ing packets at line rate */
PacketPoolWait();
+ StatsIncr(ptv->tv, ptv->capture_afp_poll);
+
r = poll(&fds, 1, POLL_TIMEOUT);
if (suricata_ctl_flags != 0) {
if (r > 0 &&
(fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
+ StatsIncr(ptv->tv, ptv->capture_afp_poll_signal);
if (fds.revents & (POLLHUP | POLLRDHUP)) {
AFPSwitchState(ptv, AFP_STATE_DOWN);
continue;
continue;
}
} else if (r > 0) {
+ StatsIncr(ptv->tv, ptv->capture_afp_poll_data);
r = AFPReadFunc(ptv);
switch (r) {
case AFP_READ_OK:
break;
}
} else if (unlikely(r == 0)) {
+ StatsIncr(ptv->tv, ptv->capture_afp_poll_timeout);
/* Trigger one dump of stats every second */
current_time = time(NULL);
if (current_time != last_dump) {
TmThreadsCaptureHandleTimeout(tv, NULL);
} else if ((r < 0) && (errno != EINTR)) {
+ StatsIncr(ptv->tv, ptv->capture_afp_poll_err);
SCLogError(SC_ERR_AFP_READ, "Error reading data from iface '%s': (%d) %s",
ptv->iface,
errno, strerror(errno));
}
#endif
- /* Let's reserve head room so we can add the VLAN header in IPS
- * or TAP mode before write the packet */
- if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
- /* Only one vlan is extracted from AFP header so
- * one VLAN header length is enough. */
- int reserve = VLAN_HEADER_LEN;
- if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *) &reserve,
- sizeof(reserve)) < 0) {
- SCLogError(SC_ERR_AFP_CREATE,
- "Can't activate reserve on packet socket: %s",
- strerror(errno));
- return AFP_FATAL_ERROR;
- }
+ /* Reserve head room for a VLAN header. One vlan is extracted from AFP header
+ * so one VLAN header length is enough. */
+ int reserve = VLAN_HEADER_LEN;
+ if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *)&reserve, sizeof(reserve)) <
+ 0) {
+ SCLogError(
+ SC_ERR_AFP_CREATE, "Can't activate reserve on packet socket: %s", strerror(errno));
+ return AFP_FATAL_ERROR;
}
/* Allocate RX ring */
}
#endif
- if (ptv->flags & AFP_RING_MODE) {
- ret = AFPSetupRing(ptv, devname);
- if (ret != 0)
- goto socket_err;
- }
+ ret = AFPSetupRing(ptv, devname);
+ if (ret != 0)
+ goto socket_err;
SCLogDebug("Using interface '%s' via socket %d", (char *)devname, ptv->socket);
ptv->datalink = AFPGetDevLinktype(ptv->socket, ptv->iface);
- switch (ptv->datalink) {
- case ARPHRD_PPP:
- case ARPHRD_ATM:
- ptv->cooked = 1;
- break;
- }
TmEcode rc = AFPSetBPFFilter(ptv);
if (rc == TM_ECODE_FAILED) {
return 1;
}
-#endif
-
/**
* Bypass function for AF_PACKET capture in eBPF mode
*
*/
static int AFPBypassCallback(Packet *p)
{
-#ifdef HAVE_PACKET_EBPF
SCLogDebug("Calling af_packet callback function");
/* Only bypass TCP and UDP */
if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
EBPFUpdateFlow(p->flow, p, NULL);
return AFPSetFlowStorage(p, p->afp_v.v6_map_fd, keys[0], keys[1], AF_INET6);
}
-#endif
return 0;
}
*/
static int AFPXDPBypassCallback(Packet *p)
{
-#ifdef HAVE_PACKET_XDP
SCLogDebug("Calling af_packet callback function");
/* Only bypass TCP and UDP */
if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
}
return AFPSetFlowStorage(p, p->afp_v.v6_map_fd, keys[0], keys[1], AF_INET6);
}
-#endif
return 0;
}
-
bool g_flowv4_ok = true;
bool g_flowv6_ok = true;
+#endif /* HAVE_PACKET_EBPF */
+
/**
* \brief Init function for ReceiveAFP.
*
memset(ptv, 0, sizeof(AFPThreadVars));
ptv->tv = tv;
- ptv->cooked = 0;
strlcpy(ptv->iface, afpconfig->iface, AFP_IFACE_NAME_LENGTH);
ptv->iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
if (afpconfig->bpf_filter) {
ptv->bpf_filter = afpconfig->bpf_filter;
}
+#ifdef HAVE_PACKET_EBPF
ptv->ebpf_lb_fd = afpconfig->ebpf_lb_fd;
ptv->ebpf_filter_fd = afpconfig->ebpf_filter_fd;
ptv->xdp_mode = afpconfig->xdp_mode;
-#ifdef HAVE_PACKET_EBPF
ptv->ebpf_t_config.cpus_count = UtilCpuGetNumProcessorsConfigured();
if (ptv->flags & (AFP_BYPASS|AFP_XDPBYPASS)) {
ptv->tv);
ptv->capture_errors = StatsRegisterCounter("capture.errors",
ptv->tv);
- ptv->afpacket_spin = StatsRegisterAvgCounter("afpacket.busy_loop_avg", ptv->tv);
+
+ ptv->afpacket_spin = StatsRegisterAvgCounter("capture.afpacket.busy_loop_avg", ptv->tv);
+
+ ptv->capture_afp_poll = StatsRegisterCounter("capture.afpacket.polls", ptv->tv);
+ ptv->capture_afp_poll_signal = StatsRegisterCounter("capture.afpacket.poll_signal", ptv->tv);
+ ptv->capture_afp_poll_timeout = StatsRegisterCounter("capture.afpacket.poll_timeout", ptv->tv);
+ ptv->capture_afp_poll_data = StatsRegisterCounter("capture.afpacket.poll_data", ptv->tv);
+ ptv->capture_afp_poll_err = StatsRegisterCounter("capture.afpacket.poll_errors", ptv->tv);
#endif
ptv->copy_mode = afpconfig->copy_mode;
SCReturnInt(TM_ECODE_FAILED);
}
-#define T_DATA_SIZE 70000
- ptv->data = SCMalloc(T_DATA_SIZE);
- if (ptv->data == NULL) {
- afpconfig->DerefFunc(afpconfig);
- SCFree(ptv);
- SCReturnInt(TM_ECODE_FAILED);
- }
- ptv->datalen = T_DATA_SIZE;
-#undef T_DATA_SIZE
-
*data = (void *)ptv;
afpconfig->DerefFunc(afpconfig);
EBPFSetupXDP(ptv->iface, -1, ptv->xdp_mode);
}
#endif
- if (ptv->data != NULL) {
- SCFree(ptv->data);
- ptv->data = NULL;
- }
- ptv->datalen = 0;
ptv->bpf_filter = NULL;
if ((ptv->flags & AFP_TPACKET_V3) && ptv->ring.v3) {
SCReturnInt(TM_ECODE_OK);
}
+/** \internal
+ *  \brief add a VLAN header into the raw data for inspection, logging
+ *         and sending out in IPS mode
+ *
+ *  The kernel doesn't provide the first VLAN header in the raw packet data,
+ *  but instead feeds it to us through meta data. For logging and IPS
+ *  we need to put it back into the raw data. Luckily there is some head
+ *  room in front of the original data (VLAN_HEADER_LEN bytes are requested
+ *  with PACKET_RESERVE when the socket is set up), so it's enough to move
+ *  the ethernet addresses back a bit to make space for the VLAN header.
+ *
+ *  \param p packet whose afp_v.vlan_tci holds the TCI taken from the ring
+ *           frame header; the packet is left untouched when it is 0
+ */
+static void UpdateRawDataForVLANHdr(Packet *p)
+{
+    if (p->afp_v.vlan_tci == 0) {
+        return;
+    }
+
+    uint8_t *pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
+    const size_t plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
+    /* 802.1Q TPID and the TCI, both in network byte order */
+    const uint16_t tpid = htons(0x8100);
+    const uint16_t tci = htons(p->afp_v.vlan_tci);
+
+    /* move ethernet addresses; source and destination overlap */
+    memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
+    /* write vlan info; memcpy instead of a store through a cast
+     * uint16_t pointer keeps this free of alignment/aliasing assumptions */
+    memcpy(pstart + 2 * ETH_ALEN, &tpid, sizeof(tpid));
+    memcpy(pstart + 2 * ETH_ALEN + sizeof(tpid), &tci, sizeof(tci));
+
+    /* update the packet raw data pointer to start at the new offset */
+    (void)PacketSetData(p, pstart, plen);
+    /* update ethernet header pointer to point to the new start of the data */
+    p->ethh = (void *)pstart;
+}
+
/**
* \brief This function passes off to link type decoders.
*
TmEcode DecodeAFP(ThreadVars *tv, Packet *p, void *data)
{
SCEnter();
+
+ const bool afp_vlan_hdr = p->vlan_idx != 0;
DecodeThreadVars *dtv = (DecodeThreadVars *)data;
- BUG_ON(PKT_IS_PSEUDOPKT(p));
+ DEBUG_VALIDATE_BUG_ON(PKT_IS_PSEUDOPKT(p));
/* update counters */
DecodeUpdatePacketCounters(tv, dtv, p);
- /* If suri has set vlan during reading, we increase vlan counter */
- if (p->vlan_idx) {
- StatsIncr(tv, dtv->counter_vlan);
- }
-
/* call the decoder */
DecodeLinkLayer(tv, dtv, p->datalink, p, GET_PKT_DATA(p), GET_PKT_LEN(p));
+ /* post-decoding put vlan hdr back into the raw data */
+ if (afp_vlan_hdr) {
+ StatsIncr(tv, dtv->counter_vlan);
+ UpdateRawDataForVLANHdr(p);
+ }
PacketDecodeFinalize(tv, dtv, p);
TmEcode DecodeAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
{
SCEnter();
- DecodeThreadVars *dtv = NULL;
-
- dtv = DecodeThreadVarsAlloc(tv);
-
+ DecodeThreadVars *dtv = DecodeThreadVarsAlloc(tv);
if (dtv == NULL)
SCReturnInt(TM_ECODE_FAILED);