1 /* Copyright (C) 2011-2017 Open Information Security Foundation
3 * You can copy, redistribute or modify this Program under the terms of
4 * the GNU General Public License version 2 as published by the Free
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
12 * You should have received a copy of the GNU General Public License
13 * version 2 along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 * \defgroup afppacket AF_PACKET running mode
27 * \author Eric Leblond <eric@regit.org>
29 * AF_PACKET socket acquisition support
31 * \todo watch other interface event to detect suppression of the monitored
35 #define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
36 #define SC_PCAP_DONT_INCLUDE_PCAP_H 1
37 #include "suricata-common.h"
41 #include "packet-queue.h"
43 #include "threadvars.h"
44 #include "tm-queuehandlers.h"
45 #include "tm-modules.h"
46 #include "tm-threads.h"
47 #include "tm-threads-common.h"
50 #include "util-debug.h"
51 #include "util-device.h"
52 #include "util-ebpf.h"
53 #include "util-error.h"
54 #include "util-privs.h"
55 #include "util-optimize.h"
56 #include "util-checksum.h"
57 #include "util-ioctl.h"
58 #include "util-host-info.h"
59 #include "tmqh-packetpool.h"
60 #include "source-af-packet.h"
66 #include <sys/ioctl.h>
69 #ifdef HAVE_PACKET_EBPF
70 #include "util-ebpf.h"
71 #include <bpf/libbpf.h>
77 struct bpf_insn
*bf_insns
;
84 #ifdef HAVE_PCAP_PCAP_H
85 #include <pcap/pcap.h>
88 #if HAVE_LINUX_IF_ETHER_H
89 #include <linux/if_ether.h>
92 #if HAVE_LINUX_IF_PACKET_H
93 #include <linux/if_packet.h>
96 #if HAVE_LINUX_IF_ARP_H
97 #include <linux/if_arp.h>
100 #if HAVE_LINUX_FILTER_H
101 #include <linux/filter.h>
105 #include <sys/mman.h>
108 #ifdef HAVE_HW_TIMESTAMPING
109 #include <linux/net_tstamp.h>
112 #endif /* HAVE_AF_PACKET */
114 extern int max_pending_packets
;
116 #ifndef HAVE_AF_PACKET
118 TmEcode
NoAFPSupportExit(ThreadVars
*, const void *, void **);
120 void TmModuleReceiveAFPRegister (void)
122 tmm_modules
[TMM_RECEIVEAFP
].name
= "ReceiveAFP";
123 tmm_modules
[TMM_RECEIVEAFP
].ThreadInit
= NoAFPSupportExit
;
124 tmm_modules
[TMM_RECEIVEAFP
].Func
= NULL
;
125 tmm_modules
[TMM_RECEIVEAFP
].ThreadExitPrintStats
= NULL
;
126 tmm_modules
[TMM_RECEIVEAFP
].ThreadDeinit
= NULL
;
127 tmm_modules
[TMM_RECEIVEAFP
].RegisterTests
= NULL
;
128 tmm_modules
[TMM_RECEIVEAFP
].cap_flags
= 0;
129 tmm_modules
[TMM_RECEIVEAFP
].flags
= TM_FLAG_RECEIVE_TM
;
133 * \brief Registration Function for DecodeAFP.
134 * \todo Unit tests are needed for this module.
136 void TmModuleDecodeAFPRegister (void)
138 tmm_modules
[TMM_DECODEAFP
].name
= "DecodeAFP";
139 tmm_modules
[TMM_DECODEAFP
].ThreadInit
= NoAFPSupportExit
;
140 tmm_modules
[TMM_DECODEAFP
].Func
= NULL
;
141 tmm_modules
[TMM_DECODEAFP
].ThreadExitPrintStats
= NULL
;
142 tmm_modules
[TMM_DECODEAFP
].ThreadDeinit
= NULL
;
143 tmm_modules
[TMM_DECODEAFP
].RegisterTests
= NULL
;
144 tmm_modules
[TMM_DECODEAFP
].cap_flags
= 0;
145 tmm_modules
[TMM_DECODEAFP
].flags
= TM_FLAG_DECODE_TM
;
149 * \brief this function prints an error message and exits.
151 TmEcode
NoAFPSupportExit(ThreadVars
*tv
, const void *initdata
, void **data
)
153 SCLogError(SC_ERR_NO_AF_PACKET
,"Error creating thread %s: you do not have "
154 "support for AF_PACKET enabled, on Linux host please recompile "
155 "with --enable-af-packet", tv
->name
);
159 #else /* We have AF_PACKET support */
161 #define AFP_IFACE_NAME_LENGTH 48
163 #define AFP_STATE_DOWN 0
164 #define AFP_STATE_UP 1
166 #define AFP_RECONNECT_TIMEOUT 500000
167 #define AFP_DOWN_COUNTER_INTERVAL 40
169 #define POLL_TIMEOUT 100
#ifndef TP_STATUS_USER_BUSY
/* Fallback when the headers do not provide the flag: use the last bit
 * available in tp_status. The constant is unsigned because shifting a
 * signed 1 into bit 31 is undefined behaviour and tp_status itself is an
 * unsigned 32 bit field. */
#define TP_STATUS_USER_BUSY (1U << 31)
#endif
176 #ifndef TP_STATUS_VLAN_VALID
177 #define TP_STATUS_VLAN_VALID (1 << 4)
180 /** protect the BPF filter setup for AF_PACKET, as it is not thread safe */
181 static SCMutex afpacket_bpf_set_filter_lock
= SCMUTEX_INITIALIZER
;
192 AFP_RECOVERABLE_ERROR
,
196 struct tpacket2_hdr
*h2
;
197 #ifdef HAVE_TPACKET_V3
198 struct tpacket3_hdr
*h3
;
203 static int AFPBypassCallback(Packet
*p
);
204 static int AFPXDPBypassCallback(Packet
*p
);
208 * \brief Structure to hold thread specific variables.
210 typedef struct AFPThreadVars_
214 struct iovec
*ring_v3
;
223 /* data link type for the thread */
226 #ifdef HAVE_PACKET_EBPF
231 unsigned int frame_offset
;
233 ChecksumValidationMode checksum_mode
;
235 /* references to packet and drop counters */
236 uint16_t capture_kernel_packets
;
237 uint16_t capture_kernel_drops
;
248 uint8_t *data
; /** Per function and thread data */
249 int datalen
; /** Length of per function and thread data */
253 * Init related members
256 /* thread specific socket */
262 /* socket buffer size */
265 const char *bpf_filter
;
279 struct tpacket_req req
;
280 #ifdef HAVE_TPACKET_V3
281 struct tpacket_req3 req3
;
285 char iface
[AFP_IFACE_NAME_LENGTH
];
286 /* IPS output iface */
287 char out_iface
[AFP_IFACE_NAME_LENGTH
];
289 /* mmap'ed ring buffer */
290 unsigned int ring_buflen
;
295 int map_fd
[MAX_MAPS
];
299 TmEcode
ReceiveAFP(ThreadVars
*, Packet
*, void *, PacketQueue
*, PacketQueue
*);
300 TmEcode
ReceiveAFPThreadInit(ThreadVars
*, const void *, void **);
301 void ReceiveAFPThreadExitStats(ThreadVars
*, void *);
302 TmEcode
ReceiveAFPThreadDeinit(ThreadVars
*, void *);
303 TmEcode
ReceiveAFPLoop(ThreadVars
*tv
, void *data
, void *slot
);
305 TmEcode
DecodeAFPThreadInit(ThreadVars
*, const void *, void **);
306 TmEcode
DecodeAFPThreadDeinit(ThreadVars
*tv
, void *data
);
307 TmEcode
DecodeAFP(ThreadVars
*, Packet
*, void *, PacketQueue
*, PacketQueue
*);
309 TmEcode
AFPSetBPFFilter(AFPThreadVars
*ptv
);
310 static int AFPGetIfnumByDev(int fd
, const char *ifname
, int verbose
);
311 static int AFPGetDevFlags(int fd
, const char *ifname
);
312 static int AFPDerefSocket(AFPPeer
* peer
);
313 static int AFPRefSocket(AFPPeer
* peer
);
316 * \brief Registration Function for ReceiveAFP.
317 * \todo Unit tests are needed for this module.
319 void TmModuleReceiveAFPRegister (void)
321 tmm_modules
[TMM_RECEIVEAFP
].name
= "ReceiveAFP";
322 tmm_modules
[TMM_RECEIVEAFP
].ThreadInit
= ReceiveAFPThreadInit
;
323 tmm_modules
[TMM_RECEIVEAFP
].Func
= NULL
;
324 tmm_modules
[TMM_RECEIVEAFP
].PktAcqLoop
= ReceiveAFPLoop
;
325 tmm_modules
[TMM_RECEIVEAFP
].PktAcqBreakLoop
= NULL
;
326 tmm_modules
[TMM_RECEIVEAFP
].ThreadExitPrintStats
= ReceiveAFPThreadExitStats
;
327 tmm_modules
[TMM_RECEIVEAFP
].ThreadDeinit
= ReceiveAFPThreadDeinit
;
328 tmm_modules
[TMM_RECEIVEAFP
].RegisterTests
= NULL
;
329 tmm_modules
[TMM_RECEIVEAFP
].cap_flags
= SC_CAP_NET_RAW
;
330 tmm_modules
[TMM_RECEIVEAFP
].flags
= TM_FLAG_RECEIVE_TM
;
335 * \defgroup afppeers AFP peers list
337 * AF_PACKET has an IPS mode where interfaces are peered: packets from
338 * one interface are sent to the peered interface and the other way around. The ::AFPPeer
339 * list maintains the list of peers. Each ::AFPPeer stores the needed
340 * information to be able to send packets on the interface.
341 * An element of the list must not be destroyed during the run of Suricata as it
342 * is used by ::Packet and other threads.
347 typedef struct AFPPeersList_
{
348 TAILQ_HEAD(, AFPPeer_
) peers
; /**< Head of the list of peers. */
351 int turn
; /**< Next value for initialisation order */
352 SC_ATOMIC_DECLARE(int, reached
); /**< Counter used to synchronize start */
356 * \brief Update the peer.
358 * Update the AFPPeer of a thread ie set new state, socket number
362 static void AFPPeerUpdate(AFPThreadVars
*ptv
)
364 if (ptv
->mpeer
== NULL
) {
367 (void)SC_ATOMIC_SET(ptv
->mpeer
->if_idx
, AFPGetIfnumByDev(ptv
->socket
, ptv
->iface
, 0));
368 (void)SC_ATOMIC_SET(ptv
->mpeer
->socket
, ptv
->socket
);
369 (void)SC_ATOMIC_SET(ptv
->mpeer
->state
, ptv
->afp_state
);
373 * \brief Clean and free resources used by an ::AFPPeer
375 static void AFPPeerClean(AFPPeer
*peer
)
377 if (peer
->flags
& AFP_SOCK_PROTECT
)
378 SCMutexDestroy(&peer
->sock_protect
);
379 SC_ATOMIC_DESTROY(peer
->socket
);
380 SC_ATOMIC_DESTROY(peer
->if_idx
);
381 SC_ATOMIC_DESTROY(peer
->state
);
385 AFPPeersList peerslist
;
389 * \brief Init the global list of ::AFPPeer
391 TmEcode
AFPPeersListInit()
394 TAILQ_INIT(&peerslist
.peers
);
395 peerslist
.peered
= 0;
398 SC_ATOMIC_INIT(peerslist
.reached
);
399 (void) SC_ATOMIC_SET(peerslist
.reached
, 0);
400 SCReturnInt(TM_ECODE_OK
);
404 * \brief Check that all ::AFPPeer got a peer
406 * \retval TM_ECODE_FAILED if some threads are not peered or TM_ECODE_OK else.
408 TmEcode
AFPPeersListCheck()
410 #define AFP_PEERS_MAX_TRY 4
411 #define AFP_PEERS_WAIT 20000
414 while (try < AFP_PEERS_MAX_TRY
) {
415 if (peerslist
.cnt
!= peerslist
.peered
) {
416 usleep(AFP_PEERS_WAIT
);
418 SCReturnInt(TM_ECODE_OK
);
422 SCLogError(SC_ERR_AFP_CREATE
, "Threads number not equals");
423 SCReturnInt(TM_ECODE_FAILED
);
427 * \brief Declare a new AFP thread to AFP peers list.
429 static TmEcode
AFPPeersListAdd(AFPThreadVars
*ptv
)
432 AFPPeer
*peer
= SCMalloc(sizeof(AFPPeer
));
436 if (unlikely(peer
== NULL
)) {
437 SCReturnInt(TM_ECODE_FAILED
);
439 memset(peer
, 0, sizeof(AFPPeer
));
440 SC_ATOMIC_INIT(peer
->socket
);
441 SC_ATOMIC_INIT(peer
->sock_usage
);
442 SC_ATOMIC_INIT(peer
->if_idx
);
443 SC_ATOMIC_INIT(peer
->state
);
444 peer
->flags
= ptv
->flags
;
445 peer
->turn
= peerslist
.turn
++;
447 if (peer
->flags
& AFP_SOCK_PROTECT
) {
448 SCMutexInit(&peer
->sock_protect
, NULL
);
451 (void)SC_ATOMIC_SET(peer
->sock_usage
, 0);
452 (void)SC_ATOMIC_SET(peer
->state
, AFP_STATE_DOWN
);
453 strlcpy(peer
->iface
, ptv
->iface
, AFP_IFACE_NAME_LENGTH
);
455 /* add element to iface list */
456 TAILQ_INSERT_TAIL(&peerslist
.peers
, peer
, next
);
458 if (ptv
->copy_mode
!= AFP_COPY_MODE_NONE
) {
461 /* Iter to find a peer */
462 TAILQ_FOREACH(pitem
, &peerslist
.peers
, next
) {
465 if (strcmp(pitem
->iface
, ptv
->out_iface
))
469 mtu
= GetIfaceMTU(ptv
->iface
);
470 out_mtu
= GetIfaceMTU(ptv
->out_iface
);
471 if (mtu
!= out_mtu
) {
472 SCLogError(SC_ERR_AFP_CREATE
,
473 "MTU on %s (%d) and %s (%d) are not equal, "
474 "transmission of packets bigger than %d will fail.",
476 ptv
->out_iface
, out_mtu
,
477 (out_mtu
> mtu
) ? mtu
: out_mtu
);
479 peerslist
.peered
+= 2;
486 SCReturnInt(TM_ECODE_OK
);
489 static int AFPPeersListWaitTurn(AFPPeer
*peer
)
491 /* If turn is zero, we already have started threads once */
492 if (peerslist
.turn
== 0)
495 if (peer
->turn
== SC_ATOMIC_GET(peerslist
.reached
))
500 static void AFPPeersListReachedInc(void)
502 if (peerslist
.turn
== 0)
505 if (SC_ATOMIC_ADD(peerslist
.reached
, 1) == peerslist
.turn
) {
506 SCLogInfo("All AFP capture threads are running.");
507 (void)SC_ATOMIC_SET(peerslist
.reached
, 0);
508 /* Set turn to 0 to skip synchronization when ReceiveAFPLoop is
515 static int AFPPeersListStarted(void)
517 return !peerslist
.turn
;
521 * \brief Clean the global peers list.
523 void AFPPeersListClean()
527 while ((pitem
= TAILQ_FIRST(&peerslist
.peers
))) {
528 TAILQ_REMOVE(&peerslist
.peers
, pitem
, next
);
538 * \brief Registration Function for DecodeAFP.
539 * \todo Unit tests are needed for this module.
541 void TmModuleDecodeAFPRegister (void)
543 tmm_modules
[TMM_DECODEAFP
].name
= "DecodeAFP";
544 tmm_modules
[TMM_DECODEAFP
].ThreadInit
= DecodeAFPThreadInit
;
545 tmm_modules
[TMM_DECODEAFP
].Func
= DecodeAFP
;
546 tmm_modules
[TMM_DECODEAFP
].ThreadExitPrintStats
= NULL
;
547 tmm_modules
[TMM_DECODEAFP
].ThreadDeinit
= DecodeAFPThreadDeinit
;
548 tmm_modules
[TMM_DECODEAFP
].RegisterTests
= NULL
;
549 tmm_modules
[TMM_DECODEAFP
].cap_flags
= 0;
550 tmm_modules
[TMM_DECODEAFP
].flags
= TM_FLAG_DECODE_TM
;
554 static int AFPCreateSocket(AFPThreadVars
*ptv
, char *devname
, int verbose
);
556 static inline void AFPDumpCounters(AFPThreadVars
*ptv
)
558 #ifdef PACKET_STATISTICS
559 struct tpacket_stats kstats
;
560 socklen_t len
= sizeof (struct tpacket_stats
);
561 if (getsockopt(ptv
->socket
, SOL_PACKET
, PACKET_STATISTICS
,
562 &kstats
, &len
) > -1) {
563 SCLogDebug("(%s) Kernel: Packets %" PRIu32
", dropped %" PRIu32
"",
565 kstats
.tp_packets
, kstats
.tp_drops
);
566 StatsAddUI64(ptv
->tv
, ptv
->capture_kernel_packets
, kstats
.tp_packets
);
567 StatsAddUI64(ptv
->tv
, ptv
->capture_kernel_drops
, kstats
.tp_drops
);
568 (void) SC_ATOMIC_ADD(ptv
->livedev
->drop
, (uint64_t) kstats
.tp_drops
);
569 (void) SC_ATOMIC_ADD(ptv
->livedev
->pkts
, (uint64_t) kstats
.tp_packets
);
575 * \brief AF packet read function.
577 * This function reads one packet from the socket and hands it to the thread slot.
578 * From here the packets are picked up by the DecodeAFP thread.
580 * \param ptv pointer to AFPThreadVars
581 * \retval AFP_READ_OK on success, AFP_READ_FAILURE or AFP_FAILURE on error
583 static int AFPRead(AFPThreadVars
*ptv
)
586 /* XXX should try to use read that get directly to packet */
589 struct sockaddr_ll from
;
592 struct cmsghdr
*cmsg
;
595 char buf
[CMSG_SPACE(sizeof(struct tpacket_auxdata
))];
597 unsigned char aux_checksum
= 0;
599 msg
.msg_name
= &from
;
600 msg
.msg_namelen
= sizeof(from
);
603 msg
.msg_control
= &cmsg_buf
;
604 msg
.msg_controllen
= sizeof(cmsg_buf
);
608 offset
= SLL_HEADER_LEN
;
611 iov
.iov_len
= ptv
->datalen
- offset
;
612 iov
.iov_base
= ptv
->data
+ offset
;
614 caplen
= recvmsg(ptv
->socket
, &msg
, MSG_TRUNC
);
617 SCLogWarning(SC_ERR_AFP_READ
, "recvmsg failed with error code %" PRId32
,
619 SCReturnInt(AFP_READ_FAILURE
);
622 p
= PacketGetFromQueueOrAlloc();
624 SCReturnInt(AFP_FAILURE
);
626 PKT_SET_SRC(p
, PKT_SRC_WIRE
);
627 if (ptv
->flags
& AFP_BYPASS
) {
628 p
->BypassPacketsFlow
= AFPBypassCallback
;
629 #ifdef HAVE_PACKET_EBPF
630 p
->afp_v
.v4_map_fd
= ptv
->v4_map_fd
;
631 p
->afp_v
.v6_map_fd
= ptv
->v6_map_fd
;
634 if (ptv
->flags
& AFP_XDPBYPASS
) {
635 p
->BypassPacketsFlow
= AFPXDPBypassCallback
;
636 #ifdef HAVE_PACKET_EBPF
637 p
->afp_v
.v4_map_fd
= ptv
->v4_map_fd
;
638 p
->afp_v
.v6_map_fd
= ptv
->v6_map_fd
;
642 /* get timestamp of packet via ioctl */
643 if (ioctl(ptv
->socket
, SIOCGSTAMP
, &p
->ts
) == -1) {
644 SCLogWarning(SC_ERR_AFP_READ
, "recvmsg failed with error code %" PRId32
,
646 TmqhOutputPacketpool(ptv
->tv
, p
);
647 SCReturnInt(AFP_READ_FAILURE
);
651 p
->livedev
= ptv
->livedev
;
653 /* add forged header */
655 SllHdr
* hdrp
= (SllHdr
*)ptv
->data
;
656 /* XXX this is minimalist, but this seems enough */
657 hdrp
->sll_protocol
= from
.sll_protocol
;
660 p
->datalink
= ptv
->datalink
;
661 SET_PKT_LEN(p
, caplen
+ offset
);
662 if (PacketCopyData(p
, ptv
->data
, GET_PKT_LEN(p
)) == -1) {
663 TmqhOutputPacketpool(ptv
->tv
, p
);
664 SCReturnInt(AFP_FAILURE
);
666 SCLogDebug("pktlen: %" PRIu32
" (pkt %p, pkt data %p)",
667 GET_PKT_LEN(p
), p
, GET_PKT_DATA(p
));
669 /* We only check for checksum disable */
670 if (ptv
->checksum_mode
== CHECKSUM_VALIDATION_DISABLE
) {
671 p
->flags
|= PKT_IGNORE_CHECKSUM
;
672 } else if (ptv
->checksum_mode
== CHECKSUM_VALIDATION_AUTO
) {
673 if (ptv
->livedev
->ignore_checksum
) {
674 p
->flags
|= PKT_IGNORE_CHECKSUM
;
675 } else if (ChecksumAutoModeCheck(ptv
->pkts
,
676 SC_ATOMIC_GET(ptv
->livedev
->pkts
),
677 SC_ATOMIC_GET(ptv
->livedev
->invalid_checksums
))) {
678 ptv
->livedev
->ignore_checksum
= 1;
679 p
->flags
|= PKT_IGNORE_CHECKSUM
;
685 /* List is NULL if we don't have activated auxiliary data */
686 for (cmsg
= CMSG_FIRSTHDR(&msg
); cmsg
; cmsg
= CMSG_NXTHDR(&msg
, cmsg
)) {
687 struct tpacket_auxdata
*aux
;
689 if (cmsg
->cmsg_len
< CMSG_LEN(sizeof(struct tpacket_auxdata
)) ||
690 cmsg
->cmsg_level
!= SOL_PACKET
||
691 cmsg
->cmsg_type
!= PACKET_AUXDATA
)
694 aux
= (struct tpacket_auxdata
*)CMSG_DATA(cmsg
);
696 if (aux_checksum
&& (aux
->tp_status
& TP_STATUS_CSUMNOTREADY
)) {
697 p
->flags
|= PKT_IGNORE_CHECKSUM
;
702 if (TmThreadsSlotProcessPkt(ptv
->tv
, ptv
->slot
, p
) != TM_ECODE_OK
) {
703 TmqhOutputPacketpool(ptv
->tv
, p
);
704 SCReturnInt(AFP_FAILURE
);
706 SCReturnInt(AFP_READ_OK
);
710 * \brief AF packet write function.
712 * This function has to be called before the memory
713 * related to Packet in ring buffer is released.
715 * \param p pointer to Packet
716 * \param version version of capture: TPACKET_V2 or TPACKET_V3
717 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
720 static TmEcode
AFPWritePacket(Packet
*p
, int version
)
722 struct sockaddr_ll socket_address
;
727 uint16_t vlan_tci
= 0;
729 if (p
->afp_v
.copy_mode
== AFP_COPY_MODE_IPS
) {
730 if (PACKET_TEST_ACTION(p
, ACTION_DROP
)) {
735 if (SC_ATOMIC_GET(p
->afp_v
.peer
->state
) == AFP_STATE_DOWN
)
738 if (p
->ethh
== NULL
) {
739 SCLogWarning(SC_ERR_INVALID_VALUE
, "Should have an Ethernet header");
740 return TM_ECODE_FAILED
;
742 /* Index of the network device */
743 socket_address
.sll_ifindex
= SC_ATOMIC_GET(p
->afp_v
.peer
->if_idx
);
745 socket_address
.sll_halen
= ETH_ALEN
;
746 /* Destination MAC */
747 memcpy(socket_address
.sll_addr
, p
->ethh
, 6);
749 /* Send packet, locking the socket if necessary */
750 if (p
->afp_v
.peer
->flags
& AFP_SOCK_PROTECT
)
751 SCMutexLock(&p
->afp_v
.peer
->sock_protect
);
752 socket
= SC_ATOMIC_GET(p
->afp_v
.peer
->socket
);
754 h
.raw
= p
->afp_v
.relptr
;
756 if (version
== TPACKET_V2
) {
757 /* Copy VLAN header from ring memory. For post june 2011 kernel we test
758 * the flag. It is not defined for older kernel so we go best effort
759 * and test for non zero value of the TCI header. */
760 if (h
.h2
->tp_status
& TP_STATUS_VLAN_VALID
|| h
.h2
->tp_vlan_tci
) {
761 vlan_tci
= h
.h2
->tp_vlan_tci
;
764 #ifdef HAVE_TPACKET_V3
765 if (h
.h3
->tp_status
& TP_STATUS_VLAN_VALID
|| h
.h3
->hv1
.tp_vlan_tci
) {
766 vlan_tci
= h
.h3
->hv1
.tp_vlan_tci
;
769 /* Should not get here */
775 pstart
= GET_PKT_DATA(p
) - VLAN_HEADER_LEN
;
776 plen
= GET_PKT_LEN(p
) + VLAN_HEADER_LEN
;
777 /* move ethernet addresses */
778 memmove(pstart
, GET_PKT_DATA(p
), 2 * ETH_ALEN
);
779 /* write vlan info */
780 *(uint16_t *)(pstart
+ 2 * ETH_ALEN
) = htons(0x8100);
781 *(uint16_t *)(pstart
+ 2 * ETH_ALEN
+ 2) = htons(vlan_tci
);
783 pstart
= GET_PKT_DATA(p
);
784 plen
= GET_PKT_LEN(p
);
787 if (sendto(socket
, pstart
, plen
, 0,
788 (struct sockaddr
*) &socket_address
,
789 sizeof(struct sockaddr_ll
)) < 0) {
790 SCLogWarning(SC_ERR_SOCKET
, "Sending packet failed on socket %d: %s",
793 if (p
->afp_v
.peer
->flags
& AFP_SOCK_PROTECT
)
794 SCMutexUnlock(&p
->afp_v
.peer
->sock_protect
);
795 return TM_ECODE_FAILED
;
797 if (p
->afp_v
.peer
->flags
& AFP_SOCK_PROTECT
)
798 SCMutexUnlock(&p
->afp_v
.peer
->sock_protect
);
803 static void AFPReleaseDataFromRing(Packet
*p
)
805 /* Need to be in copy mode and need to detect early release
806 where Ethernet header could not be set (and pseudo packet) */
807 if ((p
->afp_v
.copy_mode
!= AFP_COPY_MODE_NONE
) && !PKT_IS_PSEUDOPKT(p
)) {
808 AFPWritePacket(p
, TPACKET_V2
);
811 if (AFPDerefSocket(p
->afp_v
.mpeer
) == 0)
814 if (p
->afp_v
.relptr
) {
816 h
.raw
= p
->afp_v
.relptr
;
817 h
.h2
->tp_status
= TP_STATUS_KERNEL
;
821 AFPV_CLEANUP(&p
->afp_v
);
824 #ifdef HAVE_TPACKET_V3
825 static void AFPReleasePacketV3(Packet
*p
)
827 /* Need to be in copy mode and need to detect early release
828 where Ethernet header could not be set (and pseudo packet) */
829 if ((p
->afp_v
.copy_mode
!= AFP_COPY_MODE_NONE
) && !PKT_IS_PSEUDOPKT(p
)) {
830 AFPWritePacket(p
, TPACKET_V3
);
832 PacketFreeOrRelease(p
);
836 static void AFPReleasePacket(Packet
*p
)
838 AFPReleaseDataFromRing(p
);
839 PacketFreeOrRelease(p
);
843 * \brief AF packet read function for ring
845 * This function walks the TPACKET_V2 ring and hands each ready frame to the thread slot.
846 * From here the packets are picked up by the DecodeAFP thread.
848 * \param ptv pointer to AFPThreadVars
849 * \retval AFP_READ_OK on success, AFP_KERNEL_DROP or AFP_FAILURE otherwise
851 static int AFPReadFromRing(AFPThreadVars
*ptv
)
855 uint8_t emergency_flush
= 0;
860 /* Loop till we have packets available */
862 if (unlikely(suricata_ctl_flags
!= 0)) {
866 /* Read packet from ring */
867 h
.raw
= (((union thdr
**)ptv
->ring_v2
)[ptv
->frame_offset
]);
869 SCReturnInt(AFP_FAILURE
);
872 if ((! h
.h2
->tp_status
) || (h
.h2
->tp_status
& TP_STATUS_USER_BUSY
)) {
873 if (read_pkts
== 0) {
874 if (loop_start
== -1) {
875 loop_start
= ptv
->frame_offset
;
876 } else if (unlikely(loop_start
== (int)ptv
->frame_offset
)) {
877 SCReturnInt(AFP_READ_OK
);
879 if (++ptv
->frame_offset
>= ptv
->req
.tp_frame_nr
) {
880 ptv
->frame_offset
= 0;
884 if ((emergency_flush
) && (ptv
->flags
& AFP_EMERGENCY_MODE
)) {
885 SCReturnInt(AFP_KERNEL_DROP
);
887 SCReturnInt(AFP_READ_OK
);
894 /* Our packet is still used by suricata, we exit read loop to
896 if (h
.h2
->tp_status
& TP_STATUS_USER_BUSY
) {
897 SCReturnInt(AFP_READ_OK
);
900 if ((ptv
->flags
& AFP_EMERGENCY_MODE
) && (emergency_flush
== 1)) {
901 h
.h2
->tp_status
= TP_STATUS_KERNEL
;
905 p
= PacketGetFromQueueOrAlloc();
907 SCReturnInt(AFP_FAILURE
);
909 PKT_SET_SRC(p
, PKT_SRC_WIRE
);
910 if (ptv
->flags
& AFP_BYPASS
) {
911 p
->BypassPacketsFlow
= AFPBypassCallback
;
912 #ifdef HAVE_PACKET_EBPF
913 p
->afp_v
.v4_map_fd
= ptv
->v4_map_fd
;
914 p
->afp_v
.v6_map_fd
= ptv
->v6_map_fd
;
917 if (ptv
->flags
& AFP_XDPBYPASS
) {
918 p
->BypassPacketsFlow
= AFPXDPBypassCallback
;
919 #ifdef HAVE_PACKET_EBPF
920 p
->afp_v
.v4_map_fd
= ptv
->v4_map_fd
;
921 p
->afp_v
.v6_map_fd
= ptv
->v6_map_fd
;
925 /* Suricata will treat packet so telling it is busy, this
926 * status will be reset to 0 (ie TP_STATUS_KERNEL) in the release
928 h
.h2
->tp_status
|= TP_STATUS_USER_BUSY
;
931 p
->livedev
= ptv
->livedev
;
932 p
->datalink
= ptv
->datalink
;
934 if (h
.h2
->tp_len
> h
.h2
->tp_snaplen
) {
935 SCLogDebug("Packet length (%d) > snaplen (%d), truncating",
936 h
.h2
->tp_len
, h
.h2
->tp_snaplen
);
939 /* get vlan id from header */
940 if ((!(ptv
->flags
& AFP_VLAN_DISABLED
)) &&
941 (h
.h2
->tp_status
& TP_STATUS_VLAN_VALID
|| h
.h2
->tp_vlan_tci
)) {
942 p
->vlan_id
[0] = h
.h2
->tp_vlan_tci
& 0x0fff;
947 if (ptv
->flags
& AFP_ZERO_COPY
) {
948 if (PacketSetData(p
, (unsigned char*)h
.raw
+ h
.h2
->tp_mac
, h
.h2
->tp_snaplen
) == -1) {
949 TmqhOutputPacketpool(ptv
->tv
, p
);
950 SCReturnInt(AFP_FAILURE
);
952 p
->afp_v
.relptr
= h
.raw
;
953 p
->ReleasePacket
= AFPReleasePacket
;
954 p
->afp_v
.mpeer
= ptv
->mpeer
;
955 AFPRefSocket(ptv
->mpeer
);
957 p
->afp_v
.copy_mode
= ptv
->copy_mode
;
958 if (p
->afp_v
.copy_mode
!= AFP_COPY_MODE_NONE
) {
959 p
->afp_v
.peer
= ptv
->mpeer
->peer
;
961 p
->afp_v
.peer
= NULL
;
965 if (PacketCopyData(p
, (unsigned char*)h
.raw
+ h
.h2
->tp_mac
, h
.h2
->tp_snaplen
) == -1) {
966 TmqhOutputPacketpool(ptv
->tv
, p
);
967 SCReturnInt(AFP_FAILURE
);
972 p
->ts
.tv_sec
= h
.h2
->tp_sec
;
973 p
->ts
.tv_usec
= h
.h2
->tp_nsec
/1000;
974 SCLogDebug("pktlen: %" PRIu32
" (pkt %p, pkt data %p)",
975 GET_PKT_LEN(p
), p
, GET_PKT_DATA(p
));
977 /* We only check for checksum disable */
978 if (ptv
->checksum_mode
== CHECKSUM_VALIDATION_DISABLE
) {
979 p
->flags
|= PKT_IGNORE_CHECKSUM
;
980 } else if (ptv
->checksum_mode
== CHECKSUM_VALIDATION_AUTO
) {
981 if (ptv
->livedev
->ignore_checksum
) {
982 p
->flags
|= PKT_IGNORE_CHECKSUM
;
983 } else if (ChecksumAutoModeCheck(ptv
->pkts
,
984 SC_ATOMIC_GET(ptv
->livedev
->pkts
),
985 SC_ATOMIC_GET(ptv
->livedev
->invalid_checksums
))) {
986 ptv
->livedev
->ignore_checksum
= 1;
987 p
->flags
|= PKT_IGNORE_CHECKSUM
;
990 if (h
.h2
->tp_status
& TP_STATUS_CSUMNOTREADY
) {
991 p
->flags
|= PKT_IGNORE_CHECKSUM
;
994 if (h
.h2
->tp_status
& TP_STATUS_LOSING
) {
996 AFPDumpCounters(ptv
);
999 /* release frame if not in zero copy mode */
1000 if (!(ptv
->flags
& AFP_ZERO_COPY
)) {
1001 h
.h2
->tp_status
= TP_STATUS_KERNEL
;
1004 if (TmThreadsSlotProcessPkt(ptv
->tv
, ptv
->slot
, p
) != TM_ECODE_OK
) {
1005 h
.h2
->tp_status
= TP_STATUS_KERNEL
;
1006 if (++ptv
->frame_offset
>= ptv
->req
.tp_frame_nr
) {
1007 ptv
->frame_offset
= 0;
1009 TmqhOutputPacketpool(ptv
->tv
, p
);
1010 SCReturnInt(AFP_FAILURE
);
1014 if (++ptv
->frame_offset
>= ptv
->req
.tp_frame_nr
) {
1015 ptv
->frame_offset
= 0;
1016 /* Get out of loop to be sure we will reach maintenance tasks */
1017 SCReturnInt(AFP_READ_OK
);
1021 SCReturnInt(AFP_READ_OK
);
1024 #ifdef HAVE_TPACKET_V3
1025 static inline void AFPFlushBlock(struct tpacket_block_desc
*pbd
)
1027 pbd
->hdr
.bh1
.block_status
= TP_STATUS_KERNEL
;
1030 static inline int AFPParsePacketV3(AFPThreadVars
*ptv
, struct tpacket_block_desc
*pbd
, struct tpacket3_hdr
*ppd
)
1032 Packet
*p
= PacketGetFromQueueOrAlloc();
1034 SCReturnInt(AFP_FAILURE
);
1036 PKT_SET_SRC(p
, PKT_SRC_WIRE
);
1037 if (ptv
->flags
& AFP_BYPASS
) {
1038 p
->BypassPacketsFlow
= AFPBypassCallback
;
1039 #ifdef HAVE_PACKET_EBPF
1040 p
->afp_v
.v4_map_fd
= ptv
->v4_map_fd
;
1041 p
->afp_v
.v6_map_fd
= ptv
->v6_map_fd
;
1044 if (ptv
->flags
& AFP_XDPBYPASS
) {
1045 p
->BypassPacketsFlow
= AFPXDPBypassCallback
;
1046 #ifdef HAVE_PACKET_EBPF
1047 p
->afp_v
.v4_map_fd
= ptv
->v4_map_fd
;
1048 p
->afp_v
.v6_map_fd
= ptv
->v6_map_fd
;
1053 p
->livedev
= ptv
->livedev
;
1054 p
->datalink
= ptv
->datalink
;
1056 if ((!(ptv
->flags
& AFP_VLAN_DISABLED
)) &&
1057 (ppd
->tp_status
& TP_STATUS_VLAN_VALID
|| ppd
->hv1
.tp_vlan_tci
)) {
1058 p
->vlan_id
[0] = ppd
->hv1
.tp_vlan_tci
& 0x0fff;
1063 if (ptv
->flags
& AFP_ZERO_COPY
) {
1064 if (PacketSetData(p
, (unsigned char*)ppd
+ ppd
->tp_mac
, ppd
->tp_snaplen
) == -1) {
1065 TmqhOutputPacketpool(ptv
->tv
, p
);
1066 SCReturnInt(AFP_FAILURE
);
1068 p
->afp_v
.relptr
= ppd
;
1069 p
->ReleasePacket
= AFPReleasePacketV3
;
1070 p
->afp_v
.mpeer
= ptv
->mpeer
;
1071 AFPRefSocket(ptv
->mpeer
);
1073 p
->afp_v
.copy_mode
= ptv
->copy_mode
;
1074 if (p
->afp_v
.copy_mode
!= AFP_COPY_MODE_NONE
) {
1075 p
->afp_v
.peer
= ptv
->mpeer
->peer
;
1077 p
->afp_v
.peer
= NULL
;
1080 if (PacketCopyData(p
, (unsigned char*)ppd
+ ppd
->tp_mac
, ppd
->tp_snaplen
) == -1) {
1081 TmqhOutputPacketpool(ptv
->tv
, p
);
1082 SCReturnInt(AFP_FAILURE
);
1086 p
->ts
.tv_sec
= ppd
->tp_sec
;
1087 p
->ts
.tv_usec
= ppd
->tp_nsec
/1000;
1088 SCLogDebug("pktlen: %" PRIu32
" (pkt %p, pkt data %p)",
1089 GET_PKT_LEN(p
), p
, GET_PKT_DATA(p
));
1091 /* We only check for checksum disable */
1092 if (ptv
->checksum_mode
== CHECKSUM_VALIDATION_DISABLE
) {
1093 p
->flags
|= PKT_IGNORE_CHECKSUM
;
1094 } else if (ptv
->checksum_mode
== CHECKSUM_VALIDATION_AUTO
) {
1095 if (ptv
->livedev
->ignore_checksum
) {
1096 p
->flags
|= PKT_IGNORE_CHECKSUM
;
1097 } else if (ChecksumAutoModeCheck(ptv
->pkts
,
1098 SC_ATOMIC_GET(ptv
->livedev
->pkts
),
1099 SC_ATOMIC_GET(ptv
->livedev
->invalid_checksums
))) {
1100 ptv
->livedev
->ignore_checksum
= 1;
1101 p
->flags
|= PKT_IGNORE_CHECKSUM
;
1104 if (ppd
->tp_status
& TP_STATUS_CSUMNOTREADY
) {
1105 p
->flags
|= PKT_IGNORE_CHECKSUM
;
1109 if (TmThreadsSlotProcessPkt(ptv
->tv
, ptv
->slot
, p
) != TM_ECODE_OK
) {
1110 TmqhOutputPacketpool(ptv
->tv
, p
);
1111 SCReturnInt(AFP_FAILURE
);
1114 SCReturnInt(AFP_READ_OK
);
1117 static inline int AFPWalkBlock(AFPThreadVars
*ptv
, struct tpacket_block_desc
*pbd
)
1119 int num_pkts
= pbd
->hdr
.bh1
.num_pkts
, i
;
1122 ppd
= (uint8_t *)pbd
+ pbd
->hdr
.bh1
.offset_to_first_pkt
;
1123 for (i
= 0; i
< num_pkts
; ++i
) {
1124 if (unlikely(AFPParsePacketV3(ptv
, pbd
,
1125 (struct tpacket3_hdr
*)ppd
) == AFP_FAILURE
)) {
1126 SCReturnInt(AFP_READ_FAILURE
);
1128 ppd
= ppd
+ ((struct tpacket3_hdr
*)ppd
)->tp_next_offset
;
1131 SCReturnInt(AFP_READ_OK
);
1133 #endif /* HAVE_TPACKET_V3 */
1136 * \brief AF packet read function for ring
1138 * This function walks the TPACKET_V3 ring block by block and hands the packets of each ready block to the thread slot.
1139 * From here the packets are picked up by the DecodeAFP thread.
1141 * \param ptv pointer to AFPThreadVars
1142 * \retval AFP_READ_OK on success, AFP_READ_FAILURE on error
1144 static int AFPReadFromRingV3(AFPThreadVars
*ptv
)
1146 #ifdef HAVE_TPACKET_V3
1147 struct tpacket_block_desc
*pbd
;
1149 /* Loop till we have packets available */
1151 if (unlikely(suricata_ctl_flags
!= 0)) {
1152 SCLogInfo("Exiting AFP V3 read loop");
1156 pbd
= (struct tpacket_block_desc
*) ptv
->ring_v3
[ptv
->frame_offset
].iov_base
;
1158 /* block is not ready to be read */
1159 if ((pbd
->hdr
.bh1
.block_status
& TP_STATUS_USER
) == 0) {
1160 SCReturnInt(AFP_READ_OK
);
1163 if (unlikely(AFPWalkBlock(ptv
, pbd
) != AFP_READ_OK
)) {
1165 SCReturnInt(AFP_READ_FAILURE
);
1169 ptv
->frame_offset
= (ptv
->frame_offset
+ 1) % ptv
->req3
.tp_block_nr
;
1170 /* return to maintenance task after one loop on the ring */
1171 if (ptv
->frame_offset
== 0) {
1172 SCReturnInt(AFP_READ_OK
);
1176 SCReturnInt(AFP_READ_OK
);
1180 * \brief Reference socket
1182 * \retval 0 in case of failure, 1 in case of success
1184 static int AFPRefSocket(AFPPeer
* peer
)
1186 if (unlikely(peer
== NULL
))
1189 (void)SC_ATOMIC_ADD(peer
->sock_usage
, 1);
1195 * \brief Dereference socket
1197 * \retval 1 if socket is still alive, 0 if not
1199 static int AFPDerefSocket(AFPPeer
* peer
)
1204 if (SC_ATOMIC_SUB(peer
->sock_usage
, 1) == 0) {
1205 if (SC_ATOMIC_GET(peer
->state
) == AFP_STATE_DOWN
) {
1206 SCLogInfo("Cleaning socket connected to '%s'", peer
->iface
);
1207 close(SC_ATOMIC_GET(peer
->socket
));
1214 static void AFPSwitchState(AFPThreadVars
*ptv
, int state
)
1216 ptv
->afp_state
= state
;
1217 ptv
->down_count
= 0;
1221 /* Do cleaning if switching to down state */
1222 if (state
== AFP_STATE_DOWN
) {
1223 #ifdef HAVE_TPACKET_V3
1224 if (ptv
->flags
& AFP_TPACKET_V3
) {
1225 if (!ptv
->ring_v3
) {
1226 SCFree(ptv
->ring_v3
);
1227 ptv
->ring_v3
= NULL
;
1232 /* only used in reading phase, we can free it */
1233 SCFree(ptv
->ring_v2
);
1234 ptv
->ring_v2
= NULL
;
1236 #ifdef HAVE_TPACKET_V3
1239 if (ptv
->socket
!= -1) {
1240 /* we need to wait for all packets to return data */
1241 if (SC_ATOMIC_SUB(ptv
->mpeer
->sock_usage
, 1) == 0) {
1242 SCLogInfo("Cleaning socket connected to '%s'", ptv
->iface
);
1243 munmap(ptv
->ring_buf
, ptv
->ring_buflen
);
1249 if (state
== AFP_STATE_UP
) {
1250 (void)SC_ATOMIC_SET(ptv
->mpeer
->sock_usage
, 1);
1254 static int AFPReadAndDiscard(AFPThreadVars
*ptv
, struct timeval
*synctv
,
1255 uint64_t *discarded_pkts
)
1257 struct sockaddr_ll from
;
1262 struct cmsghdr cmsg
;
1263 char buf
[CMSG_SPACE(sizeof(struct tpacket_auxdata
))];
1267 if (unlikely(suricata_ctl_flags
!= 0)) {
1271 msg
.msg_name
= &from
;
1272 msg
.msg_namelen
= sizeof(from
);
1275 msg
.msg_control
= &cmsg_buf
;
1276 msg
.msg_controllen
= sizeof(cmsg_buf
);
1279 iov
.iov_len
= ptv
->datalen
;
1280 iov
.iov_base
= ptv
->data
;
1282 (void)recvmsg(ptv
->socket
, &msg
, MSG_TRUNC
);
1284 if (ioctl(ptv
->socket
, SIOCGSTAMP
, &ts
) == -1) {
1289 if ((ts
.tv_sec
> synctv
->tv_sec
) ||
1290 (ts
.tv_sec
>= synctv
->tv_sec
&&
1291 ts
.tv_usec
> synctv
->tv_usec
)) {
1297 static int AFPReadAndDiscardFromRing(AFPThreadVars
*ptv
, struct timeval
*synctv
,
1298 uint64_t *discarded_pkts
)
1302 if (unlikely(suricata_ctl_flags
!= 0)) {
1306 #ifdef HAVE_TPACKET_V3
1307 if (ptv
->flags
& AFP_TPACKET_V3
) {
1308 struct tpacket_block_desc
*pbd
;
1309 pbd
= (struct tpacket_block_desc
*) ptv
->ring_v3
[ptv
->frame_offset
].iov_base
;
1310 *discarded_pkts
+= pbd
->hdr
.bh1
.num_pkts
;
1312 ptv
->frame_offset
= (ptv
->frame_offset
+ 1) % ptv
->req3
.tp_block_nr
;
1318 /* Read packet from ring */
1319 h
.raw
= (((union thdr
**)ptv
->ring_v2
)[ptv
->frame_offset
]);
1320 if (h
.raw
== NULL
) {
1323 (*discarded_pkts
)++;
1324 if (((time_t)h
.h2
->tp_sec
> synctv
->tv_sec
) ||
1325 ((time_t)h
.h2
->tp_sec
== synctv
->tv_sec
&&
1326 (suseconds_t
) (h
.h2
->tp_nsec
/ 1000) > synctv
->tv_usec
)) {
1330 h
.h2
->tp_status
= TP_STATUS_KERNEL
;
1331 if (++ptv
->frame_offset
>= ptv
->req
.tp_frame_nr
) {
1332 ptv
->frame_offset
= 0;
1340 /** \brief wait for all afpacket threads to fully init
1342 * Discard packets before all threads are ready, as the cluster
1343 * setup is not complete yet.
1345 * if AFPPeersListStarted() returns true init is complete
1347 * \retval r 1 = happy, otherwise unhappy
1349 static int AFPSynchronizeStart(AFPThreadVars
*ptv
, uint64_t *discarded_pkts
)
1352 struct timeval synctv
;
1355 fds
.fd
= ptv
->socket
;
1356 fds
.events
= POLLIN
;
1358 /* Set timeval to end of the world */
1359 synctv
.tv_sec
= 0xffffffff;
1360 synctv
.tv_usec
= 0xffffffff;
1363 r
= poll(&fds
, 1, POLL_TIMEOUT
);
1365 (fds
.revents
& (POLLHUP
|POLLRDHUP
|POLLERR
|POLLNVAL
))) {
1366 SCLogWarning(SC_ERR_AFP_READ
, "poll failed %02x",
1367 fds
.revents
& (POLLHUP
|POLLRDHUP
|POLLERR
|POLLNVAL
));
1370 if (AFPPeersListStarted() && synctv
.tv_sec
== (time_t) 0xffffffff) {
1371 gettimeofday(&synctv
, NULL
);
1373 if (ptv
->flags
& AFP_RING_MODE
) {
1374 r
= AFPReadAndDiscardFromRing(ptv
, &synctv
, discarded_pkts
);
1376 r
= AFPReadAndDiscard(ptv
, &synctv
, discarded_pkts
);
1378 SCLogDebug("Discarding on %s", ptv
->tv
->name
);
1381 SCLogDebug("Starting to read on %s", ptv
->tv
->name
);
1387 } else if (r
== 0 && AFPPeersListStarted()) {
1388 SCLogDebug("Starting to read on %s", ptv
->tv
->name
);
1390 } else if (r
< 0) { /* only exit on error */
1391 SCLogWarning(SC_ERR_AFP_READ
, "poll failed with retval %d", r
);
1399 * \brief Try to reopen socket
1401 * \retval 0 in case of success, negative if error occurs or a condition
1404 static int AFPTryReopen(AFPThreadVars
*ptv
)
1410 /* Don't reconnect till we have packet that did not release data */
1411 if (SC_ATOMIC_GET(ptv
->mpeer
->sock_usage
) != 0) {
1415 afp_activate_r
= AFPCreateSocket(ptv
, ptv
->iface
, 0);
1416 if (afp_activate_r
!= 0) {
1417 if (ptv
->down_count
% AFP_DOWN_COUNTER_INTERVAL
== 0) {
1418 SCLogWarning(SC_ERR_AFP_CREATE
, "Can not open iface '%s'",
1421 return afp_activate_r
;
1424 SCLogInfo("Interface '%s' is back", ptv
->iface
);
1429 * \brief Main AF_PACKET reading Loop function
1431 TmEcode
ReceiveAFPLoop(ThreadVars
*tv
, void *data
, void *slot
)
1435 AFPThreadVars
*ptv
= (AFPThreadVars
*)data
;
1438 TmSlot
*s
= (TmSlot
*)slot
;
1439 time_t last_dump
= 0;
1440 time_t current_time
;
1441 int (*AFPReadFunc
) (AFPThreadVars
*);
1442 uint64_t discarded_pkts
= 0;
1444 ptv
->slot
= s
->slot_next
;
1446 if (ptv
->flags
& AFP_RING_MODE
) {
1447 if (ptv
->flags
& AFP_TPACKET_V3
) {
1448 AFPReadFunc
= AFPReadFromRingV3
;
1450 AFPReadFunc
= AFPReadFromRing
;
1453 AFPReadFunc
= AFPRead
;
1456 if (ptv
->afp_state
== AFP_STATE_DOWN
) {
1457 /* Wait for our turn, threads before us must have opened the socket */
1458 while (AFPPeersListWaitTurn(ptv
->mpeer
)) {
1460 if (suricata_ctl_flags
!= 0) {
1464 r
= AFPCreateSocket(ptv
, ptv
->iface
, 1);
1467 case AFP_FATAL_ERROR
:
1468 SCLogError(SC_ERR_AFP_CREATE
, "Couldn't init AF_PACKET socket, fatal error");
1469 SCReturnInt(TM_ECODE_FAILED
);
1470 case AFP_RECOVERABLE_ERROR
:
1471 SCLogWarning(SC_ERR_AFP_CREATE
, "Couldn't init AF_PACKET socket, retrying soon");
1474 AFPPeersListReachedInc();
1476 if (ptv
->afp_state
== AFP_STATE_UP
) {
1477 SCLogDebug("Thread %s using socket %d", tv
->name
, ptv
->socket
);
1478 if ((ptv
->flags
& AFP_TPACKET_V3
) != 0) {
1479 AFPSynchronizeStart(ptv
, &discarded_pkts
);
1481 /* let's reset counter as we will start the capture at the
1482 * next function call */
1483 #ifdef PACKET_STATISTICS
1484 struct tpacket_stats kstats
;
1485 socklen_t len
= sizeof (struct tpacket_stats
);
1486 if (getsockopt(ptv
->socket
, SOL_PACKET
, PACKET_STATISTICS
,
1487 &kstats
, &len
) > -1) {
1489 SCLogDebug("(%s) Kernel socket startup: Packets %" PRIu32
1490 ", dropped %" PRIu32
"",
1492 kstats
.tp_packets
, kstats
.tp_drops
);
1493 pkts
= kstats
.tp_packets
- discarded_pkts
- kstats
.tp_drops
;
1494 StatsAddUI64(ptv
->tv
, ptv
->capture_kernel_packets
, pkts
);
1495 (void) SC_ATOMIC_ADD(ptv
->livedev
->pkts
, pkts
);
1500 fds
.fd
= ptv
->socket
;
1501 fds
.events
= POLLIN
;
1504 /* Start by checking the state of our interface */
1505 if (unlikely(ptv
->afp_state
== AFP_STATE_DOWN
)) {
1509 usleep(AFP_RECONNECT_TIMEOUT
);
1510 if (suricata_ctl_flags
!= 0) {
1514 r
= AFPTryReopen(ptv
);
1515 fds
.fd
= ptv
->socket
;
1521 /* make sure we have at least one packet in the packet pool, to prevent
1522 * us from alloc'ing packets at line rate */
1525 r
= poll(&fds
, 1, POLL_TIMEOUT
);
1527 if (suricata_ctl_flags
!= 0) {
1532 (fds
.revents
& (POLLHUP
|POLLRDHUP
|POLLERR
|POLLNVAL
))) {
1533 if (fds
.revents
& (POLLHUP
| POLLRDHUP
)) {
1534 AFPSwitchState(ptv
, AFP_STATE_DOWN
);
1536 } else if (fds
.revents
& POLLERR
) {
1538 /* Do a recv to get errno */
1539 if (recv(ptv
->socket
, &c
, sizeof c
, MSG_PEEK
) != -1)
1540 continue; /* what, no error? */
1541 SCLogError(SC_ERR_AFP_READ
,
1542 "Error reading data from iface '%s': (%d" PRIu32
") %s",
1543 ptv
->iface
, errno
, strerror(errno
));
1544 AFPSwitchState(ptv
, AFP_STATE_DOWN
);
1546 } else if (fds
.revents
& POLLNVAL
) {
1547 SCLogError(SC_ERR_AFP_READ
, "Invalid polling request");
1548 AFPSwitchState(ptv
, AFP_STATE_DOWN
);
1552 r
= AFPReadFunc(ptv
);
1555 /* Trigger one dump of stats every second */
1556 current_time
= time(NULL
);
1557 if (current_time
!= last_dump
) {
1558 AFPDumpCounters(ptv
);
1559 last_dump
= current_time
;
1562 case AFP_READ_FAILURE
:
1563 /* AFPRead in error: best to reset the socket */
1564 SCLogError(SC_ERR_AFP_READ
,
1565 "AFPRead error reading data from iface '%s': (%d" PRIu32
") %s",
1566 ptv
->iface
, errno
, strerror(errno
));
1567 AFPSwitchState(ptv
, AFP_STATE_DOWN
);
1570 AFPSwitchState(ptv
, AFP_STATE_DOWN
);
1571 SCReturnInt(TM_ECODE_FAILED
);
1573 case AFP_KERNEL_DROP
:
1574 AFPDumpCounters(ptv
);
1577 } else if (unlikely(r
== 0)) {
1578 /* poll timed out, lets see if we need to inject a fake packet */
1579 TmThreadsCaptureInjectPacket(tv
, ptv
->slot
, NULL
);
1581 } else if ((r
< 0) && (errno
!= EINTR
)) {
1582 SCLogError(SC_ERR_AFP_READ
, "Error reading data from iface '%s': (%d" PRIu32
") %s",
1584 errno
, strerror(errno
));
1585 AFPSwitchState(ptv
, AFP_STATE_DOWN
);
1588 StatsSyncCountersIfSignalled(tv
);
1591 AFPDumpCounters(ptv
);
1592 StatsSyncCountersIfSignalled(tv
);
1593 SCReturnInt(TM_ECODE_OK
);
1596 static int AFPGetDevFlags(int fd
, const char *ifname
)
1600 memset(&ifr
, 0, sizeof(ifr
));
1601 strlcpy(ifr
.ifr_name
, ifname
, sizeof(ifr
.ifr_name
));
1603 if (ioctl(fd
, SIOCGIFFLAGS
, &ifr
) == -1) {
1604 SCLogError(SC_ERR_AFP_CREATE
, "Unable to find type for iface \"%s\": %s",
1605 ifname
, strerror(errno
));
1609 return ifr
.ifr_flags
;
1613 static int AFPGetIfnumByDev(int fd
, const char *ifname
, int verbose
)
1617 memset(&ifr
, 0, sizeof(ifr
));
1618 strlcpy(ifr
.ifr_name
, ifname
, sizeof(ifr
.ifr_name
));
1620 if (ioctl(fd
, SIOCGIFINDEX
, &ifr
) == -1) {
1622 SCLogError(SC_ERR_AFP_CREATE
, "Unable to find iface %s: %s",
1623 ifname
, strerror(errno
));
1627 return ifr
.ifr_ifindex
;
1630 static int AFPGetDevLinktype(int fd
, const char *ifname
)
1634 memset(&ifr
, 0, sizeof(ifr
));
1635 strlcpy(ifr
.ifr_name
, ifname
, sizeof(ifr
.ifr_name
));
1637 if (ioctl(fd
, SIOCGIFHWADDR
, &ifr
) == -1) {
1638 SCLogError(SC_ERR_AFP_CREATE
, "Unable to find type for iface \"%s\": %s",
1639 ifname
, strerror(errno
));
1643 switch (ifr
.ifr_hwaddr
.sa_family
) {
1644 case ARPHRD_LOOPBACK
:
1645 return LINKTYPE_ETHERNET
;
1648 return LINKTYPE_RAW
;
1650 return ifr
.ifr_hwaddr
.sa_family
;
1654 int AFPGetLinkType(const char *ifname
)
1658 int fd
= socket(AF_PACKET
, SOCK_RAW
, htons(ETH_P_ALL
));
1660 SCLogError(SC_ERR_AFP_CREATE
, "Couldn't create a AF_PACKET socket, error %s", strerror(errno
));
1661 return LINKTYPE_RAW
;
1664 ltype
= AFPGetDevLinktype(fd
, ifname
);
1670 static int AFPComputeRingParams(AFPThreadVars
*ptv
, int order
)
1672 /* Compute structure:
1673 Target is to store all pending packets
1674 with a size equal to MTU + auxdata
1675 And we keep a decent number of block
1678 Compute frame_size (aligned to be able to fit in block
1679 Check which block size we need. Blocksize is a 2^n * pagesize
1680 We then need to get order, big enough to have
1681 frame_size < block size
1682 Find number of frame per block (divide)
1686 described in packet_mmap.txt
1687 dependant on snaplen (need to use a variable ?)
1689 tp_hdrlen determine_version in daq_afpacket
1690 in V1: sizeof(struct tpacket_hdr);
1691 in V2: val in getsockopt(instance->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len)
1692 frame size: TPACKET_ALIGN(snaplen + TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1695 int tp_hdrlen
= sizeof(struct tpacket_hdr
);
1696 int snaplen
= default_packet_size
;
1699 snaplen
= GetIfaceMaxPacketSize(ptv
->iface
);
1701 SCLogWarning(SC_ERR_INVALID_VALUE
,
1702 "Unable to get MTU, setting snaplen to sane default of 1514");
1707 ptv
->req
.tp_frame_size
= TPACKET_ALIGN(snaplen
+TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen
) + sizeof(struct sockaddr_ll
) + ETH_HLEN
) - ETH_HLEN
);
1708 ptv
->req
.tp_block_size
= getpagesize() << order
;
1709 int frames_per_block
= ptv
->req
.tp_block_size
/ ptv
->req
.tp_frame_size
;
1710 if (frames_per_block
== 0) {
1711 SCLogError(SC_ERR_INVALID_VALUE
, "Frame size bigger than block size");
1714 ptv
->req
.tp_frame_nr
= ptv
->ring_size
;
1715 ptv
->req
.tp_block_nr
= ptv
->req
.tp_frame_nr
/ frames_per_block
+ 1;
1716 /* exact division */
1717 ptv
->req
.tp_frame_nr
= ptv
->req
.tp_block_nr
* frames_per_block
;
1718 SCLogPerf("AF_PACKET RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d",
1719 ptv
->req
.tp_block_size
, ptv
->req
.tp_block_nr
,
1720 ptv
->req
.tp_frame_size
, ptv
->req
.tp_frame_nr
);
1724 #ifdef HAVE_TPACKET_V3
1725 static int AFPComputeRingParamsV3(AFPThreadVars
*ptv
)
1727 ptv
->req3
.tp_block_size
= ptv
->block_size
;
1728 ptv
->req3
.tp_frame_size
= 2048;
1729 int frames_per_block
= 0;
1730 int tp_hdrlen
= sizeof(struct tpacket3_hdr
);
1731 int snaplen
= default_packet_size
;
1734 snaplen
= GetIfaceMaxPacketSize(ptv
->iface
);
1736 SCLogWarning(SC_ERR_INVALID_VALUE
,
1737 "Unable to get MTU, setting snaplen to sane default of 1514");
1742 ptv
->req
.tp_frame_size
= TPACKET_ALIGN(snaplen
+TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen
) + sizeof(struct sockaddr_ll
) + ETH_HLEN
) - ETH_HLEN
);
1743 frames_per_block
= ptv
->req3
.tp_block_size
/ ptv
->req3
.tp_frame_size
;
1745 if (frames_per_block
== 0) {
1746 SCLogError(SC_ERR_INVALID_VALUE
,
1747 "Block size is too small, it should be at least %d",
1748 ptv
->req3
.tp_frame_size
);
1751 ptv
->req3
.tp_block_nr
= ptv
->ring_size
/ frames_per_block
+ 1;
1752 /* exact division */
1753 ptv
->req3
.tp_frame_nr
= ptv
->req3
.tp_block_nr
* frames_per_block
;
1754 ptv
->req3
.tp_retire_blk_tov
= ptv
->block_timeout
;
1755 ptv
->req3
.tp_feature_req_word
= TP_FT_REQ_FILL_RXHASH
;
1756 SCLogPerf("AF_PACKET V3 RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d (mem: %d)",
1757 ptv
->req3
.tp_block_size
, ptv
->req3
.tp_block_nr
,
1758 ptv
->req3
.tp_frame_size
, ptv
->req3
.tp_frame_nr
,
1759 ptv
->req3
.tp_block_size
* ptv
->req3
.tp_block_nr
1765 static int AFPSetupRing(AFPThreadVars
*ptv
, char *devname
)
1768 unsigned int len
= sizeof(val
), i
;
1772 #ifdef HAVE_TPACKET_V3
1773 if (ptv
->flags
& AFP_TPACKET_V3
) {
1780 if (getsockopt(ptv
->socket
, SOL_PACKET
, PACKET_HDRLEN
, &val
, &len
) < 0) {
1781 if (errno
== ENOPROTOOPT
) {
1782 if (ptv
->flags
& AFP_TPACKET_V3
) {
1783 SCLogError(SC_ERR_AFP_CREATE
,
1784 "Too old kernel giving up (need 3.2 for TPACKET_V3)");
1786 SCLogError(SC_ERR_AFP_CREATE
,
1787 "Too old kernel giving up (need 2.6.27 at least)");
1790 SCLogError(SC_ERR_AFP_CREATE
, "Error when retrieving packet header len");
1791 return AFP_FATAL_ERROR
;
1795 #ifdef HAVE_TPACKET_V3
1796 if (ptv
->flags
& AFP_TPACKET_V3
) {
1800 if (setsockopt(ptv
->socket
, SOL_PACKET
, PACKET_VERSION
, &val
,
1802 SCLogError(SC_ERR_AFP_CREATE
,
1803 "Can't activate TPACKET_V2/TPACKET_V3 on packet socket: %s",
1805 return AFP_FATAL_ERROR
;
1808 #ifdef HAVE_HW_TIMESTAMPING
1809 int req
= SOF_TIMESTAMPING_RAW_HARDWARE
;
1810 if (setsockopt(ptv
->socket
, SOL_PACKET
, PACKET_TIMESTAMP
, (void *) &req
,
1812 SCLogWarning(SC_ERR_AFP_CREATE
,
1813 "Can't activate hardware timestamping on packet socket: %s",
1818 /* Let's reserve head room so we can add the VLAN header in IPS
1819 * or TAP mode before write the packet */
1820 if (ptv
->copy_mode
!= AFP_COPY_MODE_NONE
) {
1821 /* Only one vlan is extracted from AFP header so
1822 * one VLAN header length is enough. */
1823 int reserve
= VLAN_HEADER_LEN
;
1824 if (setsockopt(ptv
->socket
, SOL_PACKET
, PACKET_RESERVE
, (void *) &reserve
,
1825 sizeof(reserve
)) < 0) {
1826 SCLogError(SC_ERR_AFP_CREATE
,
1827 "Can't activate reserve on packet socket: %s",
1829 return AFP_FATAL_ERROR
;
1833 /* Allocate RX ring */
1834 #ifdef HAVE_TPACKET_V3
1835 if (ptv
->flags
& AFP_TPACKET_V3
) {
1836 if (AFPComputeRingParamsV3(ptv
) != 1) {
1837 return AFP_FATAL_ERROR
;
1839 r
= setsockopt(ptv
->socket
, SOL_PACKET
, PACKET_RX_RING
,
1840 (void *) &ptv
->req3
, sizeof(ptv
->req3
));
1842 SCLogError(SC_ERR_MEM_ALLOC
,
1843 "Unable to allocate RX Ring for iface %s: (%d) %s",
1847 return AFP_FATAL_ERROR
;
1851 for (order
= AFP_BLOCK_SIZE_DEFAULT_ORDER
; order
>= 0; order
--) {
1852 if (AFPComputeRingParams(ptv
, order
) != 1) {
1853 SCLogInfo("Ring parameter are incorrect. Please correct the devel");
1854 return AFP_FATAL_ERROR
;
1857 r
= setsockopt(ptv
->socket
, SOL_PACKET
, PACKET_RX_RING
,
1858 (void *) &ptv
->req
, sizeof(ptv
->req
));
1861 if (errno
== ENOMEM
) {
1862 SCLogInfo("Memory issue with ring parameters. Retrying.");
1865 SCLogError(SC_ERR_MEM_ALLOC
,
1866 "Unable to allocate RX Ring for iface %s: (%d) %s",
1870 return AFP_FATAL_ERROR
;
1876 SCLogError(SC_ERR_MEM_ALLOC
,
1877 "Unable to allocate RX Ring for iface %s (order 0 failed)",
1879 return AFP_FATAL_ERROR
;
1881 #ifdef HAVE_TPACKET_V3
1885 /* Allocate the Ring */
1886 #ifdef HAVE_TPACKET_V3
1887 if (ptv
->flags
& AFP_TPACKET_V3
) {
1888 ptv
->ring_buflen
= ptv
->req3
.tp_block_nr
* ptv
->req3
.tp_block_size
;
1891 ptv
->ring_buflen
= ptv
->req
.tp_block_nr
* ptv
->req
.tp_block_size
;
1892 #ifdef HAVE_TPACKET_V3
1895 mmap_flag
= MAP_SHARED
;
1896 if (ptv
->flags
& AFP_MMAP_LOCKED
)
1897 mmap_flag
|= MAP_LOCKED
;
1898 ptv
->ring_buf
= mmap(0, ptv
->ring_buflen
, PROT_READ
|PROT_WRITE
,
1899 mmap_flag
, ptv
->socket
, 0);
1900 if (ptv
->ring_buf
== MAP_FAILED
) {
1901 SCLogError(SC_ERR_MEM_ALLOC
, "Unable to mmap, error %s",
1905 #ifdef HAVE_TPACKET_V3
1906 if (ptv
->flags
& AFP_TPACKET_V3
) {
1907 ptv
->ring_v3
= SCMalloc(ptv
->req3
.tp_block_nr
* sizeof(*ptv
->ring_v3
));
1908 if (!ptv
->ring_v3
) {
1909 SCLogError(SC_ERR_MEM_ALLOC
, "Unable to malloc ptv ring_v3");
1912 for (i
= 0; i
< ptv
->req3
.tp_block_nr
; ++i
) {
1913 ptv
->ring_v3
[i
].iov_base
= ptv
->ring_buf
+ (i
* ptv
->req3
.tp_block_size
);
1914 ptv
->ring_v3
[i
].iov_len
= ptv
->req3
.tp_block_size
;
1918 /* allocate a ring for each frame header pointer*/
1919 ptv
->ring_v2
= SCMalloc(ptv
->req
.tp_frame_nr
* sizeof (union thdr
*));
1920 if (ptv
->ring_v2
== NULL
) {
1921 SCLogError(SC_ERR_MEM_ALLOC
, "Unable to allocate frame buf");
1924 memset(ptv
->ring_v2
, 0, ptv
->req
.tp_frame_nr
* sizeof (union thdr
*));
1925 /* fill the header ring with proper frame ptr*/
1926 ptv
->frame_offset
= 0;
1927 for (i
= 0; i
< ptv
->req
.tp_block_nr
; ++i
) {
1928 void *base
= &(ptv
->ring_buf
[i
* ptv
->req
.tp_block_size
]);
1930 for (j
= 0; j
< ptv
->req
.tp_block_size
/ ptv
->req
.tp_frame_size
; ++j
, ++ptv
->frame_offset
) {
1931 (((union thdr
**)ptv
->ring_v2
)[ptv
->frame_offset
]) = base
;
1932 base
+= ptv
->req
.tp_frame_size
;
1935 ptv
->frame_offset
= 0;
1936 #ifdef HAVE_TPACKET_V3
1943 munmap(ptv
->ring_buf
, ptv
->ring_buflen
);
1945 SCFree(ptv
->ring_v2
);
1947 SCFree(ptv
->ring_v3
);
1949 /* Packet mmap does the cleaning when socket is closed */
1950 return AFP_FATAL_ERROR
;
1953 /** \brief test if we can use FANOUT. Older kernels like those in
1954 * CentOS6 have HAVE_PACKET_FANOUT defined but fail to work
1956 int AFPIsFanoutSupported(void)
1958 #ifdef HAVE_PACKET_FANOUT
1959 int fd
= socket(AF_PACKET
, SOCK_RAW
, htons(ETH_P_ALL
));
1963 uint16_t mode
= PACKET_FANOUT_HASH
| PACKET_FANOUT_FLAG_DEFRAG
;
1965 uint32_t option
= (mode
<< 16) | (id
& 0xffff);
1966 int r
= setsockopt(fd
, SOL_PACKET
, PACKET_FANOUT
,(void *)&option
, sizeof(option
));
1970 SCLogPerf("fanout not supported by kernel: %s", strerror(errno
));
1979 #ifdef HAVE_PACKET_EBPF
1981 static int SockFanoutSeteBPF(AFPThreadVars
*ptv
)
1983 int pfd
= ptv
->ebpf_lb_fd
;
1985 SCLogError(SC_ERR_INVALID_VALUE
,
1986 "Fanout file descriptor is invalid");
1990 if (setsockopt(ptv
->socket
, SOL_PACKET
, PACKET_FANOUT_DATA
, &pfd
, sizeof(pfd
))) {
1991 SCLogError(SC_ERR_INVALID_VALUE
, "Error setting ebpf");
1994 SCLogInfo("Activated eBPF on socket");
1999 static int SetEbpfFilter(AFPThreadVars
*ptv
)
2001 int pfd
= ptv
->ebpf_filter_fd
;
2003 SCLogError(SC_ERR_INVALID_VALUE
,
2004 "Filter file descriptor is invalid");
2008 if (setsockopt(ptv
->socket
, SOL_SOCKET
, SO_ATTACH_BPF
, &pfd
, sizeof(pfd
))) {
2009 SCLogError(SC_ERR_INVALID_VALUE
, "Error setting ebpf: %s", strerror(errno
));
2012 SCLogInfo("Activated eBPF filter on socket");
2018 static int AFPCreateSocket(AFPThreadVars
*ptv
, char *devname
, int verbose
)
2021 int ret
= AFP_FATAL_ERROR
;
2022 struct packet_mreq sock_params
;
2023 struct sockaddr_ll bind_address
;
2027 ptv
->socket
= socket(AF_PACKET
, SOCK_RAW
, htons(ETH_P_ALL
));
2028 if (ptv
->socket
== -1) {
2029 SCLogError(SC_ERR_AFP_CREATE
, "Couldn't create a AF_PACKET socket, error %s", strerror(errno
));
2033 if_idx
= AFPGetIfnumByDev(ptv
->socket
, devname
, verbose
);
2040 memset(&bind_address
, 0, sizeof(bind_address
));
2041 bind_address
.sll_family
= AF_PACKET
;
2042 bind_address
.sll_protocol
= htons(ETH_P_ALL
);
2043 bind_address
.sll_ifindex
= if_idx
;
2044 if (bind_address
.sll_ifindex
== -1) {
2046 SCLogError(SC_ERR_AFP_CREATE
, "Couldn't find iface %s", devname
);
2047 ret
= AFP_RECOVERABLE_ERROR
;
2051 int if_flags
= AFPGetDevFlags(ptv
->socket
, ptv
->iface
);
2052 if (if_flags
== -1) {
2054 SCLogError(SC_ERR_AFP_READ
,
2055 "Couldn't get flags for interface '%s'",
2058 ret
= AFP_RECOVERABLE_ERROR
;
2060 } else if ((if_flags
& (IFF_UP
| IFF_RUNNING
)) == 0) {
2062 SCLogError(SC_ERR_AFP_READ
,
2063 "Interface '%s' is down",
2066 ret
= AFP_RECOVERABLE_ERROR
;
2070 if (ptv
->promisc
!= 0) {
2071 /* Force promiscuous mode */
2072 memset(&sock_params
, 0, sizeof(sock_params
));
2073 sock_params
.mr_type
= PACKET_MR_PROMISC
;
2074 sock_params
.mr_ifindex
= bind_address
.sll_ifindex
;
2075 r
= setsockopt(ptv
->socket
, SOL_PACKET
, PACKET_ADD_MEMBERSHIP
,(void *)&sock_params
, sizeof(sock_params
));
2077 SCLogError(SC_ERR_AFP_CREATE
,
2078 "Couldn't switch iface %s to promiscuous, error %s",
2079 devname
, strerror(errno
));
2084 if (ptv
->checksum_mode
== CHECKSUM_VALIDATION_KERNEL
) {
2086 if (setsockopt(ptv
->socket
, SOL_PACKET
, PACKET_AUXDATA
, &val
,
2087 sizeof(val
)) == -1 && errno
!= ENOPROTOOPT
) {
2088 SCLogWarning(SC_ERR_NO_AF_PACKET
,
2089 "'kernel' checksum mode not supported, falling back to full mode.");
2090 ptv
->checksum_mode
= CHECKSUM_VALIDATION_ENABLE
;
2094 /* set socket recv buffer size */
2095 if (ptv
->buffer_size
!= 0) {
2097 * Set the socket buffer size to the specified value.
2099 SCLogPerf("Setting AF_PACKET socket buffer to %d", ptv
->buffer_size
);
2100 if (setsockopt(ptv
->socket
, SOL_SOCKET
, SO_RCVBUF
,
2102 sizeof(ptv
->buffer_size
)) == -1) {
2103 SCLogError(SC_ERR_AFP_CREATE
,
2104 "Couldn't set buffer size to %d on iface %s, error %s",
2105 ptv
->buffer_size
, devname
, strerror(errno
));
2110 r
= bind(ptv
->socket
, (struct sockaddr
*)&bind_address
, sizeof(bind_address
));
2113 if (errno
== ENETDOWN
) {
2114 SCLogError(SC_ERR_AFP_CREATE
,
2115 "Couldn't bind AF_PACKET socket, iface %s is down",
2118 SCLogError(SC_ERR_AFP_CREATE
,
2119 "Couldn't bind AF_PACKET socket to iface %s, error %s",
2120 devname
, strerror(errno
));
2123 ret
= AFP_RECOVERABLE_ERROR
;
2128 #ifdef HAVE_PACKET_FANOUT
2129 /* add binded socket to fanout group */
2130 if (ptv
->threads
> 1) {
2131 uint16_t mode
= ptv
->cluster_type
;
2132 uint16_t id
= ptv
->cluster_id
;
2133 uint32_t option
= (mode
<< 16) | (id
& 0xffff);
2134 r
= setsockopt(ptv
->socket
, SOL_PACKET
, PACKET_FANOUT
,(void *)&option
, sizeof(option
));
2136 SCLogError(SC_ERR_AFP_CREATE
,
2137 "Couldn't set fanout mode, error %s",
2144 #ifdef HAVE_PACKET_EBPF
2145 if (ptv
->cluster_type
== PACKET_FANOUT_EBPF
) {
2146 r
= SockFanoutSeteBPF(ptv
);
2148 SCLogError(SC_ERR_AFP_CREATE
,
2149 "Coudn't set EBPF, error %s",
2156 if (ptv
->flags
& AFP_RING_MODE
) {
2157 ret
= AFPSetupRing(ptv
, devname
);
2162 SCLogDebug("Using interface '%s' via socket %d", (char *)devname
, ptv
->socket
);
2164 ptv
->datalink
= AFPGetDevLinktype(ptv
->socket
, ptv
->iface
);
2165 switch (ptv
->datalink
) {
2172 TmEcode rc
= AFPSetBPFFilter(ptv
);
2173 if (rc
== TM_ECODE_FAILED
) {
2174 SCLogError(SC_ERR_AFP_CREATE
, "Set AF_PACKET bpf filter \"%s\" failed.", ptv
->bpf_filter
);
2175 ret
= AFP_FATAL_ERROR
;
2180 AFPSwitchState(ptv
, AFP_STATE_UP
);
2186 if (ptv
->flags
& AFP_TPACKET_V3
) {
2188 SCFree(ptv
->ring_v3
);
2189 ptv
->ring_v3
= NULL
;
2193 SCFree(ptv
->ring_v2
);
2194 ptv
->ring_v2
= NULL
;
2202 TmEcode
AFPSetBPFFilter(AFPThreadVars
*ptv
)
2204 struct bpf_program filter
;
2205 struct sock_fprog fcode
;
2208 #ifdef HAVE_PACKET_EBPF
2209 if (ptv
->ebpf_filter_fd
!= -1) {
2210 return SetEbpfFilter(ptv
);
2214 if (!ptv
->bpf_filter
)
2217 SCMutexLock(&afpacket_bpf_set_filter_lock
);
2219 SCLogInfo("Using BPF '%s' on iface '%s'",
2222 if (pcap_compile_nopcap(default_packet_size
, /* snaplen_arg */
2223 ptv
->datalink
, /* linktype_arg */
2224 &filter
, /* program */
2225 ptv
->bpf_filter
, /* const char *buf */
2229 SCLogError(SC_ERR_AFP_CREATE
, "Filter compilation failed.");
2230 SCMutexUnlock(&afpacket_bpf_set_filter_lock
);
2231 return TM_ECODE_FAILED
;
2233 SCMutexUnlock(&afpacket_bpf_set_filter_lock
);
2235 if (filter
.bf_insns
== NULL
) {
2236 SCLogError(SC_ERR_AFP_CREATE
, "Filter badly setup.");
2237 pcap_freecode(&filter
);
2238 return TM_ECODE_FAILED
;
2241 fcode
.len
= filter
.bf_len
;
2242 fcode
.filter
= (struct sock_filter
*)filter
.bf_insns
;
2244 rc
= setsockopt(ptv
->socket
, SOL_SOCKET
, SO_ATTACH_FILTER
, &fcode
, sizeof(fcode
));
2246 pcap_freecode(&filter
);
2248 SCLogError(SC_ERR_AFP_CREATE
, "Failed to attach filter: %s", strerror(errno
));
2249 return TM_ECODE_FAILED
;
2255 #ifdef HAVE_PACKET_EBPF
2257 * Insert a half flow in the kernel bypass table
2259 * \param mapfd file descriptor of the protocol bypass table
2260 * \param key data to use as key in the table
2261 * \param inittime time of creation of the entry (in monotonic clock)
2263 static int AFPInsertHalfFlow(int mapd
, void *key
, uint64_t inittime
)
2265 /* FIXME error handling */
2266 unsigned int nr_cpus
= UtilCpuGetNumProcessorsConfigured();
2267 struct pair value
[nr_cpus
];
2269 for (i
= 0; i
< nr_cpus
; i
++) {
2270 value
[i
].time
= inittime
;
2271 value
[i
].packets
= 0;
2274 SCLogDebug("Inserting element in eBPF mapping: %lu", inittime
);
2275 if (bpf_map_update_elem(mapd
, key
, value
, BPF_NOEXIST
) != 0) {
2281 SCLogError(SC_ERR_BPF
, "Can't update eBPF map: %s (%d)",
2291 static int AFPBypassCallback(Packet
*p
)
2293 #ifdef HAVE_PACKET_EBPF
2294 SCLogDebug("Calling af_packet callback function");
2295 /* Only bypass TCP and UDP */
2296 if (!(PKT_IS_TCP(p
) || PKT_IS_UDP(p
))) {
2300 /* Bypassing tunneled packets is currently not supported
2301 * because we can't discard the inner packet only due to
2302 * primitive parsing in eBPF */
2303 if (IS_TUNNEL_PKT(p
)) {
2306 struct timespec curtime
;
2307 uint64_t inittime
= 0;
2308 /* In eBPF, the function that we have use to get time return the
2309 * monotonic clock (the time since start of the computer). So we
2310 * can't use the timestamp of the packet. */
2311 if (clock_gettime(CLOCK_MONOTONIC
, &curtime
) == 0) {
2312 inittime
= curtime
.tv_sec
* 1000000000;
2314 if (PKT_IS_IPV4(p
)) {
2315 SCLogDebug("add an IPv4");
2316 if (p
->afp_v
.v4_map_fd
== -1) {
2319 struct flowv4_keys key
= {};
2320 key
.src
= htonl(GET_IPV4_SRC_ADDR_U32(p
));
2321 key
.dst
= htonl(GET_IPV4_DST_ADDR_U32(p
));
2322 key
.port16
[0] = GET_TCP_SRC_PORT(p
);
2323 key
.port16
[1] = GET_TCP_DST_PORT(p
);
2325 key
.ip_proto
= IPV4_GET_IPPROTO(p
);
2326 if (AFPInsertHalfFlow(p
->afp_v
.v4_map_fd
, &key
, inittime
) == 0) {
2329 key
.src
= htonl(GET_IPV4_DST_ADDR_U32(p
));
2330 key
.dst
= htonl(GET_IPV4_SRC_ADDR_U32(p
));
2331 key
.port16
[0] = GET_TCP_DST_PORT(p
);
2332 key
.port16
[1] = GET_TCP_SRC_PORT(p
);
2333 if (AFPInsertHalfFlow(p
->afp_v
.v4_map_fd
, &key
, inittime
) == 0) {
2338 /* For IPv6 case we don't handle extended header in eBPF */
2339 if (PKT_IS_IPV6(p
) &&
2340 ((IPV6_GET_NH(p
) == IPPROTO_TCP
) || (IPV6_GET_NH(p
) == IPPROTO_UDP
))) {
2342 if (p
->afp_v
.v6_map_fd
== -1) {
2345 SCLogDebug("add an IPv6");
2346 struct flowv6_keys key
= {};
2347 for (i
= 0; i
< 4; i
++) {
2348 key
.src
[i
] = ntohl(GET_IPV6_SRC_ADDR(p
)[i
]);
2349 key
.dst
[i
] = ntohl(GET_IPV6_DST_ADDR(p
)[i
]);
2351 key
.port16
[0] = GET_TCP_SRC_PORT(p
);
2352 key
.port16
[1] = GET_TCP_DST_PORT(p
);
2353 key
.ip_proto
= IPV6_GET_NH(p
);
2354 if (AFPInsertHalfFlow(p
->afp_v
.v6_map_fd
, &key
, inittime
) == 0) {
2357 for (i
= 0; i
< 4; i
++) {
2358 key
.src
[i
] = ntohl(GET_IPV6_DST_ADDR(p
)[i
]);
2359 key
.dst
[i
] = ntohl(GET_IPV6_SRC_ADDR(p
)[i
]);
2361 key
.port16
[0] = GET_TCP_DST_PORT(p
);
2362 key
.port16
[1] = GET_TCP_SRC_PORT(p
);
2363 if (AFPInsertHalfFlow(p
->afp_v
.v6_map_fd
, &key
, inittime
) == 0) {
2372 static int AFPXDPBypassCallback(Packet
*p
)
2374 #ifdef HAVE_PACKET_XDP
2375 SCLogDebug("Calling af_packet callback function");
2376 /* Only bypass TCP and UDP */
2377 if (!(PKT_IS_TCP(p
) || PKT_IS_UDP(p
))) {
2381 /* Bypassing tunneled packets is currently not supported
2382 * because we can't discard the inner packet only due to
2383 * primitive parsing in eBPF */
2384 if (IS_TUNNEL_PKT(p
)) {
2387 struct timespec curtime
;
2388 uint64_t inittime
= 0;
2389 if (clock_gettime(CLOCK_MONOTONIC
, &curtime
) == 0) {
2390 inittime
= curtime
.tv_sec
* 1000000000;
2392 if (PKT_IS_IPV4(p
)) {
2393 struct flowv4_keys key
= {};
2394 if (p
->afp_v
.v4_map_fd
== -1) {
2397 key
.src
= GET_IPV4_SRC_ADDR_U32(p
);
2398 key
.dst
= GET_IPV4_DST_ADDR_U32(p
);
2399 /* FIXME htons or not depending of XDP and af_packet eBPF */
2400 key
.port16
[0] = htons(GET_TCP_SRC_PORT(p
));
2401 key
.port16
[1] = htons(GET_TCP_DST_PORT(p
));
2402 key
.ip_proto
= IPV4_GET_IPPROTO(p
);
2403 if (AFPInsertHalfFlow(p
->afp_v
.v4_map_fd
, &key
, inittime
) == 0) {
2406 key
.src
= GET_IPV4_DST_ADDR_U32(p
);
2407 key
.dst
= GET_IPV4_SRC_ADDR_U32(p
);
2408 key
.port16
[0] = htons(GET_TCP_DST_PORT(p
));
2409 key
.port16
[1] = htons(GET_TCP_SRC_PORT(p
));
2410 if (AFPInsertHalfFlow(p
->afp_v
.v4_map_fd
, &key
, inittime
) == 0) {
2415 /* For IPv6 case we don't handle extended header in eBPF */
2416 if (PKT_IS_IPV6(p
) &&
2417 ((IPV6_GET_NH(p
) == IPPROTO_TCP
) || (IPV6_GET_NH(p
) == IPPROTO_UDP
))) {
2418 SCLogDebug("add an IPv6");
2419 if (p
->afp_v
.v6_map_fd
== -1) {
2423 struct flowv6_keys key
= {};
2424 for (i
= 0; i
< 4; i
++) {
2425 key
.src
[i
] = GET_IPV6_SRC_ADDR(p
)[i
];
2426 key
.dst
[i
] = GET_IPV6_DST_ADDR(p
)[i
];
2428 key
.port16
[0] = htons(GET_TCP_SRC_PORT(p
));
2429 key
.port16
[1] = htons(GET_TCP_DST_PORT(p
));
2430 key
.ip_proto
= IPV6_GET_NH(p
);
2431 if (AFPInsertHalfFlow(p
->afp_v
.v6_map_fd
, &key
, inittime
) == 0) {
2434 for (i
= 0; i
< 4; i
++) {
2435 key
.src
[i
] = GET_IPV6_DST_ADDR(p
)[i
];
2436 key
.dst
[i
] = GET_IPV6_SRC_ADDR(p
)[i
];
2438 key
.port16
[0] = htons(GET_TCP_DST_PORT(p
));
2439 key
.port16
[1] = htons(GET_TCP_SRC_PORT(p
));
2440 if (AFPInsertHalfFlow(p
->afp_v
.v6_map_fd
, &key
, inittime
) == 0) {
2450 * \brief Init function for ReceiveAFP.
2452 * \param tv pointer to ThreadVars
2453 * \param initdata pointer to the interface passed from the user
2454 * \param data pointer gets populated with AFPThreadVars
2456 * \todo Create a general AFP setup function.
2458 TmEcode
ReceiveAFPThreadInit(ThreadVars
*tv
, const void *initdata
, void **data
)
2461 AFPIfaceConfig
*afpconfig
= (AFPIfaceConfig
*)initdata
;
2463 if (initdata
== NULL
) {
2464 SCLogError(SC_ERR_INVALID_ARGUMENT
, "initdata == NULL");
2465 SCReturnInt(TM_ECODE_FAILED
);
2468 AFPThreadVars
*ptv
= SCMalloc(sizeof(AFPThreadVars
));
2469 if (unlikely(ptv
== NULL
)) {
2470 afpconfig
->DerefFunc(afpconfig
);
2471 SCReturnInt(TM_ECODE_FAILED
);
2473 memset(ptv
, 0, sizeof(AFPThreadVars
));
2478 strlcpy(ptv
->iface
, afpconfig
->iface
, AFP_IFACE_NAME_LENGTH
);
2479 ptv
->iface
[AFP_IFACE_NAME_LENGTH
- 1]= '\0';
2481 ptv
->livedev
= LiveGetDevice(ptv
->iface
);
2482 if (ptv
->livedev
== NULL
) {
2483 SCLogError(SC_ERR_INVALID_VALUE
, "Unable to find Live device");
2485 SCReturnInt(TM_ECODE_FAILED
);
2488 ptv
->buffer_size
= afpconfig
->buffer_size
;
2489 ptv
->ring_size
= afpconfig
->ring_size
;
2490 ptv
->block_size
= afpconfig
->block_size
;
2492 ptv
->promisc
= afpconfig
->promisc
;
2493 ptv
->checksum_mode
= afpconfig
->checksum_mode
;
2494 ptv
->bpf_filter
= NULL
;
2497 #ifdef HAVE_PACKET_FANOUT
2498 ptv
->cluster_type
= PACKET_FANOUT_LB
;
2499 ptv
->cluster_id
= 1;
2500 /* We only set cluster info if the number of reader threads is greater than 1 */
2501 if (afpconfig
->threads
> 1) {
2502 ptv
->cluster_id
= afpconfig
->cluster_id
;
2503 ptv
->cluster_type
= afpconfig
->cluster_type
;
2504 ptv
->threads
= afpconfig
->threads
;
2507 ptv
->flags
= afpconfig
->flags
;
2509 if (afpconfig
->bpf_filter
) {
2510 ptv
->bpf_filter
= afpconfig
->bpf_filter
;
2512 ptv
->ebpf_lb_fd
= afpconfig
->ebpf_lb_fd
;
2513 ptv
->ebpf_filter_fd
= afpconfig
->ebpf_filter_fd
;
2514 ptv
->xdp_mode
= afpconfig
->xdp_mode
;
2516 #ifdef HAVE_PACKET_EBPF
2517 if (ptv
->flags
& (AFP_BYPASS
|AFP_XDPBYPASS
)) {
2518 ptv
->v4_map_fd
= EBPFGetMapFDByName("flow_table_v4");
2519 if (ptv
->v4_map_fd
== -1) {
2520 SCLogError(SC_ERR_INVALID_VALUE
, "Can't find eBPF map fd for '%s'", "flow_table_v4");
2522 ptv
->v6_map_fd
= EBPFGetMapFDByName("flow_table_v6");
2523 if (ptv
->v6_map_fd
== -1) {
2524 SCLogError(SC_ERR_INVALID_VALUE
, "Can't find eBPF map fd for '%s'", "flow_table_v6");
2529 #ifdef PACKET_STATISTICS
2530 ptv
->capture_kernel_packets
= StatsRegisterCounter("capture.kernel_packets",
2532 ptv
->capture_kernel_drops
= StatsRegisterCounter("capture.kernel_drops",
2536 ptv
->copy_mode
= afpconfig
->copy_mode
;
2537 if (ptv
->copy_mode
!= AFP_COPY_MODE_NONE
) {
2538 strlcpy(ptv
->out_iface
, afpconfig
->out_iface
, AFP_IFACE_NAME_LENGTH
);
2539 ptv
->out_iface
[AFP_IFACE_NAME_LENGTH
- 1]= '\0';
2540 /* Warn about BPF filter consequence */
2541 if (ptv
->bpf_filter
) {
2542 SCLogWarning(SC_WARN_UNCOMMON
, "Enabling a BPF filter in IPS mode result"
2543 " in dropping all non matching packets.");
2548 if (AFPPeersListAdd(ptv
) == TM_ECODE_FAILED
) {
2550 afpconfig
->DerefFunc(afpconfig
);
2551 SCReturnInt(TM_ECODE_FAILED
);
2554 #define T_DATA_SIZE 70000
2555 ptv
->data
= SCMalloc(T_DATA_SIZE
);
2556 if (ptv
->data
== NULL
) {
2557 afpconfig
->DerefFunc(afpconfig
);
2559 SCReturnInt(TM_ECODE_FAILED
);
2561 ptv
->datalen
= T_DATA_SIZE
;
2564 *data
= (void *)ptv
;
2566 afpconfig
->DerefFunc(afpconfig
);
2568 /* A bit strange to have this here but we only have vlan information
2569 * during reading so we need to know if we want to keep vlan during
2570 * the capture phase */
2572 if ((ConfGetBool("vlan.use-for-tracking", &vlanbool
)) == 1 && vlanbool
== 0) {
2573 ptv
->flags
|= AFP_VLAN_DISABLED
;
2576 /* If kernel is older than 3.0, VLAN is not stripped so we don't
2577 * get the info from packet extended header but we will use a standard
2578 * parsing of packet data (See Linux commit bcc6d47903612c3861201cc3a866fb604f26b8b2) */
2579 if (! SCKernelVersionIsAtLeast(3, 0)) {
2580 ptv
->flags
|= AFP_VLAN_DISABLED
;
2583 SCReturnInt(TM_ECODE_OK
);
2587 * \brief This function prints stats to the screen at exit.
2588 * \param tv pointer to ThreadVars
2589 * \param data pointer that gets cast into AFPThreadVars for ptv
2591 void ReceiveAFPThreadExitStats(ThreadVars
*tv
, void *data
)
2594 AFPThreadVars
*ptv
= (AFPThreadVars
*)data
;
2596 #ifdef PACKET_STATISTICS
2597 AFPDumpCounters(ptv
);
2598 SCLogPerf("(%s) Kernel: Packets %" PRIu64
", dropped %" PRIu64
"",
2600 StatsGetLocalCounterValue(tv
, ptv
->capture_kernel_packets
),
2601 StatsGetLocalCounterValue(tv
, ptv
->capture_kernel_drops
));
2606 * \brief DeInit function closes af packet socket at exit.
2607 * \param tv pointer to ThreadVars
2608 * \param data pointer that gets cast into AFPThreadVars for ptv
2610 TmEcode
ReceiveAFPThreadDeinit(ThreadVars
*tv
, void *data
)
2612 AFPThreadVars
*ptv
= (AFPThreadVars
*)data
;
2614 AFPSwitchState(ptv
, AFP_STATE_DOWN
);
2616 #ifdef HAVE_PACKET_XDP
2617 EBPFSetupXDP(ptv
->iface
, -1, ptv
->xdp_mode
);
2619 if (ptv
->data
!= NULL
) {
2625 ptv
->bpf_filter
= NULL
;
2626 if ((ptv
->flags
& AFP_TPACKET_V3
) && ptv
->ring_v3
) {
2627 SCFree(ptv
->ring_v3
);
2630 SCFree(ptv
->ring_v2
);
2634 SCReturnInt(TM_ECODE_OK
);
2638 * \brief This function passes off to link type decoders.
2640 * DecodeAFP reads packets from the PacketQueue and passes
2641 * them off to the proper link type decoder.
2643 * \param t pointer to ThreadVars
2644 * \param p pointer to the current packet
2645 * \param data pointer that gets cast into AFPThreadVars for ptv
2646 * \param pq pointer to the current PacketQueue
2648 TmEcode
DecodeAFP(ThreadVars
*tv
, Packet
*p
, void *data
, PacketQueue
*pq
, PacketQueue
*postpq
)
2651 DecodeThreadVars
*dtv
= (DecodeThreadVars
*)data
;
2653 /* XXX HACK: flow timeout can call us for injected pseudo packets
2654 * see bug: https://redmine.openinfosecfoundation.org/issues/1107 */
2655 if (p
->flags
& PKT_PSEUDO_STREAM_END
)
2658 /* update counters */
2659 DecodeUpdatePacketCounters(tv
, dtv
, p
);
2661 /* If suri has set vlan during reading, we increase vlan counter */
2663 StatsIncr(tv
, dtv
->counter_vlan
);
2666 /* call the decoder */
2667 switch (p
->datalink
) {
2668 case LINKTYPE_ETHERNET
:
2669 DecodeEthernet(tv
, dtv
, p
,GET_PKT_DATA(p
), GET_PKT_LEN(p
), pq
);
2671 case LINKTYPE_LINUX_SLL
:
2672 DecodeSll(tv
, dtv
, p
, GET_PKT_DATA(p
), GET_PKT_LEN(p
), pq
);
2675 DecodePPP(tv
, dtv
, p
, GET_PKT_DATA(p
), GET_PKT_LEN(p
), pq
);
2678 DecodeRaw(tv
, dtv
, p
, GET_PKT_DATA(p
), GET_PKT_LEN(p
), pq
);
2681 DecodeNull(tv
, dtv
, p
, GET_PKT_DATA(p
), GET_PKT_LEN(p
), pq
);
2684 SCLogError(SC_ERR_DATALINK_UNIMPLEMENTED
, "Error: datalink type %" PRId32
" not yet supported in module DecodeAFP", p
->datalink
);
2688 PacketDecodeFinalize(tv
, dtv
, p
);
2690 SCReturnInt(TM_ECODE_OK
);
2693 TmEcode
DecodeAFPThreadInit(ThreadVars
*tv
, const void *initdata
, void **data
)
2696 DecodeThreadVars
*dtv
= NULL
;
2698 dtv
= DecodeThreadVarsAlloc(tv
);
2701 SCReturnInt(TM_ECODE_FAILED
);
2703 DecodeRegisterPerfCounters(dtv
, tv
);
2705 *data
= (void *)dtv
;
2707 SCReturnInt(TM_ECODE_OK
);
2710 TmEcode
DecodeAFPThreadDeinit(ThreadVars
*tv
, void *data
)
2713 DecodeThreadVarsFree(tv
, data
);
2714 SCReturnInt(TM_ECODE_OK
);
2717 #endif /* HAVE_AF_PACKET */