]> git.ipfire.org Git - people/ms/suricata.git/blame - src/source-af-packet.c
ebpf: fix bypass filter vlan
[people/ms/suricata.git] / src / source-af-packet.c
CommitLineData
cde438f6 1/* Copyright (C) 2011-2018 Open Information Security Foundation
c45d8985
EL
2 *
3 * You can copy, redistribute or modify this Program under the terms of
4 * the GNU General Public License version 2 as published by the Free
5 * Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * version 2 along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
a6457262
EL
18/**
19 * \defgroup afppacket AF_PACKET running mode
20 *
21 * @{
22 */
23
c45d8985
EL
24/**
25 * \file
26 *
27 * \author Eric Leblond <eric@regit.org>
28 *
29 * AF_PACKET socket acquisition support
30 *
c45d8985
EL
31 */
32
91e1256b
EL
33#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
34#define SC_PCAP_DONT_INCLUDE_PCAP_H 1
c45d8985 35#include "suricata-common.h"
e80b30c0 36#include "config.h"
c45d8985
EL
37#include "suricata.h"
38#include "decode.h"
39#include "packet-queue.h"
40#include "threads.h"
41#include "threadvars.h"
42#include "tm-queuehandlers.h"
43#include "tm-modules.h"
44#include "tm-threads.h"
45#include "tm-threads-common.h"
46#include "conf.h"
17a32bda 47#include "util-cpu.h"
c45d8985 48#include "util-debug.h"
51eb9605 49#include "util-device.h"
d65f4585 50#include "util-ebpf.h"
c45d8985
EL
51#include "util-error.h"
52#include "util-privs.h"
e80b30c0 53#include "util-optimize.h"
51eb9605 54#include "util-checksum.h"
ac56b1bf 55#include "util-ioctl.h"
2cd6e128 56#include "util-host-info.h"
c45d8985
EL
57#include "tmqh-packetpool.h"
58#include "source-af-packet.h"
34b3f194 59#include "runmodes.h"
c45d8985 60
e80b30c0 61#ifdef HAVE_AF_PACKET
472e061c
VJ
62
63#if HAVE_SYS_IOCTL_H
2bc0be6e 64#include <sys/ioctl.h>
472e061c
VJ
65#endif
66
06173267
EL
67#ifdef HAVE_PACKET_EBPF
68#include "util-ebpf.h"
69#include <bpf/libbpf.h>
70#include <bpf/bpf.h>
71#endif
72
91e1256b
EL
/* Local declaration of a classic BPF program descriptor.
 * NOTE(review): presumably mirrors the pcap definition, which is kept out of
 * this file by PCAP_DONT_INCLUDE_PCAP_BPF_H - confirm. */
struct bpf_program {
    unsigned int bf_len;       /* presumably the number of instructions - TODO confirm */
    struct bpf_insn *bf_insns; /* instruction array */
};
77
78#ifdef HAVE_PCAP_H
79#include <pcap.h>
80#endif
81
82#ifdef HAVE_PCAP_PCAP_H
83#include <pcap/pcap.h>
84#endif
85
28e9e4c8
EL
86#include "util-bpf.h"
87
472e061c 88#if HAVE_LINUX_IF_ETHER_H
c45d8985 89#include <linux/if_ether.h>
472e061c
VJ
90#endif
91
92#if HAVE_LINUX_IF_PACKET_H
c45d8985 93#include <linux/if_packet.h>
472e061c
VJ
94#endif
95
96#if HAVE_LINUX_IF_ARP_H
c45d8985 97#include <linux/if_arp.h>
472e061c 98#endif
f2a6fb8a 99
472e061c 100#if HAVE_LINUX_FILTER_H
f2a6fb8a 101#include <linux/filter.h>
e80b30c0 102#endif
c45d8985 103
472e061c 104#if HAVE_SYS_MMAN_H
49b7b00f 105#include <sys/mman.h>
472e061c
VJ
106#endif
107
a40f08a2
EL
108#ifdef HAVE_HW_TIMESTAMPING
109#include <linux/net_tstamp.h>
110#endif
111
472e061c 112#endif /* HAVE_AF_PACKET */
49b7b00f 113
c45d8985
EL
114extern int max_pending_packets;
115
e80b30c0
EL
116#ifndef HAVE_AF_PACKET
117
ab1200fb 118TmEcode NoAFPSupportExit(ThreadVars *, const void *, void **);
e80b30c0 119
8f1d7503
KS
120void TmModuleReceiveAFPRegister (void)
121{
e80b30c0
EL
122 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
123 tmm_modules[TMM_RECEIVEAFP].ThreadInit = NoAFPSupportExit;
124 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
125 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = NULL;
126 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = NULL;
127 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
128 tmm_modules[TMM_RECEIVEAFP].cap_flags = 0;
3f1c4efc 129 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
e80b30c0
EL
130}
131
132/**
133 * \brief Registration Function for DecodeAFP.
e80b30c0 134 */
8f1d7503
KS
135void TmModuleDecodeAFPRegister (void)
136{
e80b30c0
EL
137 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
138 tmm_modules[TMM_DECODEAFP].ThreadInit = NoAFPSupportExit;
139 tmm_modules[TMM_DECODEAFP].Func = NULL;
140 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
141 tmm_modules[TMM_DECODEAFP].ThreadDeinit = NULL;
142 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
143 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
bc6cf438 144 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
e80b30c0
EL
145}
146
147/**
148 * \brief this function prints an error message and exits.
149 */
ab1200fb 150TmEcode NoAFPSupportExit(ThreadVars *tv, const void *initdata, void **data)
e80b30c0
EL
151{
152 SCLogError(SC_ERR_NO_AF_PACKET,"Error creating thread %s: you do not have "
153 "support for AF_PACKET enabled, on Linux host please recompile "
154 "with --enable-af-packet", tv->name);
155 exit(EXIT_FAILURE);
156}
157
158#else /* We have AF_PACKET support */
159
c45d8985
EL
/* Size of the interface name buffers kept in the thread variables. */
#define AFP_IFACE_NAME_LENGTH 48

/* Values stored in the per-thread and per-peer state fields. */
#define AFP_STATE_DOWN 0
#define AFP_STATE_UP 1

/* NOTE(review): units are not visible here - presumably microseconds; confirm
 * against the reconnection code. */
#define AFP_RECONNECT_TIMEOUT 500000
#define AFP_DOWN_COUNTER_INTERVAL 40

/* NOTE(review): presumably the poll() timeout in milliseconds - confirm. */
#define POLL_TIMEOUT 100
169
4a1a0080
EL
#ifndef TP_STATUS_USER_BUSY
/* For our own use we take the highest bit of tp_status. The constant is
 * unsigned: shifting a signed 1 into bit 31 is undefined behaviour. */
#define TP_STATUS_USER_BUSY (1U << 31)
#endif

#ifndef TP_STATUS_VLAN_VALID
/* Fallback for kernel headers that do not provide the flag. */
#define TP_STATUS_VLAN_VALID (1 << 4)
#endif
178
62e63e3f
EL
/* Result codes returned by the AF_PACKET read functions. */
enum {
    /** Read went fine (or there was nothing to read) */
    AFP_READ_OK = 0,
    /** The socket or the ring could not be read */
    AFP_READ_FAILURE = 1,
    /** Error during treatment by other functions of Suricata */
    AFP_SURI_FAILURE = 2,
    /** Returned by the ring reader in emergency mode after the kernel
     *  flagged a frame with TP_STATUS_LOSING */
    AFP_KERNEL_DROP = 3,
};

/* Error severity classes. */
enum {
    AFP_FATAL_ERROR = 1,
    AFP_RECOVERABLE_ERROR = 2,
};
191
49b7b00f
EL
/* One ring frame header viewed through the supported TPACKET layouts:
 * the same address is read as a V2 header, a V3 header (when built with
 * HAVE_TPACKET_V3) or an untyped pointer. */
union thdr {
    struct tpacket2_hdr *h2;
#ifdef HAVE_TPACKET_V3
    struct tpacket3_hdr *h3;
#endif
    void *raw;
};
199
06173267 200static int AFPBypassCallback(Packet *p);
8c880879 201static int AFPXDPBypassCallback(Packet *p);
06173267 202
91e1256b 203#define MAX_MAPS 32
c45d8985
EL
204/**
205 * \brief Structure to hold thread specific variables.
206 */
207typedef struct AFPThreadVars_
208{
69d0d484
VJ
209 union AFPRing {
210 char *v2;
211 struct iovec *v3;
212 } ring;
b797fd92 213
c45d8985 214 /* counters */
3ce39433 215 uint64_t pkts;
c45d8985 216
ff6365dd
EL
217 ThreadVars *tv;
218 TmSlot *slot;
9500d12c
EL
219 LiveDevice *livedev;
220 /* data link type for the thread */
b797fd92 221 uint32_t datalink;
9500d12c 222
d65f4585 223#ifdef HAVE_PACKET_EBPF
94a622cb 224 /* File descriptor of the IPv4 flow bypass table maps */
d65f4585 225 int v4_map_fd;
94a622cb 226 /* File descriptor of the IPv6 flow bypass table maps */
d65f4585
EL
227 int v6_map_fd;
228#endif
229
9500d12c 230 unsigned int frame_offset;
ff6365dd 231
9500d12c
EL
232 ChecksumValidationMode checksum_mode;
233
b797fd92 234 /* references to packet and drop counters */
9500d12c
EL
235 uint16_t capture_kernel_packets;
236 uint16_t capture_kernel_drops;
9efa4ace 237 uint16_t capture_errors;
9500d12c
EL
238
239 /* handle state */
240 uint8_t afp_state;
241 uint8_t copy_mode;
4bfa3aea 242 unsigned int flags;
9500d12c
EL
243
244 /* IPS peer */
245 AFPPeer *mpeer;
246
247 /* no mmap mode */
ff6365dd
EL
248 uint8_t *data; /** Per function and thread data */
249 int datalen; /** Length of per function and thread data */
9500d12c 250 int cooked;
ff6365dd 251
9500d12c
EL
252 /*
253 * Init related members
254 */
51eb9605 255
9500d12c
EL
256 /* thread specific socket */
257 int socket;
b797fd92
EL
258
259 int ring_size;
fa902abe 260 int block_size;
234aefdf 261 int block_timeout;
e80b30c0
EL
262 /* socket buffer size */
263 int buffer_size;
fa902abe 264 /* Filter */
ab1200fb 265 const char *bpf_filter;
91e1256b
EL
266 int ebpf_lb_fd;
267 int ebpf_filter_fd;
9500d12c 268
df7dbe36 269 int promisc;
e80b30c0 270
9500d12c 271 int down_count;
662dccd8 272
e80b30c0
EL
273 int cluster_id;
274 int cluster_type;
c45d8985 275
fbca1a4e
EL
276 int threads;
277
69d0d484
VJ
278 union AFPTpacketReq {
279 struct tpacket_req v2;
c2d0d938 280#ifdef HAVE_TPACKET_V3
69d0d484 281 struct tpacket_req3 v3;
c2d0d938 282#endif
69d0d484 283 } req;
b797fd92
EL
284
285 char iface[AFP_IFACE_NAME_LENGTH];
286 /* IPS output iface */
287 char out_iface[AFP_IFACE_NAME_LENGTH];
662dccd8 288
cba41207
AG
289 /* mmap'ed ring buffer */
290 unsigned int ring_buflen;
291 uint8_t *ring_buf;
91e1256b 292
8c880879
EL
293 uint8_t xdp_mode;
294
36838017 295#ifdef HAVE_PACKET_EBPF
4cf53100 296 struct ebpf_timeout_config ebpf_t_config;
36838017 297#endif
315c29a8 298
c45d8985
EL
299} AFPThreadVars;
300
301TmEcode ReceiveAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
ab1200fb 302TmEcode ReceiveAFPThreadInit(ThreadVars *, const void *, void **);
c45d8985
EL
303void ReceiveAFPThreadExitStats(ThreadVars *, void *);
304TmEcode ReceiveAFPThreadDeinit(ThreadVars *, void *);
e80b30c0 305TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot);
c45d8985 306
ab1200fb 307TmEcode DecodeAFPThreadInit(ThreadVars *, const void *, void **);
2864f9ee 308TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data);
c45d8985
EL
309TmEcode DecodeAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
310
f2a6fb8a 311TmEcode AFPSetBPFFilter(AFPThreadVars *ptv);
662dccd8 312static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose);
13f13b6d
EL
313static int AFPGetDevFlags(int fd, const char *ifname);
314static int AFPDerefSocket(AFPPeer* peer);
315static int AFPRefSocket(AFPPeer* peer);
f2a6fb8a 316
19475165 317
c45d8985
EL
318/**
319 * \brief Registration Function for RecieveAFP.
320 * \todo Unit tests are needed for this module.
321 */
8f1d7503
KS
322void TmModuleReceiveAFPRegister (void)
323{
c45d8985
EL
324 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
325 tmm_modules[TMM_RECEIVEAFP].ThreadInit = ReceiveAFPThreadInit;
ff6365dd 326 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
e80b30c0 327 tmm_modules[TMM_RECEIVEAFP].PktAcqLoop = ReceiveAFPLoop;
57e0bd39 328 tmm_modules[TMM_RECEIVEAFP].PktAcqBreakLoop = NULL;
c45d8985 329 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = ReceiveAFPThreadExitStats;
7127ae2b 330 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = ReceiveAFPThreadDeinit;
c45d8985
EL
331 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
332 tmm_modules[TMM_RECEIVEAFP].cap_flags = SC_CAP_NET_RAW;
cd4705e6 333 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
19475165 334
c45d8985
EL
335}
336
a6457262
EL
337
338/**
339 * \defgroup afppeers AFP peers list
340 *
341 * AF_PACKET has an IPS mode where interfaces are peered: packets from
342 * one interface are sent to the peered interface and the other way round. The
343 * ::AFPPeer list maintains the list of peers. Each ::AFPPeer stores the
344 * information needed to send packets on its interface.
345 * An element of the list must not be destroyed during the run of Suricata as it
346 * is used by ::Packet and other threads.
347 *
348 * @{
349 */
350
662dccd8
EL
351typedef struct AFPPeersList_ {
352 TAILQ_HEAD(, AFPPeer_) peers; /**< Head of list of fragments. */
353 int cnt;
354 int peered;
60400163
EL
355 int turn; /**< Next value for initialisation order */
356 SC_ATOMIC_DECLARE(int, reached); /**< Counter used to synchronize start */
662dccd8
EL
357} AFPPeersList;
358
359/**
a6457262
EL
360 * \brief Update the peer.
361 *
362 * Update the AFPPeer of a thread ie set new state, socket number
363 * or iface index.
364 *
662dccd8 365 */
ab1200fb 366static void AFPPeerUpdate(AFPThreadVars *ptv)
662dccd8
EL
367{
368 if (ptv->mpeer == NULL) {
369 return;
370 }
662dccd8
EL
371 (void)SC_ATOMIC_SET(ptv->mpeer->if_idx, AFPGetIfnumByDev(ptv->socket, ptv->iface, 0));
372 (void)SC_ATOMIC_SET(ptv->mpeer->socket, ptv->socket);
373 (void)SC_ATOMIC_SET(ptv->mpeer->state, ptv->afp_state);
374}
375
a6457262
EL
376/**
377 * \brief Clean and free ressource used by an ::AFPPeer
378 */
ab1200fb 379static void AFPPeerClean(AFPPeer *peer)
662dccd8
EL
380{
381 if (peer->flags & AFP_SOCK_PROTECT)
382 SCMutexDestroy(&peer->sock_protect);
383 SC_ATOMIC_DESTROY(peer->socket);
384 SC_ATOMIC_DESTROY(peer->if_idx);
385 SC_ATOMIC_DESTROY(peer->state);
386 SCFree(peer);
387}
388
389AFPPeersList peerslist;
390
391
a6457262
EL
392/**
393 * \brief Init the global list of ::AFPPeer
394 */
662dccd8
EL
395TmEcode AFPPeersListInit()
396{
397 SCEnter();
398 TAILQ_INIT(&peerslist.peers);
399 peerslist.peered = 0;
400 peerslist.cnt = 0;
60400163
EL
401 peerslist.turn = 0;
402 SC_ATOMIC_INIT(peerslist.reached);
403 (void) SC_ATOMIC_SET(peerslist.reached, 0);
662dccd8
EL
404 SCReturnInt(TM_ECODE_OK);
405}
406
a6457262
EL
407/**
408 * \brief Check that all ::AFPPeer got a peer
409 *
410 * \retval TM_ECODE_FAILED if some threads are not peered or TM_ECODE_OK else.
411 */
662dccd8
EL
412TmEcode AFPPeersListCheck()
413{
414#define AFP_PEERS_MAX_TRY 4
415#define AFP_PEERS_WAIT 20000
416 int try = 0;
417 SCEnter();
418 while (try < AFP_PEERS_MAX_TRY) {
419 if (peerslist.cnt != peerslist.peered) {
420 usleep(AFP_PEERS_WAIT);
421 } else {
422 SCReturnInt(TM_ECODE_OK);
423 }
424 try++;
425 }
426 SCLogError(SC_ERR_AFP_CREATE, "Threads number not equals");
427 SCReturnInt(TM_ECODE_FAILED);
428}
429
a6457262
EL
430/**
431 * \brief Declare a new AFP thread to AFP peers list.
432 */
ab1200fb 433static TmEcode AFPPeersListAdd(AFPThreadVars *ptv)
662dccd8
EL
434{
435 SCEnter();
436 AFPPeer *peer = SCMalloc(sizeof(AFPPeer));
437 AFPPeer *pitem;
ac56b1bf 438 int mtu, out_mtu;
662dccd8 439
e176be6f 440 if (unlikely(peer == NULL)) {
662dccd8
EL
441 SCReturnInt(TM_ECODE_FAILED);
442 }
443 memset(peer, 0, sizeof(AFPPeer));
444 SC_ATOMIC_INIT(peer->socket);
13f13b6d 445 SC_ATOMIC_INIT(peer->sock_usage);
662dccd8
EL
446 SC_ATOMIC_INIT(peer->if_idx);
447 SC_ATOMIC_INIT(peer->state);
448 peer->flags = ptv->flags;
60400163 449 peer->turn = peerslist.turn++;
662dccd8
EL
450
451 if (peer->flags & AFP_SOCK_PROTECT) {
452 SCMutexInit(&peer->sock_protect, NULL);
453 }
454
13f13b6d 455 (void)SC_ATOMIC_SET(peer->sock_usage, 0);
662dccd8
EL
456 (void)SC_ATOMIC_SET(peer->state, AFP_STATE_DOWN);
457 strlcpy(peer->iface, ptv->iface, AFP_IFACE_NAME_LENGTH);
458 ptv->mpeer = peer;
459 /* add element to iface list */
460 TAILQ_INSERT_TAIL(&peerslist.peers, peer, next);
662dccd8 461
13f13b6d
EL
462 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
463 peerslist.cnt++;
464
465 /* Iter to find a peer */
466 TAILQ_FOREACH(pitem, &peerslist.peers, next) {
467 if (pitem->peer)
468 continue;
469 if (strcmp(pitem->iface, ptv->out_iface))
470 continue;
471 peer->peer = pitem;
472 pitem->peer = peer;
473 mtu = GetIfaceMTU(ptv->iface);
474 out_mtu = GetIfaceMTU(ptv->out_iface);
475 if (mtu != out_mtu) {
476 SCLogError(SC_ERR_AFP_CREATE,
477 "MTU on %s (%d) and %s (%d) are not equal, "
478 "transmission of packets bigger than %d will fail.",
479 ptv->iface, mtu,
480 ptv->out_iface, out_mtu,
481 (out_mtu > mtu) ? mtu : out_mtu);
482 }
483 peerslist.peered += 2;
484 break;
ac56b1bf 485 }
662dccd8
EL
486 }
487
488 AFPPeerUpdate(ptv);
489
490 SCReturnInt(TM_ECODE_OK);
491}
492
ab1200fb 493static int AFPPeersListWaitTurn(AFPPeer *peer)
60400163 494{
b2691cbe
EL
495 /* If turn is zero, we already have started threads once */
496 if (peerslist.turn == 0)
497 return 0;
498
60400163
EL
499 if (peer->turn == SC_ATOMIC_GET(peerslist.reached))
500 return 0;
501 return 1;
502}
503
ab1200fb 504static void AFPPeersListReachedInc(void)
60400163 505{
b2691cbe
EL
506 if (peerslist.turn == 0)
507 return;
508
509 if (SC_ATOMIC_ADD(peerslist.reached, 1) == peerslist.turn) {
510 SCLogInfo("All AFP capture threads are running.");
511 (void)SC_ATOMIC_SET(peerslist.reached, 0);
512 /* Set turn to 0 to skip syncrhonization when ReceiveAFPLoop is
513 * restarted.
514 */
515 peerslist.turn = 0;
516 }
60400163
EL
517}
518
ab1200fb 519static int AFPPeersListStarted(void)
919377d4
EL
520{
521 return !peerslist.turn;
522}
523
a6457262
EL
524/**
525 * \brief Clean the global peers list.
526 */
662dccd8
EL
527void AFPPeersListClean()
528{
529 AFPPeer *pitem;
530
531 while ((pitem = TAILQ_FIRST(&peerslist.peers))) {
532 TAILQ_REMOVE(&peerslist.peers, pitem, next);
533 AFPPeerClean(pitem);
534 }
535}
536
a6457262
EL
537/**
538 * @}
539 */
540
c45d8985
EL
541/**
542 * \brief Registration Function for DecodeAFP.
543 * \todo Unit tests are needed for this module.
544 */
8f1d7503
KS
545void TmModuleDecodeAFPRegister (void)
546{
c45d8985
EL
547 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
548 tmm_modules[TMM_DECODEAFP].ThreadInit = DecodeAFPThreadInit;
549 tmm_modules[TMM_DECODEAFP].Func = DecodeAFP;
550 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
2864f9ee 551 tmm_modules[TMM_DECODEAFP].ThreadDeinit = DecodeAFPThreadDeinit;
c45d8985
EL
552 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
553 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
bc6cf438 554 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
c45d8985
EL
555}
556
662dccd8 557
e80b30c0
EL
558static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose);
559
e8a4a4c4 560static inline void AFPDumpCounters(AFPThreadVars *ptv)
6efd37a3 561{
6efd37a3 562#ifdef PACKET_STATISTICS
e8a4a4c4
EL
563 struct tpacket_stats kstats;
564 socklen_t len = sizeof (struct tpacket_stats);
565 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
566 &kstats, &len) > -1) {
567 SCLogDebug("(%s) Kernel: Packets %" PRIu32 ", dropped %" PRIu32 "",
568 ptv->tv->name,
569 kstats.tp_packets, kstats.tp_drops);
8992275b
VJ
570 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, kstats.tp_packets);
571 StatsAddUI64(ptv->tv, ptv->capture_kernel_drops, kstats.tp_drops);
3ce39433
EL
572 (void) SC_ATOMIC_ADD(ptv->livedev->drop, (uint64_t) kstats.tp_drops);
573 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, (uint64_t) kstats.tp_packets);
6efd37a3 574 }
e8a4a4c4 575#endif
6efd37a3 576}
c45d8985
EL
577
578/**
579 * \brief AF packet read function.
580 *
581 * This function fills
582 * From here the packets are picked up by the DecodeAFP thread.
583 *
584 * \param user pointer to AFPThreadVars
585 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
586 */
ab1200fb 587static int AFPRead(AFPThreadVars *ptv)
c45d8985
EL
588{
589 Packet *p = NULL;
590 /* XXX should try to use read that get directly to packet */
c45d8985
EL
591 int offset = 0;
592 int caplen;
593 struct sockaddr_ll from;
594 struct iovec iov;
595 struct msghdr msg;
c45d8985
EL
596 struct cmsghdr *cmsg;
597 union {
598 struct cmsghdr cmsg;
599 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
600 } cmsg_buf;
6efd37a3 601 unsigned char aux_checksum = 0;
c45d8985
EL
602
603 msg.msg_name = &from;
604 msg.msg_namelen = sizeof(from);
605 msg.msg_iov = &iov;
606 msg.msg_iovlen = 1;
c45d8985
EL
607 msg.msg_control = &cmsg_buf;
608 msg.msg_controllen = sizeof(cmsg_buf);
c45d8985
EL
609 msg.msg_flags = 0;
610
611 if (ptv->cooked)
612 offset = SLL_HEADER_LEN;
613 else
614 offset = 0;
e80b30c0
EL
615 iov.iov_len = ptv->datalen - offset;
616 iov.iov_base = ptv->data + offset;
c45d8985
EL
617
618 caplen = recvmsg(ptv->socket, &msg, MSG_TRUNC);
619
620 if (caplen < 0) {
621 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
622 errno);
62e63e3f 623 SCReturnInt(AFP_READ_FAILURE);
c45d8985 624 }
ff6365dd
EL
625
626 p = PacketGetFromQueueOrAlloc();
c45d8985 627 if (p == NULL) {
9efa4ace 628 SCReturnInt(AFP_SURI_FAILURE);
c45d8985 629 }
b33986c8 630 PKT_SET_SRC(p, PKT_SRC_WIRE);
06173267
EL
631 if (ptv->flags & AFP_BYPASS) {
632 p->BypassPacketsFlow = AFPBypassCallback;
d65f4585
EL
633#ifdef HAVE_PACKET_EBPF
634 p->afp_v.v4_map_fd = ptv->v4_map_fd;
635 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 636 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 637#endif
06173267 638 }
8c880879
EL
639 if (ptv->flags & AFP_XDPBYPASS) {
640 p->BypassPacketsFlow = AFPXDPBypassCallback;
d65f4585
EL
641#ifdef HAVE_PACKET_EBPF
642 p->afp_v.v4_map_fd = ptv->v4_map_fd;
643 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 644 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 645#endif
8c880879 646 }
c45d8985
EL
647
648 /* get timestamp of packet via ioctl */
649 if (ioctl(ptv->socket, SIOCGSTAMP, &p->ts) == -1) {
650 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
651 errno);
652 TmqhOutputPacketpool(ptv->tv, p);
62e63e3f 653 SCReturnInt(AFP_READ_FAILURE);
c45d8985
EL
654 }
655
656 ptv->pkts++;
51eb9605 657 p->livedev = ptv->livedev;
c45d8985
EL
658
659 /* add forged header */
660 if (ptv->cooked) {
e80b30c0 661 SllHdr * hdrp = (SllHdr *)ptv->data;
c45d8985
EL
662 /* XXX this is minimalist, but this seems enough */
663 hdrp->sll_protocol = from.sll_protocol;
664 }
665
666 p->datalink = ptv->datalink;
667 SET_PKT_LEN(p, caplen + offset);
e80b30c0 668 if (PacketCopyData(p, ptv->data, GET_PKT_LEN(p)) == -1) {
c45d8985 669 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 670 SCReturnInt(AFP_SURI_FAILURE);
c45d8985 671 }
e80b30c0
EL
672 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
673 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
674
6062e00c
EL
675 /* We only check for checksum disable */
676 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
51eb9605
EL
677 p->flags |= PKT_IGNORE_CHECKSUM;
678 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
679 if (ptv->livedev->ignore_checksum) {
680 p->flags |= PKT_IGNORE_CHECKSUM;
a565148f 681 } else if (ChecksumAutoModeCheck(ptv->pkts,
51eb9605
EL
682 SC_ATOMIC_GET(ptv->livedev->pkts),
683 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
684 ptv->livedev->ignore_checksum = 1;
6062e00c 685 p->flags |= PKT_IGNORE_CHECKSUM;
51eb9605 686 }
6062e00c 687 } else {
6efd37a3
EL
688 aux_checksum = 1;
689 }
6062e00c 690
6efd37a3
EL
691 /* List is NULL if we don't have activated auxiliary data */
692 for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
693 struct tpacket_auxdata *aux;
f6ddaf33 694
6efd37a3
EL
695 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata)) ||
696 cmsg->cmsg_level != SOL_PACKET ||
697 cmsg->cmsg_type != PACKET_AUXDATA)
698 continue;
f6ddaf33 699
6efd37a3
EL
700 aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
701
702 if (aux_checksum && (aux->tp_status & TP_STATUS_CSUMNOTREADY)) {
703 p->flags |= PKT_IGNORE_CHECKSUM;
f6ddaf33 704 }
6efd37a3 705 break;
f6ddaf33
EL
706 }
707
c469824b
EL
708 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
709 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 710 SCReturnInt(AFP_SURI_FAILURE);
c469824b 711 }
62e63e3f 712 SCReturnInt(AFP_READ_OK);
c45d8985
EL
713}
714
ecf59be4
EL
715/**
716 * \brief AF packet write function.
717 *
718 * This function has to be called before the memory
719 * related to Packet in ring buffer is released.
720 *
721 * \param pointer to Packet
722 * \param version of capture: TPACKET_V2 or TPACKET_V3
723 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
724 *
725 */
726static TmEcode AFPWritePacket(Packet *p, int version)
662dccd8
EL
727{
728 struct sockaddr_ll socket_address;
729 int socket;
ecf59be4
EL
730 uint8_t *pstart;
731 size_t plen;
ee7e689b
AG
732 union thdr h;
733 uint16_t vlan_tci = 0;
662dccd8
EL
734
735 if (p->afp_v.copy_mode == AFP_COPY_MODE_IPS) {
3f107fa1 736 if (PACKET_TEST_ACTION(p, ACTION_DROP)) {
662dccd8
EL
737 return TM_ECODE_OK;
738 }
739 }
740
741 if (SC_ATOMIC_GET(p->afp_v.peer->state) == AFP_STATE_DOWN)
742 return TM_ECODE_OK;
743
744 if (p->ethh == NULL) {
745 SCLogWarning(SC_ERR_INVALID_VALUE, "Should have an Ethernet header");
746 return TM_ECODE_FAILED;
747 }
748 /* Index of the network device */
749 socket_address.sll_ifindex = SC_ATOMIC_GET(p->afp_v.peer->if_idx);
750 /* Address length*/
751 socket_address.sll_halen = ETH_ALEN;
752 /* Destination MAC */
753 memcpy(socket_address.sll_addr, p->ethh, 6);
754
755 /* Send packet, locking the socket if necessary */
756 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
757 SCMutexLock(&p->afp_v.peer->sock_protect);
758 socket = SC_ATOMIC_GET(p->afp_v.peer->socket);
ecf59be4 759
ee7e689b
AG
760 h.raw = p->afp_v.relptr;
761
ecf59be4 762 if (version == TPACKET_V2) {
ecf59be4
EL
763 /* Copy VLAN header from ring memory. For post june 2011 kernel we test
764 * the flag. It is not defined for older kernel so we go best effort
765 * and test for non zero value of the TCI header. */
766 if (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci) {
ee7e689b
AG
767 vlan_tci = h.h2->tp_vlan_tci;
768 }
769 } else {
770#ifdef HAVE_TPACKET_V3
771 if (h.h3->tp_status & TP_STATUS_VLAN_VALID || h.h3->hv1.tp_vlan_tci) {
772 vlan_tci = h.h3->hv1.tp_vlan_tci;
ecf59be4 773 }
ee7e689b
AG
774#else
775 /* Should not get here */
776 BUG_ON(1);
777#endif
778 }
779
780 if (vlan_tci != 0) {
781 pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
782 plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
783 /* move ethernet addresses */
784 memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
785 /* write vlan info */
786 *(uint16_t *)(pstart + 2 * ETH_ALEN) = htons(0x8100);
787 *(uint16_t *)(pstart + 2 * ETH_ALEN + 2) = htons(vlan_tci);
ecf59be4
EL
788 } else {
789 pstart = GET_PKT_DATA(p);
790 plen = GET_PKT_LEN(p);
791 }
792
793 if (sendto(socket, pstart, plen, 0,
662dccd8
EL
794 (struct sockaddr*) &socket_address,
795 sizeof(struct sockaddr_ll)) < 0) {
796 SCLogWarning(SC_ERR_SOCKET, "Sending packet failed on socket %d: %s",
797 socket,
798 strerror(errno));
799 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
800 SCMutexUnlock(&p->afp_v.peer->sock_protect);
801 return TM_ECODE_FAILED;
802 }
803 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
804 SCMutexUnlock(&p->afp_v.peer->sock_protect);
805
806 return TM_ECODE_OK;
807}
808
ab1200fb 809static void AFPReleaseDataFromRing(Packet *p)
2011a3f8 810{
662dccd8
EL
811 /* Need to be in copy mode and need to detect early release
812 where Ethernet header could not be set (and pseudo packet) */
813 if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
ecf59be4 814 AFPWritePacket(p, TPACKET_V2);
662dccd8 815 }
13f13b6d
EL
816
817 if (AFPDerefSocket(p->afp_v.mpeer) == 0)
680e941a 818 goto cleanup;
13f13b6d 819
2011a3f8
EL
820 if (p->afp_v.relptr) {
821 union thdr h;
822 h.raw = p->afp_v.relptr;
823 h.h2->tp_status = TP_STATUS_KERNEL;
2011a3f8 824 }
680e941a
EL
825
826cleanup:
827 AFPV_CLEANUP(&p->afp_v);
b076a26c
KS
828}
829
#ifdef HAVE_TPACKET_V3
/**
 * \brief Release function of the Packets read in TPACKET_V3 mode.
 *
 * Writes the packet to the peer interface when running in a copy mode,
 * then frees or releases the Packet.
 *
 * \param p Packet to release
 */
static void AFPReleasePacketV3(Packet *p)
{
    /* Need to be in copy mode and need to detect early release
       where Ethernet header could not be set (and pseudo packet) */
    int forward = (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) &&
                  !PKT_IS_PSEUDOPKT(p);
    if (forward) {
        AFPWritePacket(p, TPACKET_V3);
    }
    PacketFreeOrRelease(p);
}
#endif
bae1b03c 841
ab1200fb 842static void AFPReleasePacket(Packet *p)
b076a26c
KS
843{
844 AFPReleaseDataFromRing(p);
845 PacketFreeOrRelease(p);
2011a3f8
EL
846}
847
49b7b00f
EL
848/**
849 * \brief AF packet read function for ring
850 *
851 * This function fills
852 * From here the packets are picked up by the DecodeAFP thread.
853 *
854 * \param user pointer to AFPThreadVars
855 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
856 */
ab1200fb 857static int AFPReadFromRing(AFPThreadVars *ptv)
49b7b00f
EL
858{
859 Packet *p = NULL;
860 union thdr h;
27b5136b 861 uint8_t emergency_flush = 0;
4d8f70c6 862 int read_pkts = 0;
b26ec603 863 int loop_start = -1;
4d8f70c6 864
49b7b00f 865
a369f8c3
EL
866 /* Loop till we have packets available */
867 while (1) {
53c02334
AS
868 if (unlikely(suricata_ctl_flags != 0)) {
869 break;
870 }
871
a369f8c3 872 /* Read packet from ring */
69d0d484 873 h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
9efa4ace
EL
874 if (unlikely(h.raw == NULL)) {
875 /* Impossible we reach this point in normal condition, so trigger
876 * a failure in reading */
877 SCReturnInt(AFP_READ_FAILURE);
34b3f194 878 }
662dccd8 879
82a2dd85 880 if ((! h.h2->tp_status) || (h.h2->tp_status & TP_STATUS_USER_BUSY)) {
4d8f70c6 881 if (read_pkts == 0) {
b26ec603
EL
882 if (loop_start == -1) {
883 loop_start = ptv->frame_offset;
884 } else if (unlikely(loop_start == (int)ptv->frame_offset)) {
885 SCReturnInt(AFP_READ_OK);
886 }
69d0d484 887 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
b26ec603
EL
888 ptv->frame_offset = 0;
889 }
890 continue;
4d8f70c6 891 }
27b5136b
EL
892 if ((emergency_flush) && (ptv->flags & AFP_EMERGENCY_MODE)) {
893 SCReturnInt(AFP_KERNEL_DROP);
894 } else {
895 SCReturnInt(AFP_READ_OK);
896 }
897 }
4d8f70c6
EL
898
899 read_pkts++;
b26ec603 900 loop_start = -1;
4d8f70c6 901
4a1a0080
EL
902 /* Our packet is still used by suricata, we exit read loop to
903 * gain some time */
904 if (h.h2->tp_status & TP_STATUS_USER_BUSY) {
905 SCReturnInt(AFP_READ_OK);
906 }
907
27b5136b
EL
908 if ((ptv->flags & AFP_EMERGENCY_MODE) && (emergency_flush == 1)) {
909 h.h2->tp_status = TP_STATUS_KERNEL;
910 goto next_frame;
a369f8c3
EL
911 }
912
913 p = PacketGetFromQueueOrAlloc();
914 if (p == NULL) {
9efa4ace 915 SCReturnInt(AFP_SURI_FAILURE);
34b3f194 916 }
b33986c8 917 PKT_SET_SRC(p, PKT_SRC_WIRE);
06173267
EL
918 if (ptv->flags & AFP_BYPASS) {
919 p->BypassPacketsFlow = AFPBypassCallback;
d65f4585 920#ifdef HAVE_PACKET_EBPF
6062c27e
EL
921 p->afp_v.v4_map_fd = ptv->v4_map_fd;
922 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 923 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 924#endif
06173267 925 }
8c880879
EL
926 if (ptv->flags & AFP_XDPBYPASS) {
927 p->BypassPacketsFlow = AFPXDPBypassCallback;
d65f4585 928#ifdef HAVE_PACKET_EBPF
6062c27e
EL
929 p->afp_v.v4_map_fd = ptv->v4_map_fd;
930 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 931 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 932#endif
8c880879 933 }
49b7b00f 934
4a1a0080
EL
935 /* Suricata will treat packet so telling it is busy, this
936 * status will be reset to 0 (ie TP_STATUS_KERNEL) in the release
937 * function. */
938 h.h2->tp_status |= TP_STATUS_USER_BUSY;
939
a369f8c3 940 ptv->pkts++;
a369f8c3 941 p->livedev = ptv->livedev;
a369f8c3 942 p->datalink = ptv->datalink;
d0940396 943
a369f8c3
EL
944 if (h.h2->tp_len > h.h2->tp_snaplen) {
945 SCLogDebug("Packet length (%d) > snaplen (%d), truncating",
946 h.h2->tp_len, h.h2->tp_snaplen);
947 }
71e47868
EL
948
949 /* get vlan id from header */
9500d12c 950 if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
e871f713 951 (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci)) {
01a8cc4e 952 p->vlan_id[0] = h.h2->tp_vlan_tci & 0x0fff;
71e47868
EL
953 p->vlan_idx = 1;
954 p->vlanh[0] = NULL;
955 }
956
a369f8c3
EL
957 if (ptv->flags & AFP_ZERO_COPY) {
958 if (PacketSetData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
959 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 960 SCReturnInt(AFP_SURI_FAILURE);
662dccd8 961 } else {
0f2b3406 962 p->afp_v.relptr = h.raw;
b076a26c 963 p->ReleasePacket = AFPReleasePacket;
5f12b234
EL
964 p->afp_v.mpeer = ptv->mpeer;
965 AFPRefSocket(ptv->mpeer);
0f2b3406
EL
966
967 p->afp_v.copy_mode = ptv->copy_mode;
968 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
969 p->afp_v.peer = ptv->mpeer->peer;
970 } else {
971 p->afp_v.peer = NULL;
662dccd8 972 }
a369f8c3
EL
973 }
974 } else {
975 if (PacketCopyData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
9efa4ace
EL
976 /* As we can possibly fail to copy the data due to invalid data, let's
977 * skip this packet and switch to the next one.
978 */
979 h.h2->tp_status = TP_STATUS_KERNEL;
69d0d484 980 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
9efa4ace
EL
981 ptv->frame_offset = 0;
982 }
a369f8c3 983 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 984 SCReturnInt(AFP_SURI_FAILURE);
a369f8c3
EL
985 }
986 }
d65f4585 987
a369f8c3
EL
988 /* Timestamp */
989 p->ts.tv_sec = h.h2->tp_sec;
990 p->ts.tv_usec = h.h2->tp_nsec/1000;
991 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
992 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
993
994 /* We only check for checksum disable */
995 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
996 p->flags |= PKT_IGNORE_CHECKSUM;
997 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
998 if (ptv->livedev->ignore_checksum) {
999 p->flags |= PKT_IGNORE_CHECKSUM;
1000 } else if (ChecksumAutoModeCheck(ptv->pkts,
1001 SC_ATOMIC_GET(ptv->livedev->pkts),
1002 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
1003 ptv->livedev->ignore_checksum = 1;
1004 p->flags |= PKT_IGNORE_CHECKSUM;
1005 }
1006 } else {
1007 if (h.h2->tp_status & TP_STATUS_CSUMNOTREADY) {
49b7b00f 1008 p->flags |= PKT_IGNORE_CHECKSUM;
a369f8c3 1009 }
ee6ba099
EL
1010 }
1011 if (h.h2->tp_status & TP_STATUS_LOSING) {
1012 emergency_flush = 1;
e8a4a4c4 1013 AFPDumpCounters(ptv);
a369f8c3
EL
1014 }
1015
5f12b234
EL
1016 /* release frame if not in zero copy mode */
1017 if (!(ptv->flags & AFP_ZERO_COPY)) {
1018 h.h2->tp_status = TP_STATUS_KERNEL;
1019 }
1020
a369f8c3
EL
1021 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
1022 h.h2->tp_status = TP_STATUS_KERNEL;
69d0d484 1023 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
a369f8c3
EL
1024 ptv->frame_offset = 0;
1025 }
1026 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1027 SCReturnInt(AFP_SURI_FAILURE);
49b7b00f 1028 }
49b7b00f 1029
27b5136b 1030next_frame:
69d0d484 1031 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
34b3f194 1032 ptv->frame_offset = 0;
350d7619
EL
1033 /* Get out of loop to be sure we will reach maintenance tasks */
1034 SCReturnInt(AFP_READ_OK);
34b3f194 1035 }
34b3f194
EL
1036 }
1037
49b7b00f
EL
1038 SCReturnInt(AFP_READ_OK);
1039}
1040
f947539d 1041#ifdef HAVE_TPACKET_V3
bae1b03c
EL
/**
 * \brief Release a TPACKET_V3 block by setting its status to TP_STATUS_KERNEL
 *
 * \param pbd descriptor of the block to release
 */
static inline void AFPFlushBlock(struct tpacket_block_desc *pbd)
{
    (*pbd).hdr.bh1.block_status = TP_STATUS_KERNEL;
}
1046
1047static inline int AFPParsePacketV3(AFPThreadVars *ptv, struct tpacket_block_desc *pbd, struct tpacket3_hdr *ppd)
1048{
1049 Packet *p = PacketGetFromQueueOrAlloc();
1050 if (p == NULL) {
9efa4ace 1051 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1052 }
1053 PKT_SET_SRC(p, PKT_SRC_WIRE);
06173267
EL
1054 if (ptv->flags & AFP_BYPASS) {
1055 p->BypassPacketsFlow = AFPBypassCallback;
d65f4585 1056#ifdef HAVE_PACKET_EBPF
6062c27e
EL
1057 p->afp_v.v4_map_fd = ptv->v4_map_fd;
1058 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 1059 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 1060#endif
e98b5e49 1061 } else if (ptv->flags & AFP_XDPBYPASS) {
8c880879 1062 p->BypassPacketsFlow = AFPXDPBypassCallback;
d65f4585 1063#ifdef HAVE_PACKET_EBPF
6062c27e
EL
1064 p->afp_v.v4_map_fd = ptv->v4_map_fd;
1065 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 1066 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 1067#endif
8c880879 1068 }
bae1b03c
EL
1069
1070 ptv->pkts++;
bae1b03c
EL
1071 p->livedev = ptv->livedev;
1072 p->datalink = ptv->datalink;
1073
e41a9d63
AG
1074 if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
1075 (ppd->tp_status & TP_STATUS_VLAN_VALID || ppd->hv1.tp_vlan_tci)) {
1076 p->vlan_id[0] = ppd->hv1.tp_vlan_tci & 0x0fff;
1077 p->vlan_idx = 1;
1078 p->vlanh[0] = NULL;
1079 }
1080
bae1b03c
EL
1081 if (ptv->flags & AFP_ZERO_COPY) {
1082 if (PacketSetData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
1083 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1084 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c 1085 }
310b27a1 1086 p->afp_v.relptr = ppd;
bae1b03c
EL
1087 p->ReleasePacket = AFPReleasePacketV3;
1088 p->afp_v.mpeer = ptv->mpeer;
1089 AFPRefSocket(ptv->mpeer);
1090
1091 p->afp_v.copy_mode = ptv->copy_mode;
1092 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
1093 p->afp_v.peer = ptv->mpeer->peer;
1094 } else {
1095 p->afp_v.peer = NULL;
1096 }
1097 } else {
1098 if (PacketCopyData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
1099 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1100 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1101 }
1102 }
1103 /* Timestamp */
1104 p->ts.tv_sec = ppd->tp_sec;
1105 p->ts.tv_usec = ppd->tp_nsec/1000;
1106 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
1107 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
1108
1109 /* We only check for checksum disable */
1110 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
1111 p->flags |= PKT_IGNORE_CHECKSUM;
1112 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
1113 if (ptv->livedev->ignore_checksum) {
1114 p->flags |= PKT_IGNORE_CHECKSUM;
1115 } else if (ChecksumAutoModeCheck(ptv->pkts,
1116 SC_ATOMIC_GET(ptv->livedev->pkts),
1117 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
1118 ptv->livedev->ignore_checksum = 1;
1119 p->flags |= PKT_IGNORE_CHECKSUM;
1120 }
1121 } else {
1122 if (ppd->tp_status & TP_STATUS_CSUMNOTREADY) {
1123 p->flags |= PKT_IGNORE_CHECKSUM;
1124 }
1125 }
1126
1127 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
bae1b03c 1128 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1129 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1130 }
1131
1132 SCReturnInt(AFP_READ_OK);
1133}
1134
1135static inline int AFPWalkBlock(AFPThreadVars *ptv, struct tpacket_block_desc *pbd)
1136{
1137 int num_pkts = pbd->hdr.bh1.num_pkts, i;
1138 uint8_t *ppd;
9efa4ace 1139 int ret = 0;
bae1b03c
EL
1140
1141 ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;
1142 for (i = 0; i < num_pkts; ++i) {
9efa4ace
EL
1143 ret = AFPParsePacketV3(ptv, pbd,
1144 (struct tpacket3_hdr *)ppd);
1145 switch (ret) {
1146 case AFP_READ_OK:
1147 break;
1148 case AFP_SURI_FAILURE:
1149 /* Internal error but let's just continue and
1150 * treat thenext packet */
1151 break;
1152 case AFP_READ_FAILURE:
1153 SCReturnInt(AFP_READ_FAILURE);
1154 default:
1155 SCReturnInt(ret);
5f84b55d 1156 }
bae1b03c
EL
1157 ppd = ppd + ((struct tpacket3_hdr *)ppd)->tp_next_offset;
1158 }
1159
1160 SCReturnInt(AFP_READ_OK);
1161}
f947539d 1162#endif /* HAVE_TPACKET_V3 */
bae1b03c
EL
1163
1164/**
1165 * \brief AF packet read function for ring
1166 *
1167 * This function fills
1168 * From here the packets are picked up by the DecodeAFP thread.
1169 *
1170 * \param user pointer to AFPThreadVars
1171 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
1172 */
ab1200fb 1173static int AFPReadFromRingV3(AFPThreadVars *ptv)
bae1b03c 1174{
c2d0d938 1175#ifdef HAVE_TPACKET_V3
bae1b03c 1176 struct tpacket_block_desc *pbd;
9efa4ace 1177 int ret = 0;
bae1b03c
EL
1178
1179 /* Loop till we have packets available */
1180 while (1) {
1181 if (unlikely(suricata_ctl_flags != 0)) {
1182 SCLogInfo("Exiting AFP V3 read loop");
1183 break;
1184 }
1185
69d0d484 1186 pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
bae1b03c
EL
1187
1188 /* block is not ready to be read */
1189 if ((pbd->hdr.bh1.block_status & TP_STATUS_USER) == 0) {
1190 SCReturnInt(AFP_READ_OK);
1191 }
1192
9efa4ace
EL
1193 ret = AFPWalkBlock(ptv, pbd);
1194 if (unlikely(ret != AFP_READ_OK)) {
bae1b03c 1195 AFPFlushBlock(pbd);
9efa4ace 1196 SCReturnInt(ret);
bae1b03c
EL
1197 }
1198
1199 AFPFlushBlock(pbd);
69d0d484 1200 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
bae1b03c
EL
1201 /* return to maintenance task after one loop on the ring */
1202 if (ptv->frame_offset == 0) {
1203 SCReturnInt(AFP_READ_OK);
1204 }
1205 }
c2d0d938 1206#endif
bae1b03c
EL
1207 SCReturnInt(AFP_READ_OK);
1208}
1209
13f13b6d
EL
1210/**
1211 * \brief Reference socket
1212 *
1213 * \retval O in case of failure, 1 in case of success
1214 */
1215static int AFPRefSocket(AFPPeer* peer)
1216{
1217 if (unlikely(peer == NULL))
1218 return 0;
1219
1220 (void)SC_ATOMIC_ADD(peer->sock_usage, 1);
1221 return 1;
1222}
1223
1224
1225/**
1226 * \brief Dereference socket
1227 *
1228 * \retval 1 if socket is still alive, 0 if not
1229 */
1230static int AFPDerefSocket(AFPPeer* peer)
1231{
4424f5a2
EL
1232 if (peer == NULL)
1233 return 1;
1234
13f13b6d
EL
1235 if (SC_ATOMIC_SUB(peer->sock_usage, 1) == 0) {
1236 if (SC_ATOMIC_GET(peer->state) == AFP_STATE_DOWN) {
1237 SCLogInfo("Cleaning socket connected to '%s'", peer->iface);
1238 close(SC_ATOMIC_GET(peer->socket));
1239 return 0;
1240 }
1241 }
1242 return 1;
1243}
1244
ab1200fb 1245static void AFPSwitchState(AFPThreadVars *ptv, int state)
13f13b6d
EL
1246{
1247 ptv->afp_state = state;
1248 ptv->down_count = 0;
49b7b00f 1249
13f13b6d
EL
1250 AFPPeerUpdate(ptv);
1251
1252 /* Do cleaning if switching to down state */
1253 if (state == AFP_STATE_DOWN) {
5f84b55d
EL
1254#ifdef HAVE_TPACKET_V3
1255 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484
VJ
1256 if (!ptv->ring.v3) {
1257 SCFree(ptv->ring.v3);
1258 ptv->ring.v3 = NULL;
5f84b55d
EL
1259 }
1260 } else {
1261#endif
69d0d484 1262 if (ptv->ring.v2) {
5f84b55d 1263 /* only used in reading phase, we can free it */
69d0d484
VJ
1264 SCFree(ptv->ring.v2);
1265 ptv->ring.v2 = NULL;
5f84b55d
EL
1266 }
1267#ifdef HAVE_TPACKET_V3
13f13b6d 1268 }
5f84b55d 1269#endif
13f13b6d
EL
1270 if (ptv->socket != -1) {
1271 /* we need to wait for all packets to return data */
1272 if (SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1) == 0) {
b86e1762 1273 SCLogDebug("Cleaning socket connected to '%s'", ptv->iface);
cba41207 1274 munmap(ptv->ring_buf, ptv->ring_buflen);
13f13b6d
EL
1275 close(ptv->socket);
1276 ptv->socket = -1;
1277 }
1278 }
1279 }
1280 if (state == AFP_STATE_UP) {
1281 (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
1282 }
1283}
49b7b00f 1284
7fea0ec6
EL
1285static int AFPReadAndDiscard(AFPThreadVars *ptv, struct timeval *synctv,
1286 uint64_t *discarded_pkts)
919377d4
EL
1287{
1288 struct sockaddr_ll from;
1289 struct iovec iov;
1290 struct msghdr msg;
1291 struct timeval ts;
1292 union {
1293 struct cmsghdr cmsg;
1294 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
1295 } cmsg_buf;
1296
1297
1298 if (unlikely(suricata_ctl_flags != 0)) {
1299 return 1;
1300 }
1301
1302 msg.msg_name = &from;
1303 msg.msg_namelen = sizeof(from);
1304 msg.msg_iov = &iov;
1305 msg.msg_iovlen = 1;
1306 msg.msg_control = &cmsg_buf;
1307 msg.msg_controllen = sizeof(cmsg_buf);
1308 msg.msg_flags = 0;
1309
1310 iov.iov_len = ptv->datalen;
1311 iov.iov_base = ptv->data;
1312
339f0665 1313 (void)recvmsg(ptv->socket, &msg, MSG_TRUNC);
919377d4
EL
1314
1315 if (ioctl(ptv->socket, SIOCGSTAMP, &ts) == -1) {
1316 /* FIXME */
1317 return -1;
1318 }
1319
1320 if ((ts.tv_sec > synctv->tv_sec) ||
1321 (ts.tv_sec >= synctv->tv_sec &&
1322 ts.tv_usec > synctv->tv_usec)) {
1323 return 1;
1324 }
1325 return 0;
1326}
1327
7fea0ec6
EL
1328static int AFPReadAndDiscardFromRing(AFPThreadVars *ptv, struct timeval *synctv,
1329 uint64_t *discarded_pkts)
919377d4
EL
1330{
1331 union thdr h;
1332
1333 if (unlikely(suricata_ctl_flags != 0)) {
1334 return 1;
1335 }
1336
f947539d 1337#ifdef HAVE_TPACKET_V3
bae1b03c 1338 if (ptv->flags & AFP_TPACKET_V3) {
cebbe06f 1339 int ret = 0;
7fea0ec6 1340 struct tpacket_block_desc *pbd;
69d0d484 1341 pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
7fea0ec6 1342 *discarded_pkts += pbd->hdr.bh1.num_pkts;
cebbe06f
VJ
1343 struct tpacket3_hdr *ppd =
1344 (struct tpacket3_hdr *)((uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt);
1345 if (((time_t)ppd->tp_sec > synctv->tv_sec) ||
1346 ((time_t)ppd->tp_sec == synctv->tv_sec &&
1347 (suseconds_t) (ppd->tp_nsec / 1000) > (suseconds_t)synctv->tv_usec)) {
1348 ret = 1;
1349 }
7fea0ec6 1350 AFPFlushBlock(pbd);
69d0d484 1351 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
cebbe06f 1352 return ret;
f947539d
VJ
1353
1354 } else
1355#endif
1356 {
7fea0ec6 1357 /* Read packet from ring */
69d0d484 1358 h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
7fea0ec6
EL
1359 if (h.raw == NULL) {
1360 return -1;
1361 }
1362 (*discarded_pkts)++;
1363 if (((time_t)h.h2->tp_sec > synctv->tv_sec) ||
1364 ((time_t)h.h2->tp_sec == synctv->tv_sec &&
1365 (suseconds_t) (h.h2->tp_nsec / 1000) > synctv->tv_usec)) {
1366 return 1;
1367 }
919377d4 1368
7fea0ec6 1369 h.h2->tp_status = TP_STATUS_KERNEL;
69d0d484 1370 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
7fea0ec6
EL
1371 ptv->frame_offset = 0;
1372 }
919377d4
EL
1373 }
1374
1375
1376 return 0;
1377}
1378
806844d8
VJ
1379/** \brief wait for all afpacket threads to fully init
1380 *
1381 * Discard packets before all threads are ready, as the cluster
1382 * setup is not complete yet.
1383 *
1384 * if AFPPeersListStarted() returns true init is complete
1385 *
1386 * \retval r 1 = happy, otherwise unhappy
1387 */
7fea0ec6 1388static int AFPSynchronizeStart(AFPThreadVars *ptv, uint64_t *discarded_pkts)
919377d4 1389{
919377d4 1390 struct timeval synctv;
806844d8
VJ
1391 struct pollfd fds;
1392
1393 fds.fd = ptv->socket;
1394 fds.events = POLLIN;
919377d4
EL
1395
1396 /* Set timeval to end of the world */
1397 synctv.tv_sec = 0xffffffff;
1398 synctv.tv_usec = 0xffffffff;
1399
1400 while (1) {
8709a20d 1401 int r = poll(&fds, 1, POLL_TIMEOUT);
806844d8
VJ
1402 if (r > 0 &&
1403 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1404 SCLogWarning(SC_ERR_AFP_READ, "poll failed %02x",
1405 fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL));
1406 return 0;
1407 } else if (r > 0) {
1408 if (AFPPeersListStarted() && synctv.tv_sec == (time_t) 0xffffffff) {
1409 gettimeofday(&synctv, NULL);
1410 }
1411 if (ptv->flags & AFP_RING_MODE) {
7fea0ec6 1412 r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
806844d8 1413 } else {
7fea0ec6 1414 r = AFPReadAndDiscard(ptv, &synctv, discarded_pkts);
806844d8
VJ
1415 }
1416 SCLogDebug("Discarding on %s", ptv->tv->name);
1417 switch (r) {
1418 case 1:
9f7ba071 1419 SCLogDebug("Starting to read on %s", ptv->tv->name);
806844d8
VJ
1420 return 1;
1421 case -1:
1422 return r;
1423 }
1424 /* no packets */
1425 } else if (r == 0 && AFPPeersListStarted()) {
86a3f064 1426 SCLogDebug("Starting to read on %s", ptv->tv->name);
806844d8 1427 return 1;
43b6cbd4 1428 } else if (r < 0) { /* only exit on error */
806844d8
VJ
1429 SCLogWarning(SC_ERR_AFP_READ, "poll failed with retval %d", r);
1430 return 0;
919377d4
EL
1431 }
1432 }
1433 return 1;
1434}
1435
13f13b6d
EL
1436/**
1437 * \brief Try to reopen socket
1438 *
1439 * \retval 0 in case of success, negative if error occurs or a condition
1440 * is not met.
1441 */
c45d8985
EL
1442static int AFPTryReopen(AFPThreadVars *ptv)
1443{
13f13b6d
EL
1444 ptv->down_count++;
1445
13f13b6d
EL
1446 /* Don't reconnect till we have packet that did not release data */
1447 if (SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0) {
1448 return -1;
1449 }
c45d8985 1450
8709a20d 1451 int afp_activate_r = AFPCreateSocket(ptv, ptv->iface, 0);
c45d8985 1452 if (afp_activate_r != 0) {
13f13b6d
EL
1453 if (ptv->down_count % AFP_DOWN_COUNTER_INTERVAL == 0) {
1454 SCLogWarning(SC_ERR_AFP_CREATE, "Can not open iface '%s'",
1455 ptv->iface);
1456 }
c45d8985
EL
1457 return afp_activate_r;
1458 }
1459
3bea3b39 1460 SCLogInfo("Interface '%s' is back", ptv->iface);
c45d8985
EL
1461 return 0;
1462}
1463
e80b30c0
EL
1464/**
1465 * \brief Main AF_PACKET reading Loop function
1466 */
1467TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot)
1468{
34581ce9
AS
1469 SCEnter();
1470
e80b30c0 1471 AFPThreadVars *ptv = (AFPThreadVars *)data;
e80b30c0
EL
1472 struct pollfd fds;
1473 int r;
34581ce9 1474 TmSlot *s = (TmSlot *)slot;
e8a4a4c4 1475 time_t last_dump = 0;
49612128 1476 time_t current_time;
5f400785 1477 int (*AFPReadFunc) (AFPThreadVars *);
7fea0ec6 1478 uint64_t discarded_pkts = 0;
e80b30c0 1479
34581ce9 1480 ptv->slot = s->slot_next;
e80b30c0 1481
5f400785 1482 if (ptv->flags & AFP_RING_MODE) {
bae1b03c
EL
1483 if (ptv->flags & AFP_TPACKET_V3) {
1484 AFPReadFunc = AFPReadFromRingV3;
1485 } else {
1486 AFPReadFunc = AFPReadFromRing;
1487 }
5f400785
EL
1488 } else {
1489 AFPReadFunc = AFPRead;
1490 }
1491
60400163
EL
1492 if (ptv->afp_state == AFP_STATE_DOWN) {
1493 /* Wait for our turn, threads before us must have opened the socket */
1494 while (AFPPeersListWaitTurn(ptv->mpeer)) {
1495 usleep(1000);
1992a227
EL
1496 if (suricata_ctl_flags != 0) {
1497 break;
1498 }
60400163
EL
1499 }
1500 r = AFPCreateSocket(ptv, ptv->iface, 1);
1501 if (r < 0) {
1992a227
EL
1502 switch (-r) {
1503 case AFP_FATAL_ERROR:
1504 SCLogError(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, fatal error");
1992a227
EL
1505 SCReturnInt(TM_ECODE_FAILED);
1506 case AFP_RECOVERABLE_ERROR:
1507 SCLogWarning(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, retrying soon");
1508 }
60400163
EL
1509 }
1510 AFPPeersListReachedInc();
1511 }
1512 if (ptv->afp_state == AFP_STATE_UP) {
86a3f064 1513 SCLogDebug("Thread %s using socket %d", tv->name, ptv->socket);
c99dc5a7 1514 AFPSynchronizeStart(ptv, &discarded_pkts);
7fea0ec6
EL
1515 /* let's reset counter as we will start the capture at the
1516 * next function call */
1517#ifdef PACKET_STATISTICS
1518 struct tpacket_stats kstats;
1519 socklen_t len = sizeof (struct tpacket_stats);
1520 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
1521 &kstats, &len) > -1) {
1522 uint64_t pkts = 0;
1523 SCLogDebug("(%s) Kernel socket startup: Packets %" PRIu32
1524 ", dropped %" PRIu32 "",
1525 ptv->tv->name,
1526 kstats.tp_packets, kstats.tp_drops);
1527 pkts = kstats.tp_packets - discarded_pkts - kstats.tp_drops;
1528 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, pkts);
1529 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, pkts);
1530 }
1531#endif
60400163
EL
1532 }
1533
e80b30c0
EL
1534 fds.fd = ptv->socket;
1535 fds.events = POLLIN;
1536
1537 while (1) {
1538 /* Start by checking the state of our interface */
1539 if (unlikely(ptv->afp_state == AFP_STATE_DOWN)) {
1540 int dbreak = 0;
662dccd8 1541
e80b30c0
EL
1542 do {
1543 usleep(AFP_RECONNECT_TIMEOUT);
1544 if (suricata_ctl_flags != 0) {
1545 dbreak = 1;
1546 break;
1547 }
1548 r = AFPTryReopen(ptv);
09e709d1 1549 fds.fd = ptv->socket;
e80b30c0
EL
1550 } while (r < 0);
1551 if (dbreak == 1)
1552 break;
1553 }
1554
1555 /* make sure we have at least one packet in the packet pool, to prevent
1556 * us from alloc'ing packets at line rate */
3c6e01f6 1557 PacketPoolWait();
e80b30c0
EL
1558
1559 r = poll(&fds, 1, POLL_TIMEOUT);
1560
1561 if (suricata_ctl_flags != 0) {
1562 break;
1563 }
1564
1565 if (r > 0 &&
1566 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1567 if (fds.revents & (POLLHUP | POLLRDHUP)) {
13f13b6d 1568 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0 1569 continue;
ff6365dd 1570 } else if (fds.revents & POLLERR) {
e80b30c0
EL
1571 char c;
1572 /* Do a recv to get errno */
1573 if (recv(ptv->socket, &c, sizeof c, MSG_PEEK) != -1)
1574 continue; /* what, no error? */
3bea3b39 1575 SCLogError(SC_ERR_AFP_READ,
efbb5ce0 1576 "Error reading data from iface '%s': (%d) %s",
3bea3b39 1577 ptv->iface, errno, strerror(errno));
13f13b6d 1578 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0 1579 continue;
ff6365dd 1580 } else if (fds.revents & POLLNVAL) {
e80b30c0 1581 SCLogError(SC_ERR_AFP_READ, "Invalid polling request");
13f13b6d 1582 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0
EL
1583 continue;
1584 }
1585 } else if (r > 0) {
5f400785 1586 r = AFPReadFunc(ptv);
62e63e3f 1587 switch (r) {
27adbfa8
EL
1588 case AFP_READ_OK:
1589 /* Trigger one dump of stats every second */
49612128
EL
1590 current_time = time(NULL);
1591 if (current_time != last_dump) {
27adbfa8 1592 AFPDumpCounters(ptv);
49612128 1593 last_dump = current_time;
27adbfa8
EL
1594 }
1595 break;
62e63e3f
EL
1596 case AFP_READ_FAILURE:
1597 /* AFPRead in error: best to reset the socket */
3bea3b39 1598 SCLogError(SC_ERR_AFP_READ,
efbb5ce0 1599 "AFPRead error reading data from iface '%s': (%d) %s",
3bea3b39 1600 ptv->iface, errno, strerror(errno));
13f13b6d 1601 AFPSwitchState(ptv, AFP_STATE_DOWN);
62e63e3f 1602 continue;
9efa4ace
EL
1603 case AFP_SURI_FAILURE:
1604 StatsIncr(ptv->tv, ptv->capture_errors);
62e63e3f 1605 break;
27b5136b 1606 case AFP_KERNEL_DROP:
e8a4a4c4 1607 AFPDumpCounters(ptv);
27b5136b 1608 break;
e80b30c0 1609 }
11099cfa 1610 } else if (unlikely(r == 0)) {
f53e687b
EL
1611 /* Trigger one dump of stats every second */
1612 current_time = time(NULL);
1613 if (current_time != last_dump) {
1614 AFPDumpCounters(ptv);
1615 last_dump = current_time;
1616 }
ce71bf1f
VJ
1617 /* poll timed out, lets see handle our timeout path */
1618 TmThreadsCaptureHandleTimeout(tv, ptv->slot, NULL);
11099cfa 1619
e80b30c0 1620 } else if ((r < 0) && (errno != EINTR)) {
efbb5ce0 1621 SCLogError(SC_ERR_AFP_READ, "Error reading data from iface '%s': (%d) %s",
3bea3b39 1622 ptv->iface,
e80b30c0 1623 errno, strerror(errno));
13f13b6d 1624 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0
EL
1625 continue;
1626 }
752f03e7 1627 StatsSyncCountersIfSignalled(tv);
e80b30c0
EL
1628 }
1629
4e561d6b 1630 AFPDumpCounters(ptv);
752f03e7 1631 StatsSyncCountersIfSignalled(tv);
e80b30c0
EL
1632 SCReturnInt(TM_ECODE_OK);
1633}
1634
13f13b6d
EL
1635static int AFPGetDevFlags(int fd, const char *ifname)
1636{
1637 struct ifreq ifr;
1638
1639 memset(&ifr, 0, sizeof(ifr));
1640 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1641
1642 if (ioctl(fd, SIOCGIFFLAGS, &ifr) == -1) {
1643 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1644 ifname, strerror(errno));
1645 return -1;
1646 }
1647
1648 return ifr.ifr_flags;
1649}
1650
1651
e80b30c0 1652static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose)
c45d8985
EL
1653{
1654 struct ifreq ifr;
1655
1656 memset(&ifr, 0, sizeof(ifr));
e80b30c0 1657 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
c45d8985
EL
1658
1659 if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
cba41207
AG
1660 if (verbose)
1661 SCLogError(SC_ERR_AFP_CREATE, "Unable to find iface %s: %s",
1662 ifname, strerror(errno));
c45d8985
EL
1663 return -1;
1664 }
1665
1666 return ifr.ifr_ifindex;
1667}
1668
e80b30c0 1669static int AFPGetDevLinktype(int fd, const char *ifname)
c45d8985
EL
1670{
1671 struct ifreq ifr;
1672
1673 memset(&ifr, 0, sizeof(ifr));
e80b30c0 1674 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
c45d8985
EL
1675
1676 if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
1677 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1678 ifname, strerror(errno));
1679 return -1;
1680 }
1681
e80b30c0
EL
1682 switch (ifr.ifr_hwaddr.sa_family) {
1683 case ARPHRD_LOOPBACK:
1684 return LINKTYPE_ETHERNET;
1685 case ARPHRD_PPP:
11eb1d7c 1686 case ARPHRD_NONE:
e80b30c0
EL
1687 return LINKTYPE_RAW;
1688 default:
1689 return ifr.ifr_hwaddr.sa_family;
1690 }
c45d8985
EL
1691}
1692
b7bf299e
EL
1693int AFPGetLinkType(const char *ifname)
1694{
1695 int ltype;
1696
1697 int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
1698 if (fd == -1) {
1699 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
1700 return LINKTYPE_RAW;
1701 }
1702
1703 ltype = AFPGetDevLinktype(fd, ifname);
1704 close(fd);
1705
1706 return ltype;
1707}
1708
49b7b00f
EL
1709static int AFPComputeRingParams(AFPThreadVars *ptv, int order)
1710{
1711 /* Compute structure:
1712 Target is to store all pending packets
1713 with a size equal to MTU + auxdata
1714 And we keep a decent number of block
1715
1716 To do so:
1717 Compute frame_size (aligned to be able to fit in block
1718 Check which block size we need. Blocksize is a 2^n * pagesize
1719 We then need to get order, big enough to have
1720 frame_size < block size
1721 Find number of frame per block (divide)
1722 Fill in packet_req
1723
1724 Compute frame size:
1725 described in packet_mmap.txt
1726 dependant on snaplen (need to use a variable ?)
1727snaplen: MTU ?
1728tp_hdrlen determine_version in daq_afpacket
1729in V1: sizeof(struct tpacket_hdr);
1730in V2: val in getsockopt(instance->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len)
1731frame size: TPACKET_ALIGN(snaplen + TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1732
1733 */
1734 int tp_hdrlen = sizeof(struct tpacket_hdr);
1735 int snaplen = default_packet_size;
1736
03032457
EL
1737 if (snaplen == 0) {
1738 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1739 if (snaplen <= 0) {
1740 SCLogWarning(SC_ERR_INVALID_VALUE,
1741 "Unable to get MTU, setting snaplen to sane default of 1514");
1742 snaplen = 1514;
1743 }
1744 }
1745
69d0d484
VJ
1746 ptv->req.v2.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1747 ptv->req.v2.tp_block_size = getpagesize() << order;
1748 int frames_per_block = ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size;
49b7b00f 1749 if (frames_per_block == 0) {
bae1b03c 1750 SCLogError(SC_ERR_INVALID_VALUE, "Frame size bigger than block size");
49b7b00f
EL
1751 return -1;
1752 }
69d0d484
VJ
1753 ptv->req.v2.tp_frame_nr = ptv->ring_size;
1754 ptv->req.v2.tp_block_nr = ptv->req.v2.tp_frame_nr / frames_per_block + 1;
49b7b00f 1755 /* exact division */
69d0d484 1756 ptv->req.v2.tp_frame_nr = ptv->req.v2.tp_block_nr * frames_per_block;
b3bf7a57 1757 SCLogPerf("AF_PACKET RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d",
69d0d484
VJ
1758 ptv->req.v2.tp_block_size, ptv->req.v2.tp_block_nr,
1759 ptv->req.v2.tp_frame_size, ptv->req.v2.tp_frame_nr);
49b7b00f
EL
1760 return 1;
1761}
1762
c2d0d938 1763#ifdef HAVE_TPACKET_V3
bae1b03c
EL
1764static int AFPComputeRingParamsV3(AFPThreadVars *ptv)
1765{
69d0d484
VJ
1766 ptv->req.v3.tp_block_size = ptv->block_size;
1767 ptv->req.v3.tp_frame_size = 2048;
bae1b03c
EL
1768 int frames_per_block = 0;
1769 int tp_hdrlen = sizeof(struct tpacket3_hdr);
1770 int snaplen = default_packet_size;
1771
1772 if (snaplen == 0) {
1773 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1774 if (snaplen <= 0) {
1775 SCLogWarning(SC_ERR_INVALID_VALUE,
1776 "Unable to get MTU, setting snaplen to sane default of 1514");
1777 snaplen = 1514;
1778 }
1779 }
1780
69d0d484
VJ
1781 ptv->req.v3.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1782 frames_per_block = ptv->req.v3.tp_block_size / ptv->req.v3.tp_frame_size;
bae1b03c
EL
1783
1784 if (frames_per_block == 0) {
1785 SCLogError(SC_ERR_INVALID_VALUE,
1786 "Block size is too small, it should be at least %d",
69d0d484 1787 ptv->req.v3.tp_frame_size);
bae1b03c
EL
1788 return -1;
1789 }
69d0d484 1790 ptv->req.v3.tp_block_nr = ptv->ring_size / frames_per_block + 1;
bae1b03c 1791 /* exact division */
69d0d484
VJ
1792 ptv->req.v3.tp_frame_nr = ptv->req.v3.tp_block_nr * frames_per_block;
1793 ptv->req.v3.tp_retire_blk_tov = ptv->block_timeout;
1794 ptv->req.v3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
b3bf7a57 1795 SCLogPerf("AF_PACKET V3 RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d (mem: %d)",
69d0d484
VJ
1796 ptv->req.v3.tp_block_size, ptv->req.v3.tp_block_nr,
1797 ptv->req.v3.tp_frame_size, ptv->req.v3.tp_frame_nr,
1798 ptv->req.v3.tp_block_size * ptv->req.v3.tp_block_nr
bae1b03c
EL
1799 );
1800 return 1;
1801}
c2d0d938 1802#endif
bae1b03c 1803
c7bde9df
EL
1804static int AFPSetupRing(AFPThreadVars *ptv, char *devname)
1805{
1806 int val;
1807 unsigned int len = sizeof(val), i;
c7bde9df 1808 int order;
f5c20191 1809 int r, mmap_flag;
c7bde9df 1810
c2d0d938 1811#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1812 if (ptv->flags & AFP_TPACKET_V3) {
1813 val = TPACKET_V3;
f947539d 1814 } else
c2d0d938 1815#endif
f947539d 1816 {
c7bde9df
EL
1817 val = TPACKET_V2;
1818 }
1819 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) {
1820 if (errno == ENOPROTOOPT) {
1821 if (ptv->flags & AFP_TPACKET_V3) {
1822 SCLogError(SC_ERR_AFP_CREATE,
1823 "Too old kernel giving up (need 3.2 for TPACKET_V3)");
1824 } else {
1825 SCLogError(SC_ERR_AFP_CREATE,
1826 "Too old kernel giving up (need 2.6.27 at least)");
1827 }
1828 }
1829 SCLogError(SC_ERR_AFP_CREATE, "Error when retrieving packet header len");
1830 return AFP_FATAL_ERROR;
1831 }
1832
f947539d
VJ
1833 val = TPACKET_V2;
1834#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1835 if (ptv->flags & AFP_TPACKET_V3) {
1836 val = TPACKET_V3;
c7bde9df 1837 }
f947539d 1838#endif
c7bde9df
EL
1839 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_VERSION, &val,
1840 sizeof(val)) < 0) {
1841 SCLogError(SC_ERR_AFP_CREATE,
1842 "Can't activate TPACKET_V2/TPACKET_V3 on packet socket: %s",
1843 strerror(errno));
1844 return AFP_FATAL_ERROR;
1845 }
1846
a40f08a2
EL
1847#ifdef HAVE_HW_TIMESTAMPING
1848 int req = SOF_TIMESTAMPING_RAW_HARDWARE;
1849 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req,
1850 sizeof(req)) < 0) {
1851 SCLogWarning(SC_ERR_AFP_CREATE,
1852 "Can't activate hardware timestamping on packet socket: %s",
1853 strerror(errno));
1854 }
1855#endif
1856
ecf59be4
EL
1857 /* Let's reserve head room so we can add the VLAN header in IPS
1858 * or TAP mode before write the packet */
1859 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
1860 /* Only one vlan is extracted from AFP header so
1861 * one VLAN header length is enough. */
1862 int reserve = VLAN_HEADER_LEN;
1863 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *) &reserve,
1864 sizeof(reserve)) < 0) {
1865 SCLogError(SC_ERR_AFP_CREATE,
1866 "Can't activate reserve on packet socket: %s",
1867 strerror(errno));
1868 return AFP_FATAL_ERROR;
1869 }
1870 }
1871
c7bde9df 1872 /* Allocate RX ring */
c2d0d938 1873#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1874 if (ptv->flags & AFP_TPACKET_V3) {
1875 if (AFPComputeRingParamsV3(ptv) != 1) {
1876 return AFP_FATAL_ERROR;
1877 }
1878 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
69d0d484 1879 (void *) &ptv->req.v3, sizeof(ptv->req.v3));
c7bde9df
EL
1880 if (r < 0) {
1881 SCLogError(SC_ERR_MEM_ALLOC,
1882 "Unable to allocate RX Ring for iface %s: (%d) %s",
1883 devname,
1884 errno,
1885 strerror(errno));
1886 return AFP_FATAL_ERROR;
1887 }
1888 } else {
c2d0d938 1889#endif
fa902abe 1890 for (order = AFP_BLOCK_SIZE_DEFAULT_ORDER; order >= 0; order--) {
c7bde9df
EL
1891 if (AFPComputeRingParams(ptv, order) != 1) {
1892 SCLogInfo("Ring parameter are incorrect. Please correct the devel");
1893 return AFP_FATAL_ERROR;
1894 }
1895
1896 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
1897 (void *) &ptv->req, sizeof(ptv->req));
1898
1899 if (r < 0) {
1900 if (errno == ENOMEM) {
1901 SCLogInfo("Memory issue with ring parameters. Retrying.");
1902 continue;
1903 }
1904 SCLogError(SC_ERR_MEM_ALLOC,
1905 "Unable to allocate RX Ring for iface %s: (%d) %s",
1906 devname,
1907 errno,
1908 strerror(errno));
1909 return AFP_FATAL_ERROR;
1910 } else {
1911 break;
1912 }
1913 }
1914 if (order < 0) {
1915 SCLogError(SC_ERR_MEM_ALLOC,
1916 "Unable to allocate RX Ring for iface %s (order 0 failed)",
1917 devname);
1918 return AFP_FATAL_ERROR;
1919 }
c2d0d938 1920#ifdef HAVE_TPACKET_V3
c7bde9df 1921 }
c2d0d938 1922#endif
c7bde9df
EL
1923
1924 /* Allocate the Ring */
c2d0d938 1925#ifdef HAVE_TPACKET_V3
c7bde9df 1926 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484 1927 ptv->ring_buflen = ptv->req.v3.tp_block_nr * ptv->req.v3.tp_block_size;
c7bde9df 1928 } else {
c2d0d938 1929#endif
69d0d484 1930 ptv->ring_buflen = ptv->req.v2.tp_block_nr * ptv->req.v2.tp_block_size;
c2d0d938 1931#ifdef HAVE_TPACKET_V3
c7bde9df 1932 }
c2d0d938 1933#endif
f5c20191
EL
1934 mmap_flag = MAP_SHARED;
1935 if (ptv->flags & AFP_MMAP_LOCKED)
1936 mmap_flag |= MAP_LOCKED;
cba41207 1937 ptv->ring_buf = mmap(0, ptv->ring_buflen, PROT_READ|PROT_WRITE,
f5c20191 1938 mmap_flag, ptv->socket, 0);
cba41207 1939 if (ptv->ring_buf == MAP_FAILED) {
88f5d7d1
EL
1940 SCLogError(SC_ERR_MEM_ALLOC, "Unable to mmap, error %s",
1941 strerror(errno));
c7bde9df
EL
1942 goto mmap_err;
1943 }
c2d0d938 1944#ifdef HAVE_TPACKET_V3
c7bde9df 1945 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484
VJ
1946 ptv->ring.v3 = SCMalloc(ptv->req.v3.tp_block_nr * sizeof(*ptv->ring.v3));
1947 if (!ptv->ring.v3) {
1948 SCLogError(SC_ERR_MEM_ALLOC, "Unable to malloc ptv ring.v3");
291af719 1949 goto postmmap_err;
c7bde9df 1950 }
69d0d484
VJ
1951 for (i = 0; i < ptv->req.v3.tp_block_nr; ++i) {
1952 ptv->ring.v3[i].iov_base = ptv->ring_buf + (i * ptv->req.v3.tp_block_size);
1953 ptv->ring.v3[i].iov_len = ptv->req.v3.tp_block_size;
c7bde9df
EL
1954 }
1955 } else {
c2d0d938 1956#endif
c7bde9df 1957 /* allocate a ring for each frame header pointer*/
69d0d484
VJ
1958 ptv->ring.v2 = SCMalloc(ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
1959 if (ptv->ring.v2 == NULL) {
c7bde9df 1960 SCLogError(SC_ERR_MEM_ALLOC, "Unable to allocate frame buf");
291af719 1961 goto postmmap_err;
c7bde9df 1962 }
69d0d484 1963 memset(ptv->ring.v2, 0, ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
c7bde9df
EL
1964 /* fill the header ring with proper frame ptr*/
1965 ptv->frame_offset = 0;
69d0d484
VJ
1966 for (i = 0; i < ptv->req.v2.tp_block_nr; ++i) {
1967 void *base = &(ptv->ring_buf[i * ptv->req.v2.tp_block_size]);
c7bde9df 1968 unsigned int j;
69d0d484
VJ
1969 for (j = 0; j < ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size; ++j, ++ptv->frame_offset) {
1970 (((union thdr **)ptv->ring.v2)[ptv->frame_offset]) = base;
1971 base += ptv->req.v2.tp_frame_size;
c7bde9df
EL
1972 }
1973 }
1974 ptv->frame_offset = 0;
c2d0d938 1975#ifdef HAVE_TPACKET_V3
c7bde9df 1976 }
c2d0d938 1977#endif
c7bde9df
EL
1978
1979 return 0;
1980
291af719 1981postmmap_err:
cba41207 1982 munmap(ptv->ring_buf, ptv->ring_buflen);
69d0d484
VJ
1983 if (ptv->ring.v2)
1984 SCFree(ptv->ring.v2);
1985 if (ptv->ring.v3)
1986 SCFree(ptv->ring.v3);
c7bde9df
EL
1987mmap_err:
1988 /* Packet mmap does the cleaning when socket is closed */
1989 return AFP_FATAL_ERROR;
1990}
1991
402bdf9b
VJ
/** \brief test if we can use FANOUT. Older kernels like those in
 *         CentOS6 have HAVE_PACKET_FANOUT defined but fail to work
 *
 *  A throw-away AF_PACKET socket is opened and asked to join fanout group 1
 *  in hash mode with the defrag flag set. The socket is closed again before
 *  the function returns.
 *
 *  \retval 1 the probing setsockopt() succeeded
 *  \retval 0 the socket could not be created, the probe failed, or the
 *            build has no HAVE_PACKET_FANOUT
 */
int AFPIsFanoutSupported(void)
{
#ifdef HAVE_PACKET_FANOUT
    int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (fd < 0)
        return 0;

    uint16_t mode = PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG;
    uint16_t id = 1;
    /* Widen before shifting: a uint16_t is promoted to int, and shifting a
     * value that has its top bit set by 16 overflows a 32-bit int. */
    uint32_t option = ((uint32_t)mode << 16) | (id & 0xffff);
    int r = setsockopt(fd, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
    /* close() may overwrite errno; keep the setsockopt() value for the log */
    int saved_errno = errno;
    close(fd);

    if (r < 0) {
        SCLogPerf("fanout not supported by kernel: %s", strerror(saved_errno));
        return 0;
    }
    return 1;
#else
    return 0;
#endif
}
2017
91e1256b
EL
2018#ifdef HAVE_PACKET_EBPF
2019
2020static int SockFanoutSeteBPF(AFPThreadVars *ptv)
2021{
2022 int pfd = ptv->ebpf_lb_fd;
2023 if (pfd == -1) {
2024 SCLogError(SC_ERR_INVALID_VALUE,
2025 "Fanout file descriptor is invalid");
2026 return -1;
2027 }
2028
2029 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
2030 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf");
2031 return -1;
2032 }
2033 SCLogInfo("Activated eBPF on socket");
2034
2035 return 0;
2036}
2037
2038static int SetEbpfFilter(AFPThreadVars *ptv)
2039{
2040 int pfd = ptv->ebpf_filter_fd;
2041 if (pfd == -1) {
2042 SCLogError(SC_ERR_INVALID_VALUE,
2043 "Filter file descriptor is invalid");
2044 return -1;
2045 }
2046
2047 if (setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_BPF, &pfd, sizeof(pfd))) {
2048 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf: %s", strerror(errno));
2049 return -1;
2050 }
2051 SCLogInfo("Activated eBPF filter on socket");
2052
2053 return 0;
2054}
2055#endif
2056
e80b30c0 2057static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose)
c45d8985
EL
2058{
2059 int r;
1992a227 2060 int ret = AFP_FATAL_ERROR;
c45d8985
EL
2061 struct packet_mreq sock_params;
2062 struct sockaddr_ll bind_address;
662dccd8 2063 int if_idx;
49b7b00f 2064
c45d8985
EL
2065 /* open socket */
2066 ptv->socket = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
2067 if (ptv->socket == -1) {
e80b30c0 2068 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
13f13b6d 2069 goto error;
c45d8985 2070 }
cba41207 2071
662dccd8 2072 if_idx = AFPGetIfnumByDev(ptv->socket, devname, verbose);
cba41207
AG
2073
2074 if (if_idx == -1) {
fcd5e138 2075 goto socket_err;
cba41207
AG
2076 }
2077
c45d8985
EL
2078 /* bind socket */
2079 memset(&bind_address, 0, sizeof(bind_address));
2080 bind_address.sll_family = AF_PACKET;
2081 bind_address.sll_protocol = htons(ETH_P_ALL);
662dccd8 2082 bind_address.sll_ifindex = if_idx;
c45d8985
EL
2083 if (bind_address.sll_ifindex == -1) {
2084 if (verbose)
e80b30c0 2085 SCLogError(SC_ERR_AFP_CREATE, "Couldn't find iface %s", devname);
1992a227 2086 ret = AFP_RECOVERABLE_ERROR;
13f13b6d
EL
2087 goto socket_err;
2088 }
2089
cba41207
AG
2090 int if_flags = AFPGetDevFlags(ptv->socket, ptv->iface);
2091 if (if_flags == -1) {
2092 if (verbose) {
2093 SCLogError(SC_ERR_AFP_READ,
2094 "Couldn't get flags for interface '%s'",
2095 ptv->iface);
2096 }
2097 ret = AFP_RECOVERABLE_ERROR;
2098 goto socket_err;
2099 } else if ((if_flags & (IFF_UP | IFF_RUNNING)) == 0) {
2100 if (verbose) {
2101 SCLogError(SC_ERR_AFP_READ,
2102 "Interface '%s' is down",
2103 ptv->iface);
2104 }
2105 ret = AFP_RECOVERABLE_ERROR;
2106 goto socket_err;
2107 }
2108
13f13b6d
EL
2109 if (ptv->promisc != 0) {
2110 /* Force promiscuous mode */
2111 memset(&sock_params, 0, sizeof(sock_params));
2112 sock_params.mr_type = PACKET_MR_PROMISC;
2113 sock_params.mr_ifindex = bind_address.sll_ifindex;
2114 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_ADD_MEMBERSHIP,(void *)&sock_params, sizeof(sock_params));
2115 if (r < 0) {
2116 SCLogError(SC_ERR_AFP_CREATE,
2117 "Couldn't switch iface %s to promiscuous, error %s",
2118 devname, strerror(errno));
c7bde9df 2119 goto socket_err;
13f13b6d
EL
2120 }
2121 }
2122
2123 if (ptv->checksum_mode == CHECKSUM_VALIDATION_KERNEL) {
2124 int val = 1;
2125 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_AUXDATA, &val,
2126 sizeof(val)) == -1 && errno != ENOPROTOOPT) {
2127 SCLogWarning(SC_ERR_NO_AF_PACKET,
4111331a 2128 "'kernel' checksum mode not supported, falling back to full mode.");
13f13b6d
EL
2129 ptv->checksum_mode = CHECKSUM_VALIDATION_ENABLE;
2130 }
2131 }
2132
2133 /* set socket recv buffer size */
2134 if (ptv->buffer_size != 0) {
2135 /*
2136 * Set the socket buffer size to the specified value.
2137 */
b3bf7a57 2138 SCLogPerf("Setting AF_PACKET socket buffer to %d", ptv->buffer_size);
13f13b6d
EL
2139 if (setsockopt(ptv->socket, SOL_SOCKET, SO_RCVBUF,
2140 &ptv->buffer_size,
2141 sizeof(ptv->buffer_size)) == -1) {
2142 SCLogError(SC_ERR_AFP_CREATE,
2143 "Couldn't set buffer size to %d on iface %s, error %s",
2144 ptv->buffer_size, devname, strerror(errno));
c7bde9df 2145 goto socket_err;
13f13b6d
EL
2146 }
2147 }
2148
2149 r = bind(ptv->socket, (struct sockaddr *)&bind_address, sizeof(bind_address));
2150 if (r < 0) {
2151 if (verbose) {
2152 if (errno == ENETDOWN) {
2153 SCLogError(SC_ERR_AFP_CREATE,
2154 "Couldn't bind AF_PACKET socket, iface %s is down",
2155 devname);
2156 } else {
2157 SCLogError(SC_ERR_AFP_CREATE,
2158 "Couldn't bind AF_PACKET socket to iface %s, error %s",
2159 devname, strerror(errno));
2160 }
2161 }
1992a227 2162 ret = AFP_RECOVERABLE_ERROR;
c7bde9df 2163 goto socket_err;
13f13b6d
EL
2164 }
2165
91e1256b 2166
238ff231
EL
2167#ifdef HAVE_PACKET_FANOUT
2168 /* add binded socket to fanout group */
2169 if (ptv->threads > 1) {
238ff231
EL
2170 uint16_t mode = ptv->cluster_type;
2171 uint16_t id = ptv->cluster_id;
4111331a 2172 uint32_t option = (mode << 16) | (id & 0xffff);
238ff231
EL
2173 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
2174 if (r < 0) {
2175 SCLogError(SC_ERR_AFP_CREATE,
4111331a 2176 "Couldn't set fanout mode, error %s",
238ff231 2177 strerror(errno));
c7bde9df 2178 goto socket_err;
238ff231
EL
2179 }
2180 }
2181#endif
2182
91e1256b
EL
2183#ifdef HAVE_PACKET_EBPF
2184 if (ptv->cluster_type == PACKET_FANOUT_EBPF) {
2185 r = SockFanoutSeteBPF(ptv);
2186 if (r < 0) {
2187 SCLogError(SC_ERR_AFP_CREATE,
2188 "Coudn't set EBPF, error %s",
2189 strerror(errno));
2190 goto socket_err;
2191 }
2192 }
2193#endif
2194
49b7b00f 2195 if (ptv->flags & AFP_RING_MODE) {
c7bde9df
EL
2196 ret = AFPSetupRing(ptv, devname);
2197 if (ret != 0)
13f13b6d 2198 goto socket_err;
49b7b00f
EL
2199 }
2200
86a3f064 2201 SCLogDebug("Using interface '%s' via socket %d", (char *)devname, ptv->socket);
c45d8985 2202
c85ee1e3
EL
2203 ptv->datalink = AFPGetDevLinktype(ptv->socket, ptv->iface);
2204 switch (ptv->datalink) {
2205 case ARPHRD_PPP:
2206 case ARPHRD_ATM:
2207 ptv->cooked = 1;
619414c5 2208 break;
c85ee1e3
EL
2209 }
2210
f47df5a6 2211 TmEcode rc = AFPSetBPFFilter(ptv);
f2a6fb8a 2212 if (rc == TM_ECODE_FAILED) {
39807b47
AG
2213 ret = AFP_FATAL_ERROR;
2214 goto socket_err;
f2a6fb8a
EL
2215 }
2216
49b7b00f 2217 /* Init is ok */
13f13b6d 2218 AFPSwitchState(ptv, AFP_STATE_UP);
c45d8985 2219 return 0;
13f13b6d 2220
13f13b6d
EL
2221socket_err:
2222 close(ptv->socket);
2223 ptv->socket = -1;
f47df5a6 2224 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484
VJ
2225 if (ptv->ring.v3) {
2226 SCFree(ptv->ring.v3);
2227 ptv->ring.v3 = NULL;
f47df5a6
VJ
2228 }
2229 } else {
69d0d484
VJ
2230 if (ptv->ring.v2) {
2231 SCFree(ptv->ring.v2);
2232 ptv->ring.v2 = NULL;
f47df5a6
VJ
2233 }
2234 }
2235
13f13b6d 2236error:
1992a227 2237 return -ret;
c45d8985
EL
2238}
2239
f2a6fb8a
EL
2240TmEcode AFPSetBPFFilter(AFPThreadVars *ptv)
2241{
2242 struct bpf_program filter;
2243 struct sock_fprog fcode;
2244 int rc;
2245
91e1256b
EL
2246#ifdef HAVE_PACKET_EBPF
2247 if (ptv->ebpf_filter_fd != -1) {
2248 return SetEbpfFilter(ptv);
2249 }
2250#endif
2251
f2a6fb8a
EL
2252 if (!ptv->bpf_filter)
2253 return TM_ECODE_OK;
2254
f2a6fb8a
EL
2255 SCLogInfo("Using BPF '%s' on iface '%s'",
2256 ptv->bpf_filter,
2257 ptv->iface);
28e9e4c8
EL
2258
2259 char errbuf[PCAP_ERRBUF_SIZE];
2260 if (SCBPFCompile(default_packet_size, /* snaplen_arg */
f2a6fb8a
EL
2261 ptv->datalink, /* linktype_arg */
2262 &filter, /* program */
2263 ptv->bpf_filter, /* const char *buf */
cc82ef06 2264 1, /* optimize */
28e9e4c8
EL
2265 0, /* mask */
2266 errbuf,
2267 sizeof(errbuf)) == -1) {
2268 SCLogError(SC_ERR_AFP_CREATE, "Failed to compile BPF \"%s\": %s",
2269 ptv->bpf_filter,
2270 errbuf);
f2a6fb8a
EL
2271 return TM_ECODE_FAILED;
2272 }
2273
2274 fcode.len = filter.bf_len;
2275 fcode.filter = (struct sock_filter*)filter.bf_insns;
2276
2277 rc = setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_FILTER, &fcode, sizeof(fcode));
2278
28e9e4c8 2279 SCBPFFree(&filter);
f2a6fb8a
EL
2280 if(rc == -1) {
2281 SCLogError(SC_ERR_AFP_CREATE, "Failed to attach filter: %s", strerror(errno));
2282 return TM_ECODE_FAILED;
2283 }
2284
f2a6fb8a
EL
2285 return TM_ECODE_OK;
2286}
2287
06173267
EL
2288#ifdef HAVE_PACKET_EBPF
2289/**
2290 * Insert a half flow in the kernel bypass table
2291 *
2292 * \param mapfd file descriptor of the protocol bypass table
2293 * \param key data to use as key in the table
315c29a8
EL
2294 * \param pkts_cnt packet count for the half flow
2295 * \param bytes_cnt bytes count for the half flow
2598078e 2296 * \return 0 in case of error, 1 if success
06173267 2297 */
315c29a8
EL
2298static int AFPInsertHalfFlow(int mapd, void *key, uint32_t hash,
2299 uint64_t pkts_cnt, uint64_t bytes_cnt,
2300 unsigned int nr_cpus)
06173267 2301{
651a27e4 2302 BPF_DECLARE_PERCPU(struct pair, value, nr_cpus);
17a32bda 2303 unsigned int i;
1e729f05
EL
2304
2305 if (mapd == -1) {
2306 return 0;
2307 }
2308
94a622cb 2309 /* We use a per CPU structure so we have to set an array of values as the kernel
6ab1cbcb
EL
2310 * is not duplicating the data on each CPU by itself. */
2311 for (i = 0; i < nr_cpus; i++) {
651a27e4
EL
2312 BPF_PERCPU(value, i).packets = 0;
2313 BPF_PERCPU(value, i).bytes = 0;
2314 BPF_PERCPU(value, i).hash = hash;
17a32bda 2315 }
17a32bda
EL
2316 if (bpf_map_update_elem(mapd, key, value, BPF_NOEXIST) != 0) {
2317 switch (errno) {
3379311e 2318 /* no more place in the hash */
17a32bda 2319 case E2BIG:
17a32bda 2320 return 0;
3379311e
EL
2321 /* if we already have the key then bypass is a success */
2322 case EEXIST:
2323 return 1;
2324 /* Not supposed to be there so issue a error */
17a32bda
EL
2325 default:
2326 SCLogError(SC_ERR_BPF, "Can't update eBPF map: %s (%d)",
2327 strerror(errno),
2328 errno);
2329 return 0;
06173267 2330 }
17a32bda
EL
2331 }
2332 return 1;
06173267
EL
2333}
2334#endif
2335
2598078e 2336/**
94a622cb
EL
2337 * Bypass function for AF_PACKET capture in eBPF mode
2338 *
2339 * This function creates two half flows in the map shared with the kernel
2340 * to trigger bypass.
2341 *
2342 * The implementation of bypass is done via an IPv4 and an IPv6 flow table.
2343 * This table contains the list of half flows to bypass. The in-kernel filter
2344 * will skip/drop the packet if they belong to a flow in one of the flows
2345 * table.
2346 *
2347 * \param p the packet belonging to the flow to bypass
2348 * \return 0 if unable to bypass, 1 if success
2598078e 2349 */
06173267
EL
2350static int AFPBypassCallback(Packet *p)
2351{
2352#ifdef HAVE_PACKET_EBPF
2353 SCLogDebug("Calling af_packet callback function");
2354 /* Only bypass TCP and UDP */
2355 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2356 return 0;
2357 }
2358
2359 /* Bypassing tunneled packets is currently not supported
2360 * because we can't discard the inner packet only due to
2361 * primitive parsing in eBPF */
2362 if (IS_TUNNEL_PKT(p)) {
2363 return 0;
2364 }
06173267 2365 if (PKT_IS_IPV4(p)) {
d65f4585 2366 SCLogDebug("add an IPv4");
eff10fce
EL
2367 if (p->afp_v.v4_map_fd == -1) {
2368 return 0;
2369 }
06173267
EL
2370 struct flowv4_keys key = {};
2371 key.src = htonl(GET_IPV4_SRC_ADDR_U32(p));
2372 key.dst = htonl(GET_IPV4_DST_ADDR_U32(p));
2373 key.port16[0] = GET_TCP_SRC_PORT(p);
2374 key.port16[1] = GET_TCP_DST_PORT(p);
c1fd0da5
EL
2375 key.vlan_id[0] = p->vlan_id[0];
2376 key.vlan_id[1] = p->vlan_id[1];
8c880879 2377
06173267 2378 key.ip_proto = IPV4_GET_IPPROTO(p);
315c29a8
EL
2379 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, p->flow_hash, p->flow->todstpktcnt,
2380 p->flow->todstbytecnt, p->afp_v.nr_cpus) == 0) {
06173267
EL
2381 return 0;
2382 }
2383 key.src = htonl(GET_IPV4_DST_ADDR_U32(p));
2384 key.dst = htonl(GET_IPV4_SRC_ADDR_U32(p));
2385 key.port16[0] = GET_TCP_DST_PORT(p);
2386 key.port16[1] = GET_TCP_SRC_PORT(p);
315c29a8
EL
2387 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, p->flow_hash, p->flow->tosrcpktcnt,
2388 p->flow->tosrcbytecnt, p->afp_v.nr_cpus) == 0) {
06173267
EL
2389 return 0;
2390 }
315c29a8 2391 EBPFUpdateFlow(p->flow, p, NULL);
06173267
EL
2392 return 1;
2393 }
2394 /* For IPv6 case we don't handle extended header in eBPF */
6062c27e 2395 if (PKT_IS_IPV6(p) &&
06173267 2396 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
d65f4585 2397 int i;
eff10fce
EL
2398 if (p->afp_v.v6_map_fd == -1) {
2399 return 0;
2400 }
06173267 2401 SCLogDebug("add an IPv6");
06173267
EL
2402 struct flowv6_keys key = {};
2403 for (i = 0; i < 4; i++) {
2404 key.src[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2405 key.dst[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2406 }
2407 key.port16[0] = GET_TCP_SRC_PORT(p);
2408 key.port16[1] = GET_TCP_DST_PORT(p);
c1fd0da5
EL
2409 key.vlan_id[0] = p->vlan_id[0];
2410 key.vlan_id[1] = p->vlan_id[1];
06173267 2411 key.ip_proto = IPV6_GET_NH(p);
b481f290 2412 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, p->flow_hash, p->flow->todstpktcnt,
315c29a8 2413 p->flow->todstbytecnt, p->afp_v.nr_cpus) == 0) {
06173267
EL
2414 return 0;
2415 }
2416 for (i = 0; i < 4; i++) {
2417 key.src[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2418 key.dst[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2419 }
2420 key.port16[0] = GET_TCP_DST_PORT(p);
2421 key.port16[1] = GET_TCP_SRC_PORT(p);
b481f290 2422 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, p->flow_hash, p->flow->tosrcpktcnt,
315c29a8 2423 p->flow->tosrcbytecnt, p->afp_v.nr_cpus) == 0) {
06173267
EL
2424 return 0;
2425 }
315c29a8 2426 EBPFUpdateFlow(p->flow, p, NULL);
06173267
EL
2427 return 1;
2428 }
2429#endif
2430 return 0;
2431}
2432
94a622cb
EL
2433/**
2434 * Bypass function for AF_PACKET capture in XDP mode
2435 *
2436 * This function creates two half flows in the map shared with the kernel
2437 * to trigger bypass. This function is similar to AFPBypassCallback() but
2438 * the bytes order is changed for some data due to the way we get the data
2439 * in the XDP case.
2440 *
2441 * \param p the packet belonging to the flow to bypass
2442 * \return 0 if unable to bypass, 1 if success
2443 */
8c880879
EL
2444static int AFPXDPBypassCallback(Packet *p)
2445{
2446#ifdef HAVE_PACKET_XDP
2447 SCLogDebug("Calling af_packet callback function");
2448 /* Only bypass TCP and UDP */
2449 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2450 return 0;
2451 }
2452
2453 /* Bypassing tunneled packets is currently not supported
2454 * because we can't discard the inner packet only due to
2455 * primitive parsing in eBPF */
2456 if (IS_TUNNEL_PKT(p)) {
2457 return 0;
2458 }
8c880879 2459 if (PKT_IS_IPV4(p)) {
8c880879 2460 struct flowv4_keys key = {};
eff10fce
EL
2461 if (p->afp_v.v4_map_fd == -1) {
2462 return 0;
2463 }
315c29a8
EL
2464 key.src = p->flow->src.addr_data32[0];
2465 key.dst = p->flow->dst.addr_data32[0];
94a622cb 2466 /* In the XDP filter we get port from parsing of packet and not from skb
6062c27e 2467 * (as in eBPF filter) so we need to pass from host to network order */
315c29a8
EL
2468 key.port16[0] = htons(p->flow->sp);
2469 key.port16[1] = htons(p->flow->dp);
c1fd0da5
EL
2470 key.vlan_id[0] = p->vlan_id[0];
2471 key.vlan_id[1] = p->vlan_id[1];
8c880879 2472 key.ip_proto = IPV4_GET_IPPROTO(p);
315c29a8
EL
2473 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, p->flow_hash, p->flow->todstpktcnt,
2474 p->flow->todstbytecnt, p->afp_v.nr_cpus) == 0) {
8c880879
EL
2475 return 0;
2476 }
315c29a8
EL
2477 key.src = p->flow->dst.addr_data32[0];
2478 key.dst = p->flow->src.addr_data32[0];
2479 key.port16[0] = htons(p->flow->dp);
2480 key.port16[1] = htons(p->flow->sp);
2481 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, p->flow_hash, p->flow->tosrcpktcnt,
2482 p->flow->tosrcbytecnt, p->afp_v.nr_cpus) == 0) {
8c880879
EL
2483 return 0;
2484 }
2485 return 1;
2486 }
2487 /* For IPv6 case we don't handle extended header in eBPF */
6062c27e 2488 if (PKT_IS_IPV6(p) &&
8c880879 2489 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
8c880879 2490 SCLogDebug("add an IPv6");
eff10fce
EL
2491 if (p->afp_v.v6_map_fd == -1) {
2492 return 0;
2493 }
d65f4585 2494 int i;
8c880879
EL
2495 struct flowv6_keys key = {};
2496 for (i = 0; i < 4; i++) {
2497 key.src[i] = GET_IPV6_SRC_ADDR(p)[i];
2498 key.dst[i] = GET_IPV6_DST_ADDR(p)[i];
2499 }
2500 key.port16[0] = htons(GET_TCP_SRC_PORT(p));
2501 key.port16[1] = htons(GET_TCP_DST_PORT(p));
c1fd0da5
EL
2502 key.vlan_id[0] = p->vlan_id[0];
2503 key.vlan_id[1] = p->vlan_id[1];
8c880879 2504 key.ip_proto = IPV6_GET_NH(p);
b481f290 2505 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, p->flow_hash, p->flow->todstpktcnt,
315c29a8 2506 p->flow->todstbytecnt, p->afp_v.nr_cpus) == 0) {
8c880879
EL
2507 return 0;
2508 }
2509 for (i = 0; i < 4; i++) {
2510 key.src[i] = GET_IPV6_DST_ADDR(p)[i];
2511 key.dst[i] = GET_IPV6_SRC_ADDR(p)[i];
2512 }
2513 key.port16[0] = htons(GET_TCP_DST_PORT(p));
2514 key.port16[1] = htons(GET_TCP_SRC_PORT(p));
b481f290 2515 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, &key, p->flow_hash, p->flow->tosrcpktcnt,
315c29a8 2516 p->flow->tosrcbytecnt, p->afp_v.nr_cpus) == 0) {
8c880879
EL
2517 return 0;
2518 }
2519 return 1;
2520 }
2521#endif
2522 return 0;
2523}
2524
c45d8985
EL
2525/**
2526 * \brief Init function for ReceiveAFP.
2527 *
2528 * \param tv pointer to ThreadVars
2529 * \param initdata pointer to the interface passed from the user
2530 * \param data pointer gets populated with AFPThreadVars
2531 *
2532 * \todo Create a general AFP setup function.
2533 */
ab1200fb 2534TmEcode ReceiveAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
8f1d7503 2535{
c45d8985 2536 SCEnter();
ab1200fb 2537 AFPIfaceConfig *afpconfig = (AFPIfaceConfig *)initdata;
c45d8985 2538
c45d8985
EL
2539 if (initdata == NULL) {
2540 SCLogError(SC_ERR_INVALID_ARGUMENT, "initdata == NULL");
2541 SCReturnInt(TM_ECODE_FAILED);
2542 }
2543
2544 AFPThreadVars *ptv = SCMalloc(sizeof(AFPThreadVars));
e176be6f 2545 if (unlikely(ptv == NULL)) {
45d5c3ca 2546 afpconfig->DerefFunc(afpconfig);
c45d8985 2547 SCReturnInt(TM_ECODE_FAILED);
45d5c3ca 2548 }
c45d8985
EL
2549 memset(ptv, 0, sizeof(AFPThreadVars));
2550
2551 ptv->tv = tv;
2552 ptv->cooked = 0;
2553
fbca1a4e 2554 strlcpy(ptv->iface, afpconfig->iface, AFP_IFACE_NAME_LENGTH);
c45d8985
EL
2555 ptv->iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2556
51eb9605
EL
2557 ptv->livedev = LiveGetDevice(ptv->iface);
2558 if (ptv->livedev == NULL) {
2559 SCLogError(SC_ERR_INVALID_VALUE, "Unable to find Live device");
11bdf483 2560 SCFree(ptv);
51eb9605
EL
2561 SCReturnInt(TM_ECODE_FAILED);
2562 }
2563
fbca1a4e 2564 ptv->buffer_size = afpconfig->buffer_size;
8879df80 2565 ptv->ring_size = afpconfig->ring_size;
fa902abe 2566 ptv->block_size = afpconfig->block_size;
8baf64f5 2567 ptv->block_timeout = afpconfig->block_timeout;
e80b30c0 2568
df7dbe36 2569 ptv->promisc = afpconfig->promisc;
6062e00c 2570 ptv->checksum_mode = afpconfig->checksum_mode;
6efd37a3 2571 ptv->bpf_filter = NULL;
df7dbe36 2572
fbca1a4e 2573 ptv->threads = 1;
e80b30c0
EL
2574#ifdef HAVE_PACKET_FANOUT
2575 ptv->cluster_type = PACKET_FANOUT_LB;
2576 ptv->cluster_id = 1;
2577 /* We only set cluster info if the number of reader threads is greater than 1 */
fbca1a4e 2578 if (afpconfig->threads > 1) {
9d882116
VJ
2579 ptv->cluster_id = afpconfig->cluster_id;
2580 ptv->cluster_type = afpconfig->cluster_type;
2581 ptv->threads = afpconfig->threads;
e80b30c0
EL
2582 }
2583#endif
49b7b00f 2584 ptv->flags = afpconfig->flags;
e80b30c0 2585
f2a6fb8a
EL
2586 if (afpconfig->bpf_filter) {
2587 ptv->bpf_filter = afpconfig->bpf_filter;
2588 }
91e1256b
EL
2589 ptv->ebpf_lb_fd = afpconfig->ebpf_lb_fd;
2590 ptv->ebpf_filter_fd = afpconfig->ebpf_filter_fd;
8c880879 2591 ptv->xdp_mode = afpconfig->xdp_mode;
36838017 2592#ifdef HAVE_PACKET_EBPF
4cf53100 2593 ptv->ebpf_t_config.cpus_count = UtilCpuGetNumProcessorsConfigured();
f2a6fb8a 2594
d65f4585 2595 if (ptv->flags & (AFP_BYPASS|AFP_XDPBYPASS)) {
126488f7 2596 ptv->v4_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v4");
d65f4585
EL
2597 if (ptv->v4_map_fd == -1) {
2598 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'", "flow_table_v4");
2599 }
126488f7 2600 ptv->v6_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v6");
d65f4585
EL
2601 if (ptv->v6_map_fd == -1) {
2602 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'", "flow_table_v6");
2603 }
2604 }
4cf53100 2605 ptv->ebpf_t_config = afpconfig->ebpf_t_config;
d65f4585
EL
2606#endif
2607
6efd37a3 2608#ifdef PACKET_STATISTICS
1ef786e7
VJ
2609 ptv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets",
2610 ptv->tv);
2611 ptv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops",
2612 ptv->tv);
9efa4ace
EL
2613 ptv->capture_errors = StatsRegisterCounter("capture.errors",
2614 ptv->tv);
6efd37a3
EL
2615#endif
2616
662dccd8
EL
2617 ptv->copy_mode = afpconfig->copy_mode;
2618 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
2619 strlcpy(ptv->out_iface, afpconfig->out_iface, AFP_IFACE_NAME_LENGTH);
2620 ptv->out_iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
b7e78d33
EL
2621 /* Warn about BPF filter consequence */
2622 if (ptv->bpf_filter) {
2623 SCLogWarning(SC_WARN_UNCOMMON, "Enabling a BPF filter in IPS mode result"
2624 " in dropping all non matching packets.");
2625 }
662dccd8 2626 }
c85ee1e3 2627
b7e78d33 2628
0581a23f
EL
2629 if (AFPPeersListAdd(ptv) == TM_ECODE_FAILED) {
2630 SCFree(ptv);
2631 afpconfig->DerefFunc(afpconfig);
2632 SCReturnInt(TM_ECODE_FAILED);
2633 }
2634
e80b30c0
EL
2635#define T_DATA_SIZE 70000
2636 ptv->data = SCMalloc(T_DATA_SIZE);
2637 if (ptv->data == NULL) {
45d5c3ca 2638 afpconfig->DerefFunc(afpconfig);
6019ae3d 2639 SCFree(ptv);
e80b30c0 2640 SCReturnInt(TM_ECODE_FAILED);
c45d8985 2641 }
e80b30c0
EL
2642 ptv->datalen = T_DATA_SIZE;
2643#undef T_DATA_SIZE
2644
c45d8985 2645 *data = (void *)ptv;
fbca1a4e 2646
45d5c3ca 2647 afpconfig->DerefFunc(afpconfig);
71e47868
EL
2648
2649 /* A bit strange to have this here but we only have vlan information
2650 * during reading so we need to know if we want to keep vlan during
2651 * the capture phase */
2652 int vlanbool = 0;
2653 if ((ConfGetBool("vlan.use-for-tracking", &vlanbool)) == 1 && vlanbool == 0) {
9500d12c 2654 ptv->flags |= AFP_VLAN_DISABLED;
71e47868
EL
2655 }
2656
2cd6e128
EL
2657 /* If kernel is older than 3.0, VLAN is not stripped so we don't
2658 * get the info from packet extended header but we will use a standard
2659 * parsing of packet data (See Linux commit bcc6d47903612c3861201cc3a866fb604f26b8b2) */
2660 if (! SCKernelVersionIsAtLeast(3, 0)) {
9500d12c 2661 ptv->flags |= AFP_VLAN_DISABLED;
2cd6e128
EL
2662 }
2663
c45d8985
EL
2664 SCReturnInt(TM_ECODE_OK);
2665}
2666
2667/**
2668 * \brief This function prints stats to the screen at exit.
2669 * \param tv pointer to ThreadVars
2670 * \param data pointer that gets cast into AFPThreadVars for ptv
2671 */
8f1d7503
KS
2672void ReceiveAFPThreadExitStats(ThreadVars *tv, void *data)
2673{
c45d8985
EL
2674 SCEnter();
2675 AFPThreadVars *ptv = (AFPThreadVars *)data;
9549faae
EL
2676
2677#ifdef PACKET_STATISTICS
e8a4a4c4 2678 AFPDumpCounters(ptv);
b3bf7a57 2679 SCLogPerf("(%s) Kernel: Packets %" PRIu64 ", dropped %" PRIu64 "",
6efd37a3 2680 tv->name,
752f03e7
VJ
2681 StatsGetLocalCounterValue(tv, ptv->capture_kernel_packets),
2682 StatsGetLocalCounterValue(tv, ptv->capture_kernel_drops));
9549faae 2683#endif
c45d8985
EL
2684}
2685
2686/**
2687 * \brief DeInit function closes af packet socket at exit.
2688 * \param tv pointer to ThreadVars
2689 * \param data pointer that gets cast into AFPThreadVars for ptv
2690 */
8f1d7503
KS
2691TmEcode ReceiveAFPThreadDeinit(ThreadVars *tv, void *data)
2692{
c45d8985
EL
2693 AFPThreadVars *ptv = (AFPThreadVars *)data;
2694
13f13b6d
EL
2695 AFPSwitchState(ptv, AFP_STATE_DOWN);
2696
8c880879 2697#ifdef HAVE_PACKET_XDP
4cf53100
EL
2698 if ((ptv->ebpf_t_config.flags & EBPF_XDP_CODE) &&
2699 (!(ptv->ebpf_t_config.flags & EBPF_PINNED_MAPS))) {
2700 EBPFSetupXDP(ptv->iface, -1, ptv->xdp_mode);
2701 }
8c880879 2702#endif
e80b30c0
EL
2703 if (ptv->data != NULL) {
2704 SCFree(ptv->data);
2705 ptv->data = NULL;
2706 }
2707 ptv->datalen = 0;
2708
f2a6fb8a 2709 ptv->bpf_filter = NULL;
69d0d484
VJ
2710 if ((ptv->flags & AFP_TPACKET_V3) && ptv->ring.v3) {
2711 SCFree(ptv->ring.v3);
ce59ec5d 2712 } else {
69d0d484
VJ
2713 if (ptv->ring.v2)
2714 SCFree(ptv->ring.v2);
ce59ec5d 2715 }
f2a6fb8a 2716
7127ae2b 2717 SCFree(ptv);
c45d8985
EL
2718 SCReturnInt(TM_ECODE_OK);
2719}
2720
2721/**
2722 * \brief This function passes off to link type decoders.
2723 *
2724 * DecodeAFP reads packets from the PacketQueue and passes
2725 * them off to the proper link type decoder.
2726 *
2727 * \param t pointer to ThreadVars
2728 * \param p pointer to the current packet
2729 * \param data pointer that gets cast into AFPThreadVars for ptv
2730 * \param pq pointer to the current PacketQueue
2731 */
2732TmEcode DecodeAFP(ThreadVars *tv, Packet *p, void *data, PacketQueue *pq, PacketQueue *postpq)
2733{
2734 SCEnter();
2735 DecodeThreadVars *dtv = (DecodeThreadVars *)data;
2736
f7b1aefa
VJ
2737 /* XXX HACK: flow timeout can call us for injected pseudo packets
2738 * see bug: https://redmine.openinfosecfoundation.org/issues/1107 */
2739 if (p->flags & PKT_PSEUDO_STREAM_END)
2740 return TM_ECODE_OK;
2741
c45d8985 2742 /* update counters */
14466a80 2743 DecodeUpdatePacketCounters(tv, dtv, p);
c45d8985 2744
1fb7c0dd
EL
2745 /* If suri has set vlan during reading, we increase vlan counter */
2746 if (p->vlan_idx) {
1c0b4ee0 2747 StatsIncr(tv, dtv->counter_vlan);
1fb7c0dd
EL
2748 }
2749
c45d8985 2750 /* call the decoder */
49dbb455 2751 switch (p->datalink) {
c45d8985
EL
2752 case LINKTYPE_ETHERNET:
2753 DecodeEthernet(tv, dtv, p,GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2754 break;
49dbb455
VJ
2755 case LINKTYPE_LINUX_SLL:
2756 DecodeSll(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2757 break;
c45d8985
EL
2758 case LINKTYPE_PPP:
2759 DecodePPP(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2760 break;
2761 case LINKTYPE_RAW:
f67aa5de 2762 case LINKTYPE_GRE_OVER_IP:
c45d8985
EL
2763 DecodeRaw(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2764 break;
49dbb455
VJ
2765 case LINKTYPE_NULL:
2766 DecodeNull(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2767 break;
c45d8985
EL
2768 default:
2769 SCLogError(SC_ERR_DATALINK_UNIMPLEMENTED, "Error: datalink type %" PRId32 " not yet supported in module DecodeAFP", p->datalink);
2770 break;
2771 }
2772
3088b6ac 2773 PacketDecodeFinalize(tv, dtv, p);
e7f09f24 2774
c45d8985
EL
2775 SCReturnInt(TM_ECODE_OK);
2776}
2777
ab1200fb 2778TmEcode DecodeAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
c45d8985
EL
2779{
2780 SCEnter();
2781 DecodeThreadVars *dtv = NULL;
2782
5f307aca 2783 dtv = DecodeThreadVarsAlloc(tv);
c45d8985
EL
2784
2785 if (dtv == NULL)
2786 SCReturnInt(TM_ECODE_FAILED);
2787
2788 DecodeRegisterPerfCounters(dtv, tv);
2789
2790 *data = (void *)dtv;
2791
2792 SCReturnInt(TM_ECODE_OK);
2793}
2794
2864f9ee
VJ
2795TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data)
2796{
2797 if (data != NULL)
98c88d51 2798 DecodeThreadVarsFree(tv, data);
2864f9ee
VJ
2799 SCReturnInt(TM_ECODE_OK);
2800}
2801
e80b30c0 2802#endif /* HAVE_AF_PACKET */
c45d8985 2803/* eof */
a6457262
EL
2804/**
2805 * @}
2806 */