]> git.ipfire.org Git - people/ms/suricata.git/blame - src/source-af-packet.c
af-packet: add send error counter
[people/ms/suricata.git] / src / source-af-packet.c
CommitLineData
8b08b034 1/* Copyright (C) 2011-2021 Open Information Security Foundation
c45d8985
EL
2 *
3 * You can copy, redistribute or modify this Program under the terms of
4 * the GNU General Public License version 2 as published by the Free
5 * Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * version 2 along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
a6457262
EL
18/**
19 * \defgroup afppacket AF_PACKET running mode
20 *
21 * @{
22 */
23
c45d8985
EL
24/**
25 * \file
26 *
27 * \author Eric Leblond <eric@regit.org>
28 *
29 * AF_PACKET socket acquisition support
30 *
c45d8985
EL
31 */
32
91e1256b
EL
33#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
34#define SC_PCAP_DONT_INCLUDE_PCAP_H 1
c45d8985
EL
35#include "suricata-common.h"
36#include "suricata.h"
37#include "decode.h"
38#include "packet-queue.h"
39#include "threads.h"
40#include "threadvars.h"
41#include "tm-queuehandlers.h"
42#include "tm-modules.h"
43#include "tm-threads.h"
44#include "tm-threads-common.h"
45#include "conf.h"
17a32bda 46#include "util-cpu.h"
c45d8985 47#include "util-debug.h"
51eb9605 48#include "util-device.h"
d65f4585 49#include "util-ebpf.h"
c45d8985
EL
50#include "util-error.h"
51#include "util-privs.h"
e80b30c0 52#include "util-optimize.h"
51eb9605 53#include "util-checksum.h"
ac56b1bf 54#include "util-ioctl.h"
2cd6e128 55#include "util-host-info.h"
c45d8985
EL
56#include "tmqh-packetpool.h"
57#include "source-af-packet.h"
34b3f194 58#include "runmodes.h"
b07bda7a 59#include "flow-storage.h"
3f8e15f7 60#include "util-validate.h"
c45d8985 61
e80b30c0 62#ifdef HAVE_AF_PACKET
472e061c
VJ
63
64#if HAVE_SYS_IOCTL_H
2bc0be6e 65#include <sys/ioctl.h>
472e061c
VJ
66#endif
67
b37554e0
EL
68#if HAVE_LINUX_SOCKIOS_H
69#include <linux/sockios.h>
70#endif
71
06173267
EL
72#ifdef HAVE_PACKET_EBPF
73#include "util-ebpf.h"
74#include <bpf/libbpf.h>
75#include <bpf/bpf.h>
76#endif
77
91e1256b
EL
/* Local declaration of the BPF program descriptor: an instruction array
 * together with its length (presumably mirroring libpcap's definition,
 * whose BPF header is not pulled in here -- TODO confirm). */
struct bpf_program {
    unsigned int bf_len;
    struct bpf_insn *bf_insns;
};
82
83#ifdef HAVE_PCAP_H
84#include <pcap.h>
85#endif
86
87#ifdef HAVE_PCAP_PCAP_H
88#include <pcap/pcap.h>
89#endif
90
28e9e4c8
EL
91#include "util-bpf.h"
92
472e061c 93#if HAVE_LINUX_IF_ETHER_H
c45d8985 94#include <linux/if_ether.h>
472e061c
VJ
95#endif
96
97#if HAVE_LINUX_IF_PACKET_H
c45d8985 98#include <linux/if_packet.h>
472e061c
VJ
99#endif
100
101#if HAVE_LINUX_IF_ARP_H
c45d8985 102#include <linux/if_arp.h>
472e061c 103#endif
f2a6fb8a 104
472e061c 105#if HAVE_LINUX_FILTER_H
f2a6fb8a 106#include <linux/filter.h>
e80b30c0 107#endif
c45d8985 108
472e061c 109#if HAVE_SYS_MMAN_H
49b7b00f 110#include <sys/mman.h>
472e061c
VJ
111#endif
112
a40f08a2
EL
113#ifdef HAVE_HW_TIMESTAMPING
114#include <linux/net_tstamp.h>
115#endif
116
472e061c 117#endif /* HAVE_AF_PACKET */
49b7b00f 118
c45d8985
EL
119extern int max_pending_packets;
120
e80b30c0
EL
121#ifndef HAVE_AF_PACKET
122
ab1200fb 123TmEcode NoAFPSupportExit(ThreadVars *, const void *, void **);
e80b30c0 124
8f1d7503
KS
125void TmModuleReceiveAFPRegister (void)
126{
e80b30c0
EL
127 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
128 tmm_modules[TMM_RECEIVEAFP].ThreadInit = NoAFPSupportExit;
129 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
130 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = NULL;
131 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = NULL;
e80b30c0 132 tmm_modules[TMM_RECEIVEAFP].cap_flags = 0;
3f1c4efc 133 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
e80b30c0
EL
134}
135
136/**
137 * \brief Registration Function for DecodeAFP.
e80b30c0 138 */
8f1d7503
KS
139void TmModuleDecodeAFPRegister (void)
140{
e80b30c0
EL
141 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
142 tmm_modules[TMM_DECODEAFP].ThreadInit = NoAFPSupportExit;
143 tmm_modules[TMM_DECODEAFP].Func = NULL;
144 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
145 tmm_modules[TMM_DECODEAFP].ThreadDeinit = NULL;
e80b30c0 146 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
bc6cf438 147 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
e80b30c0
EL
148}
149
150/**
151 * \brief this function prints an error message and exits.
152 */
ab1200fb 153TmEcode NoAFPSupportExit(ThreadVars *tv, const void *initdata, void **data)
e80b30c0
EL
154{
155 SCLogError(SC_ERR_NO_AF_PACKET,"Error creating thread %s: you do not have "
156 "support for AF_PACKET enabled, on Linux host please recompile "
157 "with --enable-af-packet", tv->name);
158 exit(EXIT_FAILURE);
159}
160
161#else /* We have AF_PACKET support */
162
c45d8985
EL
/* size of the interface name buffers (iface / out_iface) */
#define AFP_IFACE_NAME_LENGTH 48

/* values of the capture handle state */
#define AFP_STATE_DOWN 0
#define AFP_STATE_UP 1

/* NOTE(review): presumably microseconds between reconnection attempts and
 * the number of failed attempts between log messages -- confirm at the
 * use sites. */
#define AFP_RECONNECT_TIMEOUT 500000
#define AFP_DOWN_COUNTER_INTERVAL 40

/* NOTE(review): presumably the poll() timeout in milliseconds -- confirm. */
#define POLL_TIMEOUT 100
172
8b08b034
VJ
/* Fallback definitions of the RX ring tp_status flags, used only when the
 * included kernel headers do not provide them. */
#if !defined(TP_STATUS_KERNEL)
#define TP_STATUS_KERNEL 0
#endif
#if !defined(TP_STATUS_USER)
#define TP_STATUS_USER BIT_U32(0)
#endif
#if !defined(TP_STATUS_COPY)
#define TP_STATUS_COPY BIT_U32(1)
#endif
#if !defined(TP_STATUS_LOSING)
#define TP_STATUS_LOSING BIT_U32(2)
#endif
#if !defined(TP_STATUS_CSUMNOTREADY)
#define TP_STATUS_CSUMNOTREADY BIT_U32(3)
#endif
#if !defined(TP_STATUS_VLAN_VALID)
#define TP_STATUS_VLAN_VALID BIT_U32(4)
#endif
#if !defined(TP_STATUS_BLK_TMO)
#define TP_STATUS_BLK_TMO BIT_U32(5)
#endif
#if !defined(TP_STATUS_VLAN_TPID_VALID)
#define TP_STATUS_VLAN_TPID_VALID BIT_U32(6)
#endif
#if !defined(TP_STATUS_CSUM_VALID)
#define TP_STATUS_CSUM_VALID BIT_U32(7)
#endif

/* timestamp source flags */
#if !defined(TP_STATUS_TS_SOFTWARE)
#define TP_STATUS_TS_SOFTWARE BIT_U32(29)
#endif
#if !defined(TP_STATUS_TS_SYS_HARDWARE)
#define TP_STATUS_TS_SYS_HARDWARE BIT_U32(30) /* kernel comment says: "deprecated, never set" */
#endif
#if !defined(TP_STATUS_TS_RAW_HARDWARE)
#define TP_STATUS_TS_RAW_HARDWARE BIT_U32(31)
#endif

#if !defined(TP_STATUS_USER_BUSY)
/* HACK: marker stored in tp_status for frames that user space is still
 * working on. In autofp mode the capture thread can go around the ring
 * and reach a frame that a worker thread has not released yet.
 *
 * The marker is bits 29, 30 and 31 together. 29 and 31 are the software
 * and hardware timestamp flags, 30 should never be set by the kernel, so
 * the combination should not appear on the rx-ring by itself.
 *
 * The casts are there because the kernel defines almost all of these
 * flags as int, not unsigned int. */
#define TP_STATUS_USER_BUSY                                                                        \
    (uint32_t)((uint32_t)TP_STATUS_TS_SOFTWARE | (uint32_t)TP_STATUS_TS_SYS_HARDWARE |             \
               (uint32_t)TP_STATUS_TS_RAW_HARDWARE)
#endif
/* true when all bits of TP_STATUS_USER_BUSY are set in tp_status */
#define FRAME_BUSY(tp_status)                                                                      \
    (((uint32_t)(tp_status) & (uint32_t)TP_STATUS_USER_BUSY) == (uint32_t)TP_STATUS_USER_BUSY)
4a1a0080 230
62e63e3f
EL
/* Result codes of the ring read functions. */
enum {
    AFP_READ_OK = 0,
    AFP_READ_FAILURE = 1,
    /** Error during treatment by other functions of Suricata */
    AFP_SURI_FAILURE = 2,
    AFP_KERNEL_DROP = 3,
};
238
1992a227
EL
/* Error classes; numbering starts at 1. */
enum {
    AFP_FATAL_ERROR = 1,
    AFP_RECOVERABLE_ERROR = 2,
};
243
49b7b00f
EL
/* One frame header pointer, viewable as a TPACKET_V2 header, as a
 * TPACKET_V3 header (when available) or as an untyped address. */
union thdr {
    struct tpacket2_hdr *h2;
#ifdef HAVE_TPACKET_V3
    struct tpacket3_hdr *h3;
#endif
    void *raw;
};
251
5e05fedc 252#ifdef HAVE_PACKET_EBPF
06173267 253static int AFPBypassCallback(Packet *p);
8c880879 254static int AFPXDPBypassCallback(Packet *p);
5e05fedc 255#endif
06173267 256
91e1256b 257#define MAX_MAPS 32
c45d8985
EL
258/**
259 * \brief Structure to hold thread specific variables.
260 */
261typedef struct AFPThreadVars_
262{
69d0d484
VJ
263 union AFPRing {
264 char *v2;
265 struct iovec *v3;
266 } ring;
b797fd92 267
c45d8985 268 /* counters */
3ce39433 269 uint64_t pkts;
c45d8985 270
ff6365dd
EL
271 ThreadVars *tv;
272 TmSlot *slot;
9500d12c
EL
273 LiveDevice *livedev;
274 /* data link type for the thread */
b797fd92 275 uint32_t datalink;
9500d12c 276
d65f4585 277#ifdef HAVE_PACKET_EBPF
94a622cb 278 /* File descriptor of the IPv4 flow bypass table maps */
d65f4585 279 int v4_map_fd;
94a622cb 280 /* File descriptor of the IPv6 flow bypass table maps */
d65f4585
EL
281 int v6_map_fd;
282#endif
283
9500d12c 284 unsigned int frame_offset;
ff6365dd 285
9500d12c
EL
286 ChecksumValidationMode checksum_mode;
287
b797fd92 288 /* references to packet and drop counters */
9500d12c
EL
289 uint16_t capture_kernel_packets;
290 uint16_t capture_kernel_drops;
9efa4ace 291 uint16_t capture_errors;
a022648b 292 uint16_t afpacket_spin;
cad0ff9e
VJ
293 uint16_t capture_afp_poll;
294 uint16_t capture_afp_poll_signal;
295 uint16_t capture_afp_poll_timeout;
296 uint16_t capture_afp_poll_data;
297 uint16_t capture_afp_poll_err;
39bf623f
VJ
298 uint16_t capture_afp_send_err;
299
300 uint64_t send_errors_logged; /**< snapshot of send errors logged. */
9500d12c
EL
301
302 /* handle state */
303 uint8_t afp_state;
304 uint8_t copy_mode;
4bfa3aea 305 unsigned int flags;
9500d12c
EL
306
307 /* IPS peer */
308 AFPPeer *mpeer;
309
9500d12c
EL
310 /*
311 * Init related members
312 */
51eb9605 313
9500d12c
EL
314 /* thread specific socket */
315 int socket;
b797fd92
EL
316
317 int ring_size;
fa902abe 318 int block_size;
234aefdf 319 int block_timeout;
e80b30c0
EL
320 /* socket buffer size */
321 int buffer_size;
fa902abe 322 /* Filter */
ab1200fb 323 const char *bpf_filter;
9500d12c 324
df7dbe36 325 int promisc;
e80b30c0 326
9500d12c 327 int down_count;
662dccd8 328
df0ed6fd 329 uint16_t cluster_id;
e80b30c0 330 int cluster_type;
c45d8985 331
fbca1a4e
EL
332 int threads;
333
69d0d484
VJ
334 union AFPTpacketReq {
335 struct tpacket_req v2;
c2d0d938 336#ifdef HAVE_TPACKET_V3
69d0d484 337 struct tpacket_req3 v3;
c2d0d938 338#endif
69d0d484 339 } req;
b797fd92
EL
340
341 char iface[AFP_IFACE_NAME_LENGTH];
342 /* IPS output iface */
343 char out_iface[AFP_IFACE_NAME_LENGTH];
662dccd8 344
cba41207
AG
345 /* mmap'ed ring buffer */
346 unsigned int ring_buflen;
347 uint8_t *ring_buf;
91e1256b 348
36838017 349#ifdef HAVE_PACKET_EBPF
5e05fedc
VJ
350 uint8_t xdp_mode;
351 int ebpf_lb_fd;
352 int ebpf_filter_fd;
4cf53100 353 struct ebpf_timeout_config ebpf_t_config;
36838017 354#endif
315c29a8 355
c45d8985
EL
356} AFPThreadVars;
357
15e3bdb7
VJ
358static TmEcode ReceiveAFPThreadInit(ThreadVars *, const void *, void **);
359static void ReceiveAFPThreadExitStats(ThreadVars *, void *);
360static TmEcode ReceiveAFPThreadDeinit(ThreadVars *, void *);
361static TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot);
c45d8985 362
15e3bdb7
VJ
363static TmEcode DecodeAFPThreadInit(ThreadVars *, const void *, void **);
364static TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data);
f8aed4ce 365static TmEcode DecodeAFP(ThreadVars *, Packet *, void *);
c45d8985 366
15e3bdb7 367static TmEcode AFPSetBPFFilter(AFPThreadVars *ptv);
662dccd8 368static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose);
13f13b6d
EL
369static int AFPGetDevFlags(int fd, const char *ifname);
370static int AFPDerefSocket(AFPPeer* peer);
371static int AFPRefSocket(AFPPeer* peer);
f2a6fb8a 372
19475165 373
c45d8985
EL
374/**
375 * \brief Registration Function for RecieveAFP.
376 * \todo Unit tests are needed for this module.
377 */
8f1d7503
KS
378void TmModuleReceiveAFPRegister (void)
379{
c45d8985
EL
380 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
381 tmm_modules[TMM_RECEIVEAFP].ThreadInit = ReceiveAFPThreadInit;
ff6365dd 382 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
e80b30c0 383 tmm_modules[TMM_RECEIVEAFP].PktAcqLoop = ReceiveAFPLoop;
57e0bd39 384 tmm_modules[TMM_RECEIVEAFP].PktAcqBreakLoop = NULL;
c45d8985 385 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = ReceiveAFPThreadExitStats;
7127ae2b 386 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = ReceiveAFPThreadDeinit;
c45d8985 387 tmm_modules[TMM_RECEIVEAFP].cap_flags = SC_CAP_NET_RAW;
cd4705e6 388 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
19475165 389
c45d8985
EL
390}
391
a6457262
EL
/**
 * \defgroup afppeers AFP peers list
 *
 * AF_PACKET has an IPS mode where interfaces are peered: packets from
 * one interface are sent to the peered interface and the other way around.
 * The ::AFPPeer list maintains the list of peers. Each ::AFPPeer stores the
 * information needed to send packets on the interface.
 * An element of the list must not be destroyed during the run of Suricata as it
 * is used by ::Packet and other threads.
 *
 * @{
 */
404
662dccd8
EL
405typedef struct AFPPeersList_ {
406 TAILQ_HEAD(, AFPPeer_) peers; /**< Head of list of fragments. */
407 int cnt;
408 int peered;
60400163
EL
409 int turn; /**< Next value for initialisation order */
410 SC_ATOMIC_DECLARE(int, reached); /**< Counter used to synchronize start */
662dccd8
EL
411} AFPPeersList;
412
413/**
a6457262
EL
414 * \brief Update the peer.
415 *
416 * Update the AFPPeer of a thread ie set new state, socket number
417 * or iface index.
418 *
662dccd8 419 */
ab1200fb 420static void AFPPeerUpdate(AFPThreadVars *ptv)
662dccd8
EL
421{
422 if (ptv->mpeer == NULL) {
423 return;
424 }
662dccd8
EL
425 (void)SC_ATOMIC_SET(ptv->mpeer->if_idx, AFPGetIfnumByDev(ptv->socket, ptv->iface, 0));
426 (void)SC_ATOMIC_SET(ptv->mpeer->socket, ptv->socket);
427 (void)SC_ATOMIC_SET(ptv->mpeer->state, ptv->afp_state);
428}
429
a6457262
EL
430/**
431 * \brief Clean and free ressource used by an ::AFPPeer
432 */
ab1200fb 433static void AFPPeerClean(AFPPeer *peer)
662dccd8
EL
434{
435 if (peer->flags & AFP_SOCK_PROTECT)
436 SCMutexDestroy(&peer->sock_protect);
662dccd8
EL
437 SCFree(peer);
438}
439
440AFPPeersList peerslist;
441
442
a6457262
EL
443/**
444 * \brief Init the global list of ::AFPPeer
445 */
662dccd8
EL
446TmEcode AFPPeersListInit()
447{
448 SCEnter();
449 TAILQ_INIT(&peerslist.peers);
450 peerslist.peered = 0;
451 peerslist.cnt = 0;
60400163
EL
452 peerslist.turn = 0;
453 SC_ATOMIC_INIT(peerslist.reached);
454 (void) SC_ATOMIC_SET(peerslist.reached, 0);
662dccd8
EL
455 SCReturnInt(TM_ECODE_OK);
456}
457
a6457262
EL
458/**
459 * \brief Check that all ::AFPPeer got a peer
460 *
461 * \retval TM_ECODE_FAILED if some threads are not peered or TM_ECODE_OK else.
462 */
662dccd8
EL
463TmEcode AFPPeersListCheck()
464{
465#define AFP_PEERS_MAX_TRY 4
466#define AFP_PEERS_WAIT 20000
467 int try = 0;
468 SCEnter();
469 while (try < AFP_PEERS_MAX_TRY) {
470 if (peerslist.cnt != peerslist.peered) {
471 usleep(AFP_PEERS_WAIT);
472 } else {
473 SCReturnInt(TM_ECODE_OK);
474 }
475 try++;
476 }
477 SCLogError(SC_ERR_AFP_CREATE, "Threads number not equals");
478 SCReturnInt(TM_ECODE_FAILED);
479}
480
a6457262
EL
481/**
482 * \brief Declare a new AFP thread to AFP peers list.
483 */
ab1200fb 484static TmEcode AFPPeersListAdd(AFPThreadVars *ptv)
662dccd8
EL
485{
486 SCEnter();
487 AFPPeer *peer = SCMalloc(sizeof(AFPPeer));
488 AFPPeer *pitem;
ac56b1bf 489 int mtu, out_mtu;
662dccd8 490
e176be6f 491 if (unlikely(peer == NULL)) {
662dccd8
EL
492 SCReturnInt(TM_ECODE_FAILED);
493 }
494 memset(peer, 0, sizeof(AFPPeer));
495 SC_ATOMIC_INIT(peer->socket);
13f13b6d 496 SC_ATOMIC_INIT(peer->sock_usage);
662dccd8
EL
497 SC_ATOMIC_INIT(peer->if_idx);
498 SC_ATOMIC_INIT(peer->state);
499 peer->flags = ptv->flags;
60400163 500 peer->turn = peerslist.turn++;
662dccd8
EL
501
502 if (peer->flags & AFP_SOCK_PROTECT) {
503 SCMutexInit(&peer->sock_protect, NULL);
504 }
505
13f13b6d 506 (void)SC_ATOMIC_SET(peer->sock_usage, 0);
662dccd8
EL
507 (void)SC_ATOMIC_SET(peer->state, AFP_STATE_DOWN);
508 strlcpy(peer->iface, ptv->iface, AFP_IFACE_NAME_LENGTH);
509 ptv->mpeer = peer;
510 /* add element to iface list */
511 TAILQ_INSERT_TAIL(&peerslist.peers, peer, next);
662dccd8 512
13f13b6d
EL
513 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
514 peerslist.cnt++;
515
516 /* Iter to find a peer */
517 TAILQ_FOREACH(pitem, &peerslist.peers, next) {
518 if (pitem->peer)
519 continue;
520 if (strcmp(pitem->iface, ptv->out_iface))
521 continue;
522 peer->peer = pitem;
523 pitem->peer = peer;
524 mtu = GetIfaceMTU(ptv->iface);
525 out_mtu = GetIfaceMTU(ptv->out_iface);
526 if (mtu != out_mtu) {
527 SCLogError(SC_ERR_AFP_CREATE,
528 "MTU on %s (%d) and %s (%d) are not equal, "
529 "transmission of packets bigger than %d will fail.",
530 ptv->iface, mtu,
531 ptv->out_iface, out_mtu,
532 (out_mtu > mtu) ? mtu : out_mtu);
533 }
534 peerslist.peered += 2;
535 break;
ac56b1bf 536 }
662dccd8
EL
537 }
538
539 AFPPeerUpdate(ptv);
540
541 SCReturnInt(TM_ECODE_OK);
542}
543
ab1200fb 544static int AFPPeersListWaitTurn(AFPPeer *peer)
60400163 545{
b2691cbe
EL
546 /* If turn is zero, we already have started threads once */
547 if (peerslist.turn == 0)
548 return 0;
549
60400163
EL
550 if (peer->turn == SC_ATOMIC_GET(peerslist.reached))
551 return 0;
552 return 1;
553}
554
ab1200fb 555static void AFPPeersListReachedInc(void)
60400163 556{
b2691cbe
EL
557 if (peerslist.turn == 0)
558 return;
559
531ff3dd 560 if ((SC_ATOMIC_ADD(peerslist.reached, 1) + 1) == peerslist.turn) {
b2691cbe
EL
561 SCLogInfo("All AFP capture threads are running.");
562 (void)SC_ATOMIC_SET(peerslist.reached, 0);
563 /* Set turn to 0 to skip syncrhonization when ReceiveAFPLoop is
564 * restarted.
565 */
566 peerslist.turn = 0;
567 }
60400163
EL
568}
569
ab1200fb 570static int AFPPeersListStarted(void)
919377d4
EL
571{
572 return !peerslist.turn;
573}
574
a6457262
EL
575/**
576 * \brief Clean the global peers list.
577 */
662dccd8
EL
578void AFPPeersListClean()
579{
580 AFPPeer *pitem;
581
582 while ((pitem = TAILQ_FIRST(&peerslist.peers))) {
583 TAILQ_REMOVE(&peerslist.peers, pitem, next);
584 AFPPeerClean(pitem);
585 }
586}
587
a6457262
EL
588/**
589 * @}
590 */
591
c45d8985
EL
592/**
593 * \brief Registration Function for DecodeAFP.
594 * \todo Unit tests are needed for this module.
595 */
8f1d7503
KS
596void TmModuleDecodeAFPRegister (void)
597{
c45d8985
EL
598 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
599 tmm_modules[TMM_DECODEAFP].ThreadInit = DecodeAFPThreadInit;
600 tmm_modules[TMM_DECODEAFP].Func = DecodeAFP;
601 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
2864f9ee 602 tmm_modules[TMM_DECODEAFP].ThreadDeinit = DecodeAFPThreadDeinit;
c45d8985 603 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
bc6cf438 604 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
c45d8985
EL
605}
606
662dccd8 607
e80b30c0
EL
608static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose);
609
e8a4a4c4 610static inline void AFPDumpCounters(AFPThreadVars *ptv)
6efd37a3 611{
6efd37a3 612#ifdef PACKET_STATISTICS
e8a4a4c4
EL
613 struct tpacket_stats kstats;
614 socklen_t len = sizeof (struct tpacket_stats);
615 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
616 &kstats, &len) > -1) {
617 SCLogDebug("(%s) Kernel: Packets %" PRIu32 ", dropped %" PRIu32 "",
618 ptv->tv->name,
619 kstats.tp_packets, kstats.tp_drops);
8992275b
VJ
620 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, kstats.tp_packets);
621 StatsAddUI64(ptv->tv, ptv->capture_kernel_drops, kstats.tp_drops);
3ce39433
EL
622 (void) SC_ATOMIC_ADD(ptv->livedev->drop, (uint64_t) kstats.tp_drops);
623 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, (uint64_t) kstats.tp_packets);
39bf623f
VJ
624
625 const uint64_t value = SC_ATOMIC_GET(ptv->mpeer->send_errors);
626 if (value > ptv->send_errors_logged) {
627 StatsAddUI64(ptv->tv, ptv->capture_afp_send_err, value - ptv->send_errors_logged);
628 ptv->send_errors_logged = value;
629 }
6efd37a3 630 }
e8a4a4c4 631#endif
6efd37a3 632}
c45d8985 633
ecf59be4
EL
634/**
635 * \brief AF packet write function.
636 *
637 * This function has to be called before the memory
638 * related to Packet in ring buffer is released.
639 *
640 * \param pointer to Packet
641 * \param version of capture: TPACKET_V2 or TPACKET_V3
642 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
643 *
644 */
dab03672 645static void AFPWritePacket(Packet *p, int version)
662dccd8
EL
646{
647 struct sockaddr_ll socket_address;
648 int socket;
649
650 if (p->afp_v.copy_mode == AFP_COPY_MODE_IPS) {
4c7eb644 651 if (PacketTestAction(p, ACTION_DROP)) {
dab03672 652 return;
662dccd8
EL
653 }
654 }
655
662dccd8
EL
656 if (p->ethh == NULL) {
657 SCLogWarning(SC_ERR_INVALID_VALUE, "Should have an Ethernet header");
dab03672 658 return;
662dccd8 659 }
dab03672 660
662dccd8
EL
661 /* Index of the network device */
662 socket_address.sll_ifindex = SC_ATOMIC_GET(p->afp_v.peer->if_idx);
663 /* Address length*/
664 socket_address.sll_halen = ETH_ALEN;
665 /* Destination MAC */
666 memcpy(socket_address.sll_addr, p->ethh, 6);
667
668 /* Send packet, locking the socket if necessary */
669 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
670 SCMutexLock(&p->afp_v.peer->sock_protect);
671 socket = SC_ATOMIC_GET(p->afp_v.peer->socket);
ecf59be4 672
2fab3ff0
VJ
673 if (sendto(socket, GET_PKT_DATA(p), GET_PKT_LEN(p), 0, (struct sockaddr *)&socket_address,
674 sizeof(struct sockaddr_ll)) < 0) {
c7ad3f8d
VJ
675 if (SC_ATOMIC_ADD(p->afp_v.peer->send_errors, 1) == 0) {
676 SCLogWarning(SC_ERR_SOCKET, "sending packet failed on socket %d: %s", socket,
677 strerror(errno));
678 }
662dccd8
EL
679 }
680 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
681 SCMutexUnlock(&p->afp_v.peer->sock_protect);
662dccd8
EL
682}
683
ab1200fb 684static void AFPReleaseDataFromRing(Packet *p)
2011a3f8 685{
3f8e15f7
VJ
686 DEBUG_VALIDATE_BUG_ON(PKT_IS_PSEUDOPKT(p));
687
662dccd8
EL
688 /* Need to be in copy mode and need to detect early release
689 where Ethernet header could not be set (and pseudo packet) */
3f8e15f7 690 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
ecf59be4 691 AFPWritePacket(p, TPACKET_V2);
662dccd8 692 }
13f13b6d 693
3f79f452
VJ
694 BUG_ON(p->afp_v.relptr == NULL);
695
696 union thdr h;
697 h.raw = p->afp_v.relptr;
698 h.h2->tp_status = TP_STATUS_KERNEL;
680e941a 699
12252ba7
VJ
700 (void)AFPDerefSocket(p->afp_v.mpeer);
701
680e941a 702 AFPV_CLEANUP(&p->afp_v);
b076a26c
KS
703}
704
ecf59be4 705#ifdef HAVE_TPACKET_V3
ab1200fb 706static void AFPReleasePacketV3(Packet *p)
bae1b03c 707{
3f8e15f7
VJ
708 DEBUG_VALIDATE_BUG_ON(PKT_IS_PSEUDOPKT(p));
709
bae1b03c
EL
710 /* Need to be in copy mode and need to detect early release
711 where Ethernet header could not be set (and pseudo packet) */
3f8e15f7 712 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
ecf59be4 713 AFPWritePacket(p, TPACKET_V3);
bae1b03c
EL
714 }
715 PacketFreeOrRelease(p);
716}
ecf59be4 717#endif
bae1b03c 718
ab1200fb 719static void AFPReleasePacket(Packet *p)
b076a26c
KS
720{
721 AFPReleaseDataFromRing(p);
722 PacketFreeOrRelease(p);
2011a3f8
EL
723}
724
a022648b
VJ
725/** \internal
726 * \brief recoverable error - release packet and
727 * return AFP_SURI_FAILURE
728 */
729static inline int AFPSuriFailure(AFPThreadVars *ptv, union thdr h)
730{
731 h.h2->tp_status = TP_STATUS_KERNEL;
732 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
733 ptv->frame_offset = 0;
734 }
735 SCReturnInt(AFP_SURI_FAILURE);
736}
737
738static inline void AFPReadApplyBypass(const AFPThreadVars *ptv, Packet *p)
739{
5e05fedc 740#ifdef HAVE_PACKET_EBPF
a022648b
VJ
741 if (ptv->flags & AFP_BYPASS) {
742 p->BypassPacketsFlow = AFPBypassCallback;
a022648b
VJ
743 p->afp_v.v4_map_fd = ptv->v4_map_fd;
744 p->afp_v.v6_map_fd = ptv->v6_map_fd;
745 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
a022648b
VJ
746 }
747 if (ptv->flags & AFP_XDPBYPASS) {
748 p->BypassPacketsFlow = AFPXDPBypassCallback;
a022648b
VJ
749 p->afp_v.v4_map_fd = ptv->v4_map_fd;
750 p->afp_v.v6_map_fd = ptv->v6_map_fd;
751 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
a022648b 752 }
5e05fedc 753#endif
a022648b
VJ
754}
755
756/** \internal
757 * \brief setup packet for AFPReadFromRing
758 */
ace349d4 759static void AFPReadFromRingSetupPacket(
a022648b
VJ
760 AFPThreadVars *ptv, union thdr h, const unsigned int tp_status, Packet *p)
761{
762 PKT_SET_SRC(p, PKT_SRC_WIRE);
763
ad862fff
VJ
764 /* flag the packet as TP_STATUS_USER_BUSY, which is ignore by the kernel, but
765 * acts as an indicator that we've reached a frame that is not yet released by
766 * us in autofp mode. It will be cleared when the frame gets released to the kernel. */
a022648b
VJ
767 h.h2->tp_status |= TP_STATUS_USER_BUSY;
768 p->livedev = ptv->livedev;
769 p->datalink = ptv->datalink;
770 ptv->pkts++;
771
772 AFPReadApplyBypass(ptv, p);
773
774 if (h.h2->tp_len > h.h2->tp_snaplen) {
775 SCLogDebug("Packet length (%d) > snaplen (%d), truncating", h.h2->tp_len, h.h2->tp_snaplen);
776 }
777
778 /* get vlan id from header */
779 if ((ptv->flags & AFP_VLAN_IN_HEADER) &&
780 (tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci)) {
781 p->vlan_id[0] = h.h2->tp_vlan_tci & 0x0fff;
782 p->vlan_idx = 1;
2fab3ff0 783 p->afp_v.vlan_tci = h.h2->tp_vlan_tci;
a022648b
VJ
784 }
785
2cbfcce0 786 (void)PacketSetData(p, (unsigned char *)h.raw + h.h2->tp_mac, h.h2->tp_snaplen);
a022648b 787
558930a1 788 p->ReleasePacket = AFPReleasePacket;
e9c6ad19 789 p->afp_v.relptr = h.raw;
8a5b945c
VJ
790 if (ptv->flags & AFP_NEED_PEER) {
791 p->afp_v.mpeer = ptv->mpeer;
792 AFPRefSocket(ptv->mpeer);
793 } else {
794 p->afp_v.mpeer = NULL;
795 }
558930a1 796 p->afp_v.copy_mode = ptv->copy_mode;
e9c6ad19 797 p->afp_v.peer = (p->afp_v.copy_mode == AFP_COPY_MODE_NONE) ? NULL : ptv->mpeer->peer;
558930a1 798
a022648b
VJ
799 /* Timestamp */
800 p->ts.tv_sec = h.h2->tp_sec;
801 p->ts.tv_usec = h.h2->tp_nsec / 1000;
802 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)", GET_PKT_LEN(p), p, GET_PKT_DATA(p));
803
804 /* We only check for checksum disable */
805 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
806 p->flags |= PKT_IGNORE_CHECKSUM;
807 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
808 if (ChecksumAutoModeCheck(ptv->pkts, SC_ATOMIC_GET(ptv->livedev->pkts),
809 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
810 ptv->checksum_mode = CHECKSUM_VALIDATION_DISABLE;
811 p->flags |= PKT_IGNORE_CHECKSUM;
812 }
813 } else {
814 if (tp_status & TP_STATUS_CSUMNOTREADY) {
815 p->flags |= PKT_IGNORE_CHECKSUM;
816 }
817 }
a022648b
VJ
818}
819
820static inline int AFPReadFromRingWaitForPacket(AFPThreadVars *ptv)
821{
822 union thdr h;
823 struct timeval start_time;
824 gettimeofday(&start_time, NULL);
825 uint64_t busy_loop_iter = 0;
826
827 /* busy wait loop until we have packets available */
828 while (1) {
829 if (unlikely(suricata_ctl_flags != 0)) {
830 break;
831 }
832 h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
833 if (unlikely(h.raw == NULL)) {
834 return AFP_READ_FAILURE;
835 }
836 const unsigned int tp_status = h.h2->tp_status;
837 if (tp_status == TP_STATUS_KERNEL) {
838 busy_loop_iter++;
839
840 struct timeval cur_time;
841 memset(&cur_time, 0, sizeof(cur_time));
842 uint64_t milliseconds =
843 ((cur_time.tv_sec - start_time.tv_sec) * 1000) +
844 (((1000000 + cur_time.tv_usec - start_time.tv_usec) / 1000) - 1000);
845 if (milliseconds > 1000) {
846 break;
847 }
848 continue;
849 }
850 break;
851 }
852 if (busy_loop_iter) {
853 StatsAddUI64(ptv->tv, ptv->afpacket_spin, busy_loop_iter);
854 }
855 return AFP_READ_OK;
856}
857
49b7b00f
EL
858/**
859 * \brief AF packet read function for ring
860 *
861 * This function fills
862 * From here the packets are picked up by the DecodeAFP thread.
863 *
864 * \param user pointer to AFPThreadVars
865 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
866 */
ab1200fb 867static int AFPReadFromRing(AFPThreadVars *ptv)
49b7b00f 868{
49b7b00f 869 union thdr h;
a022648b
VJ
870 bool emergency_flush = false;
871 const unsigned int start_pos = ptv->frame_offset;
4d8f70c6 872
a022648b
VJ
873 /* poll() told us there are frames, so lets wait for at least
874 * one frame to become available. */
875 if (AFPReadFromRingWaitForPacket(ptv) != AFP_READ_OK)
876 return AFP_READ_FAILURE;
49b7b00f 877
a022648b 878 /* process the frames in the ring */
a369f8c3 879 while (1) {
53c02334
AS
880 if (unlikely(suricata_ctl_flags != 0)) {
881 break;
882 }
69d0d484 883 h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
9efa4ace 884 if (unlikely(h.raw == NULL)) {
a022648b 885 return AFP_READ_FAILURE;
34b3f194 886 }
a022648b
VJ
887 const unsigned int tp_status = h.h2->tp_status;
888 /* if we find a kernel frame we are done */
889 if (unlikely(tp_status == TP_STATUS_KERNEL)) {
890 break;
27b5136b 891 }
a022648b 892 /* if in autofp mode the frame is still busy, return to poll */
ad862fff 893 if (unlikely(FRAME_BUSY(tp_status))) {
a022648b 894 break;
4a1a0080 895 }
a022648b 896 emergency_flush |= ((tp_status & TP_STATUS_LOSING) != 0);
4a1a0080 897
a022648b 898 if ((ptv->flags & AFP_EMERGENCY_MODE) && emergency_flush) {
27b5136b
EL
899 h.h2->tp_status = TP_STATUS_KERNEL;
900 goto next_frame;
a369f8c3
EL
901 }
902
a022648b 903 Packet *p = PacketGetFromQueueOrAlloc();
a369f8c3 904 if (p == NULL) {
a022648b 905 return AFPSuriFailure(ptv, h);
8c880879 906 }
ace349d4 907 AFPReadFromRingSetupPacket(ptv, h, tp_status, p);
5f12b234 908
a369f8c3 909 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
a022648b 910 return AFPSuriFailure(ptv, h);
49b7b00f 911 }
27b5136b 912next_frame:
69d0d484 913 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
34b3f194 914 ptv->frame_offset = 0;
350d7619 915 /* Get out of loop to be sure we will reach maintenance tasks */
a022648b
VJ
916 if (ptv->frame_offset == start_pos)
917 break;
34b3f194 918 }
34b3f194 919 }
a022648b
VJ
920 if (emergency_flush) {
921 AFPDumpCounters(ptv);
922 }
49b7b00f
EL
923 SCReturnInt(AFP_READ_OK);
924}
925
f947539d 926#ifdef HAVE_TPACKET_V3
bae1b03c
EL
927static inline void AFPFlushBlock(struct tpacket_block_desc *pbd)
928{
929 pbd->hdr.bh1.block_status = TP_STATUS_KERNEL;
930}
931
932static inline int AFPParsePacketV3(AFPThreadVars *ptv, struct tpacket_block_desc *pbd, struct tpacket3_hdr *ppd)
933{
934 Packet *p = PacketGetFromQueueOrAlloc();
935 if (p == NULL) {
9efa4ace 936 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
937 }
938 PKT_SET_SRC(p, PKT_SRC_WIRE);
a022648b
VJ
939
940 AFPReadApplyBypass(ptv, p);
bae1b03c
EL
941
942 ptv->pkts++;
bae1b03c
EL
943 p->livedev = ptv->livedev;
944 p->datalink = ptv->datalink;
945
bcc03f17 946 if ((ptv->flags & AFP_VLAN_IN_HEADER) &&
e41a9d63
AG
947 (ppd->tp_status & TP_STATUS_VLAN_VALID || ppd->hv1.tp_vlan_tci)) {
948 p->vlan_id[0] = ppd->hv1.tp_vlan_tci & 0x0fff;
949 p->vlan_idx = 1;
2fab3ff0 950 p->afp_v.vlan_tci = ppd->hv1.tp_vlan_tci;
e41a9d63
AG
951 }
952
2cbfcce0
VJ
953 (void)PacketSetData(p, (unsigned char *)ppd + ppd->tp_mac, ppd->tp_snaplen);
954
558930a1 955 p->ReleasePacket = AFPReleasePacketV3;
e9c6ad19
VJ
956 p->afp_v.relptr = NULL;
957 p->afp_v.mpeer = NULL;
558930a1 958 p->afp_v.copy_mode = ptv->copy_mode;
e9c6ad19 959 p->afp_v.peer = (p->afp_v.copy_mode == AFP_COPY_MODE_NONE) ? NULL : ptv->mpeer->peer;
558930a1 960
bae1b03c
EL
961 /* Timestamp */
962 p->ts.tv_sec = ppd->tp_sec;
963 p->ts.tv_usec = ppd->tp_nsec/1000;
964 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
965 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
966
967 /* We only check for checksum disable */
968 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
969 p->flags |= PKT_IGNORE_CHECKSUM;
970 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
39577507 971 if (ChecksumAutoModeCheck(ptv->pkts,
bae1b03c
EL
972 SC_ATOMIC_GET(ptv->livedev->pkts),
973 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
39577507 974 ptv->checksum_mode = CHECKSUM_VALIDATION_DISABLE;
bae1b03c
EL
975 p->flags |= PKT_IGNORE_CHECKSUM;
976 }
977 } else {
978 if (ppd->tp_status & TP_STATUS_CSUMNOTREADY) {
979 p->flags |= PKT_IGNORE_CHECKSUM;
980 }
981 }
982
983 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
9efa4ace 984 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
985 }
986
987 SCReturnInt(AFP_READ_OK);
988}
989
990static inline int AFPWalkBlock(AFPThreadVars *ptv, struct tpacket_block_desc *pbd)
991{
e63db9d1
VJ
992 const int num_pkts = pbd->hdr.bh1.num_pkts;
993 uint8_t *ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;
994
995 for (int i = 0; i < num_pkts; ++i) {
996 int ret = AFPParsePacketV3(ptv, pbd, (struct tpacket3_hdr *)ppd);
9efa4ace
EL
997 switch (ret) {
998 case AFP_READ_OK:
999 break;
1000 case AFP_SURI_FAILURE:
1001 /* Internal error but let's just continue and
1002 * treat thenext packet */
1003 break;
1004 case AFP_READ_FAILURE:
1005 SCReturnInt(AFP_READ_FAILURE);
1006 default:
1007 SCReturnInt(ret);
5f84b55d 1008 }
bae1b03c
EL
1009 ppd = ppd + ((struct tpacket3_hdr *)ppd)->tp_next_offset;
1010 }
1011
1012 SCReturnInt(AFP_READ_OK);
1013}
f947539d 1014#endif /* HAVE_TPACKET_V3 */
bae1b03c
EL
1015
1016/**
1017 * \brief AF packet read function for ring
1018 *
1019 * This function fills
1020 * From here the packets are picked up by the DecodeAFP thread.
1021 *
1022 * \param user pointer to AFPThreadVars
1023 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
1024 */
ab1200fb 1025static int AFPReadFromRingV3(AFPThreadVars *ptv)
bae1b03c 1026{
c2d0d938 1027#ifdef HAVE_TPACKET_V3
bae1b03c
EL
1028 /* Loop till we have packets available */
1029 while (1) {
1030 if (unlikely(suricata_ctl_flags != 0)) {
1031 SCLogInfo("Exiting AFP V3 read loop");
1032 break;
1033 }
1034
e63db9d1
VJ
1035 struct tpacket_block_desc *pbd =
1036 (struct tpacket_block_desc *)ptv->ring.v3[ptv->frame_offset].iov_base;
bae1b03c
EL
1037
1038 /* block is not ready to be read */
1039 if ((pbd->hdr.bh1.block_status & TP_STATUS_USER) == 0) {
1040 SCReturnInt(AFP_READ_OK);
1041 }
1042
e63db9d1 1043 int ret = AFPWalkBlock(ptv, pbd);
9efa4ace 1044 if (unlikely(ret != AFP_READ_OK)) {
bae1b03c 1045 AFPFlushBlock(pbd);
9efa4ace 1046 SCReturnInt(ret);
bae1b03c
EL
1047 }
1048
1049 AFPFlushBlock(pbd);
69d0d484 1050 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
bae1b03c
EL
1051 /* return to maintenance task after one loop on the ring */
1052 if (ptv->frame_offset == 0) {
1053 SCReturnInt(AFP_READ_OK);
1054 }
1055 }
c2d0d938 1056#endif
bae1b03c
EL
1057 SCReturnInt(AFP_READ_OK);
1058}
1059
13f13b6d
EL
1060/**
1061 * \brief Reference socket
1062 *
1063 * \retval O in case of failure, 1 in case of success
1064 */
1065static int AFPRefSocket(AFPPeer* peer)
1066{
1067 if (unlikely(peer == NULL))
1068 return 0;
1069
1070 (void)SC_ATOMIC_ADD(peer->sock_usage, 1);
1071 return 1;
1072}
1073
1074
1075/**
1076 * \brief Dereference socket
1077 *
1078 * \retval 1 if socket is still alive, 0 if not
1079 */
1080static int AFPDerefSocket(AFPPeer* peer)
1081{
4424f5a2
EL
1082 if (peer == NULL)
1083 return 1;
1084
531ff3dd 1085 if (SC_ATOMIC_SUB(peer->sock_usage, 1) == 1) {
12252ba7 1086 return 0;
13f13b6d
EL
1087 }
1088 return 1;
1089}
1090
12252ba7 1091static void AFPCloseSocket(AFPThreadVars *ptv)
13f13b6d 1092{
12252ba7
VJ
1093 if (ptv->mpeer != NULL)
1094 BUG_ON(SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0);
49b7b00f 1095
12252ba7 1096 if (ptv->flags & AFP_TPACKET_V3) {
5f84b55d 1097#ifdef HAVE_TPACKET_V3
12252ba7
VJ
1098 if (ptv->ring.v3) {
1099 SCFree(ptv->ring.v3);
1100 ptv->ring.v3 = NULL;
13f13b6d 1101 }
5f84b55d 1102#endif
12252ba7
VJ
1103 } else {
1104 if (ptv->ring.v2) {
1105 /* only used in reading phase, we can free it */
1106 SCFree(ptv->ring.v2);
1107 ptv->ring.v2 = NULL;
13f13b6d
EL
1108 }
1109 }
12252ba7
VJ
1110 if (ptv->socket != -1) {
1111 SCLogDebug("Cleaning socket connected to '%s'", ptv->iface);
1112 munmap(ptv->ring_buf, ptv->ring_buflen);
1113 close(ptv->socket);
1114 ptv->socket = -1;
1115 }
1116}
1117
1118static void AFPSwitchState(AFPThreadVars *ptv, int state)
1119{
1120 ptv->afp_state = state;
1121 ptv->down_count = 0;
1122
1123 if (state == AFP_STATE_DOWN) {
1124 /* cleanup is done on thread cleanup or try reopen
1125 * as there may still be packets in autofp that
1126 * are referencing us */
1127 (void)SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1);
1128 }
13f13b6d 1129 if (state == AFP_STATE_UP) {
12252ba7
VJ
1130 AFPPeerUpdate(ptv);
1131 (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
13f13b6d
EL
1132 }
1133}
49b7b00f 1134
7fea0ec6
EL
1135static int AFPReadAndDiscardFromRing(AFPThreadVars *ptv, struct timeval *synctv,
1136 uint64_t *discarded_pkts)
919377d4 1137{
919377d4
EL
1138 if (unlikely(suricata_ctl_flags != 0)) {
1139 return 1;
1140 }
1141
f947539d 1142#ifdef HAVE_TPACKET_V3
bae1b03c 1143 if (ptv->flags & AFP_TPACKET_V3) {
cebbe06f 1144 int ret = 0;
e63db9d1
VJ
1145 struct tpacket_block_desc *pbd =
1146 (struct tpacket_block_desc *)ptv->ring.v3[ptv->frame_offset].iov_base;
7fea0ec6 1147 *discarded_pkts += pbd->hdr.bh1.num_pkts;
cebbe06f
VJ
1148 struct tpacket3_hdr *ppd =
1149 (struct tpacket3_hdr *)((uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt);
1150 if (((time_t)ppd->tp_sec > synctv->tv_sec) ||
1151 ((time_t)ppd->tp_sec == synctv->tv_sec &&
1152 (suseconds_t) (ppd->tp_nsec / 1000) > (suseconds_t)synctv->tv_usec)) {
1153 ret = 1;
1154 }
7fea0ec6 1155 AFPFlushBlock(pbd);
69d0d484 1156 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
cebbe06f 1157 return ret;
f947539d
VJ
1158
1159 } else
1160#endif
1161 {
7fea0ec6 1162 /* Read packet from ring */
e63db9d1 1163 union thdr h;
69d0d484 1164 h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
7fea0ec6
EL
1165 if (h.raw == NULL) {
1166 return -1;
1167 }
a022648b
VJ
1168 if (h.h2->tp_status == TP_STATUS_KERNEL)
1169 return 0;
1170
7fea0ec6
EL
1171 if (((time_t)h.h2->tp_sec > synctv->tv_sec) ||
1172 ((time_t)h.h2->tp_sec == synctv->tv_sec &&
1173 (suseconds_t) (h.h2->tp_nsec / 1000) > synctv->tv_usec)) {
1174 return 1;
1175 }
919377d4 1176
a022648b 1177 (*discarded_pkts)++;
7fea0ec6 1178 h.h2->tp_status = TP_STATUS_KERNEL;
69d0d484 1179 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
7fea0ec6
EL
1180 ptv->frame_offset = 0;
1181 }
919377d4
EL
1182 }
1183
919377d4
EL
1184 return 0;
1185}
1186
806844d8
VJ
1187/** \brief wait for all afpacket threads to fully init
1188 *
1189 * Discard packets before all threads are ready, as the cluster
1190 * setup is not complete yet.
1191 *
1192 * if AFPPeersListStarted() returns true init is complete
1193 *
1194 * \retval r 1 = happy, otherwise unhappy
1195 */
7fea0ec6 1196static int AFPSynchronizeStart(AFPThreadVars *ptv, uint64_t *discarded_pkts)
919377d4 1197{
919377d4 1198 struct timeval synctv;
806844d8
VJ
1199 struct pollfd fds;
1200
1201 fds.fd = ptv->socket;
1202 fds.events = POLLIN;
919377d4
EL
1203
1204 /* Set timeval to end of the world */
1205 synctv.tv_sec = 0xffffffff;
1206 synctv.tv_usec = 0xffffffff;
1207
1208 while (1) {
8709a20d 1209 int r = poll(&fds, 1, POLL_TIMEOUT);
806844d8
VJ
1210 if (r > 0 &&
1211 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1212 SCLogWarning(SC_ERR_AFP_READ, "poll failed %02x",
1213 fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL));
1214 return 0;
1215 } else if (r > 0) {
1216 if (AFPPeersListStarted() && synctv.tv_sec == (time_t) 0xffffffff) {
1217 gettimeofday(&synctv, NULL);
1218 }
b9189946 1219 r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
806844d8
VJ
1220 SCLogDebug("Discarding on %s", ptv->tv->name);
1221 switch (r) {
1222 case 1:
9f7ba071 1223 SCLogDebug("Starting to read on %s", ptv->tv->name);
806844d8
VJ
1224 return 1;
1225 case -1:
1226 return r;
1227 }
1228 /* no packets */
1229 } else if (r == 0 && AFPPeersListStarted()) {
86a3f064 1230 SCLogDebug("Starting to read on %s", ptv->tv->name);
806844d8 1231 return 1;
43b6cbd4 1232 } else if (r < 0) { /* only exit on error */
806844d8
VJ
1233 SCLogWarning(SC_ERR_AFP_READ, "poll failed with retval %d", r);
1234 return 0;
919377d4
EL
1235 }
1236 }
1237 return 1;
1238}
1239
13f13b6d
EL
1240/**
1241 * \brief Try to reopen socket
1242 *
1243 * \retval 0 in case of success, negative if error occurs or a condition
1244 * is not met.
1245 */
c45d8985
EL
1246static int AFPTryReopen(AFPThreadVars *ptv)
1247{
13f13b6d
EL
1248 ptv->down_count++;
1249
13f13b6d
EL
1250 /* Don't reconnect till we have packet that did not release data */
1251 if (SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0) {
1252 return -1;
1253 }
c45d8985 1254
12252ba7
VJ
1255 /* ref cnt 0, we can close the old socket */
1256 AFPCloseSocket(ptv);
1257
8709a20d 1258 int afp_activate_r = AFPCreateSocket(ptv, ptv->iface, 0);
c45d8985 1259 if (afp_activate_r != 0) {
13f13b6d
EL
1260 if (ptv->down_count % AFP_DOWN_COUNTER_INTERVAL == 0) {
1261 SCLogWarning(SC_ERR_AFP_CREATE, "Can not open iface '%s'",
1262 ptv->iface);
1263 }
c45d8985
EL
1264 return afp_activate_r;
1265 }
1266
3bea3b39 1267 SCLogInfo("Interface '%s' is back", ptv->iface);
c45d8985
EL
1268 return 0;
1269}
1270
e80b30c0
EL
1271/**
1272 * \brief Main AF_PACKET reading Loop function
1273 */
1274TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot)
1275{
34581ce9
AS
1276 SCEnter();
1277
e80b30c0 1278 AFPThreadVars *ptv = (AFPThreadVars *)data;
e80b30c0
EL
1279 struct pollfd fds;
1280 int r;
34581ce9 1281 TmSlot *s = (TmSlot *)slot;
e8a4a4c4 1282 time_t last_dump = 0;
49612128 1283 time_t current_time;
5f400785 1284 int (*AFPReadFunc) (AFPThreadVars *);
7fea0ec6 1285 uint64_t discarded_pkts = 0;
e80b30c0 1286
34581ce9 1287 ptv->slot = s->slot_next;
e80b30c0 1288
b9189946
VJ
1289 if (ptv->flags & AFP_TPACKET_V3) {
1290 AFPReadFunc = AFPReadFromRingV3;
5f400785 1291 } else {
b9189946 1292 AFPReadFunc = AFPReadFromRing;
5f400785
EL
1293 }
1294
60400163
EL
1295 if (ptv->afp_state == AFP_STATE_DOWN) {
1296 /* Wait for our turn, threads before us must have opened the socket */
1297 while (AFPPeersListWaitTurn(ptv->mpeer)) {
1298 usleep(1000);
1992a227
EL
1299 if (suricata_ctl_flags != 0) {
1300 break;
1301 }
60400163
EL
1302 }
1303 r = AFPCreateSocket(ptv, ptv->iface, 1);
1304 if (r < 0) {
1992a227
EL
1305 switch (-r) {
1306 case AFP_FATAL_ERROR:
1307 SCLogError(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, fatal error");
1992a227
EL
1308 SCReturnInt(TM_ECODE_FAILED);
1309 case AFP_RECOVERABLE_ERROR:
1310 SCLogWarning(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, retrying soon");
1311 }
60400163
EL
1312 }
1313 AFPPeersListReachedInc();
1314 }
1315 if (ptv->afp_state == AFP_STATE_UP) {
86a3f064 1316 SCLogDebug("Thread %s using socket %d", tv->name, ptv->socket);
c99dc5a7 1317 AFPSynchronizeStart(ptv, &discarded_pkts);
7fea0ec6
EL
1318 /* let's reset counter as we will start the capture at the
1319 * next function call */
1320#ifdef PACKET_STATISTICS
1321 struct tpacket_stats kstats;
1322 socklen_t len = sizeof (struct tpacket_stats);
1323 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
1324 &kstats, &len) > -1) {
1325 uint64_t pkts = 0;
1326 SCLogDebug("(%s) Kernel socket startup: Packets %" PRIu32
1327 ", dropped %" PRIu32 "",
1328 ptv->tv->name,
1329 kstats.tp_packets, kstats.tp_drops);
1330 pkts = kstats.tp_packets - discarded_pkts - kstats.tp_drops;
1331 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, pkts);
1332 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, pkts);
1333 }
1334#endif
60400163
EL
1335 }
1336
e80b30c0
EL
1337 fds.fd = ptv->socket;
1338 fds.events = POLLIN;
1339
1340 while (1) {
1341 /* Start by checking the state of our interface */
1342 if (unlikely(ptv->afp_state == AFP_STATE_DOWN)) {
1343 int dbreak = 0;
662dccd8 1344
e80b30c0
EL
1345 do {
1346 usleep(AFP_RECONNECT_TIMEOUT);
1347 if (suricata_ctl_flags != 0) {
1348 dbreak = 1;
1349 break;
1350 }
1351 r = AFPTryReopen(ptv);
09e709d1 1352 fds.fd = ptv->socket;
e80b30c0
EL
1353 } while (r < 0);
1354 if (dbreak == 1)
1355 break;
1356 }
1357
1358 /* make sure we have at least one packet in the packet pool, to prevent
1359 * us from alloc'ing packets at line rate */
3c6e01f6 1360 PacketPoolWait();
e80b30c0 1361
cad0ff9e
VJ
1362 StatsIncr(ptv->tv, ptv->capture_afp_poll);
1363
e80b30c0
EL
1364 r = poll(&fds, 1, POLL_TIMEOUT);
1365
1366 if (suricata_ctl_flags != 0) {
1367 break;
1368 }
1369
1370 if (r > 0 &&
1371 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
cad0ff9e 1372 StatsIncr(ptv->tv, ptv->capture_afp_poll_signal);
e80b30c0 1373 if (fds.revents & (POLLHUP | POLLRDHUP)) {
13f13b6d 1374 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0 1375 continue;
ff6365dd 1376 } else if (fds.revents & POLLERR) {
e80b30c0
EL
1377 char c;
1378 /* Do a recv to get errno */
1379 if (recv(ptv->socket, &c, sizeof c, MSG_PEEK) != -1)
1380 continue; /* what, no error? */
3bea3b39 1381 SCLogError(SC_ERR_AFP_READ,
efbb5ce0 1382 "Error reading data from iface '%s': (%d) %s",
3bea3b39 1383 ptv->iface, errno, strerror(errno));
13f13b6d 1384 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0 1385 continue;
ff6365dd 1386 } else if (fds.revents & POLLNVAL) {
e80b30c0 1387 SCLogError(SC_ERR_AFP_READ, "Invalid polling request");
13f13b6d 1388 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0
EL
1389 continue;
1390 }
1391 } else if (r > 0) {
cad0ff9e 1392 StatsIncr(ptv->tv, ptv->capture_afp_poll_data);
5f400785 1393 r = AFPReadFunc(ptv);
62e63e3f 1394 switch (r) {
27adbfa8
EL
1395 case AFP_READ_OK:
1396 /* Trigger one dump of stats every second */
49612128
EL
1397 current_time = time(NULL);
1398 if (current_time != last_dump) {
27adbfa8 1399 AFPDumpCounters(ptv);
49612128 1400 last_dump = current_time;
27adbfa8
EL
1401 }
1402 break;
62e63e3f
EL
1403 case AFP_READ_FAILURE:
1404 /* AFPRead in error: best to reset the socket */
3bea3b39 1405 SCLogError(SC_ERR_AFP_READ,
efbb5ce0 1406 "AFPRead error reading data from iface '%s': (%d) %s",
3bea3b39 1407 ptv->iface, errno, strerror(errno));
13f13b6d 1408 AFPSwitchState(ptv, AFP_STATE_DOWN);
62e63e3f 1409 continue;
9efa4ace
EL
1410 case AFP_SURI_FAILURE:
1411 StatsIncr(ptv->tv, ptv->capture_errors);
62e63e3f 1412 break;
27b5136b 1413 case AFP_KERNEL_DROP:
e8a4a4c4 1414 AFPDumpCounters(ptv);
27b5136b 1415 break;
e80b30c0 1416 }
11099cfa 1417 } else if (unlikely(r == 0)) {
cad0ff9e 1418 StatsIncr(ptv->tv, ptv->capture_afp_poll_timeout);
f53e687b
EL
1419 /* Trigger one dump of stats every second */
1420 current_time = time(NULL);
1421 if (current_time != last_dump) {
1422 AFPDumpCounters(ptv);
1423 last_dump = current_time;
1424 }
ce71bf1f 1425 /* poll timed out, lets see handle our timeout path */
49599dfe 1426 TmThreadsCaptureHandleTimeout(tv, NULL);
11099cfa 1427
e80b30c0 1428 } else if ((r < 0) && (errno != EINTR)) {
cad0ff9e 1429 StatsIncr(ptv->tv, ptv->capture_afp_poll_err);
efbb5ce0 1430 SCLogError(SC_ERR_AFP_READ, "Error reading data from iface '%s': (%d) %s",
3bea3b39 1431 ptv->iface,
e80b30c0 1432 errno, strerror(errno));
13f13b6d 1433 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0
EL
1434 continue;
1435 }
752f03e7 1436 StatsSyncCountersIfSignalled(tv);
e80b30c0
EL
1437 }
1438
4e561d6b 1439 AFPDumpCounters(ptv);
752f03e7 1440 StatsSyncCountersIfSignalled(tv);
e80b30c0
EL
1441 SCReturnInt(TM_ECODE_OK);
1442}
1443
13f13b6d
EL
1444static int AFPGetDevFlags(int fd, const char *ifname)
1445{
1446 struct ifreq ifr;
1447
1448 memset(&ifr, 0, sizeof(ifr));
1449 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1450
1451 if (ioctl(fd, SIOCGIFFLAGS, &ifr) == -1) {
1452 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1453 ifname, strerror(errno));
1454 return -1;
1455 }
1456
1457 return ifr.ifr_flags;
1458}
1459
1460
e80b30c0 1461static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose)
c45d8985
EL
1462{
1463 struct ifreq ifr;
1464
1465 memset(&ifr, 0, sizeof(ifr));
e80b30c0 1466 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
c45d8985
EL
1467
1468 if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
cba41207
AG
1469 if (verbose)
1470 SCLogError(SC_ERR_AFP_CREATE, "Unable to find iface %s: %s",
1471 ifname, strerror(errno));
c45d8985
EL
1472 return -1;
1473 }
1474
1475 return ifr.ifr_ifindex;
1476}
1477
e80b30c0 1478static int AFPGetDevLinktype(int fd, const char *ifname)
c45d8985
EL
1479{
1480 struct ifreq ifr;
1481
1482 memset(&ifr, 0, sizeof(ifr));
e80b30c0 1483 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
c45d8985
EL
1484
1485 if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
1486 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1487 ifname, strerror(errno));
1488 return -1;
1489 }
1490
e80b30c0
EL
1491 switch (ifr.ifr_hwaddr.sa_family) {
1492 case ARPHRD_LOOPBACK:
1493 return LINKTYPE_ETHERNET;
1494 case ARPHRD_PPP:
11eb1d7c 1495 case ARPHRD_NONE:
e80b30c0
EL
1496 return LINKTYPE_RAW;
1497 default:
1498 return ifr.ifr_hwaddr.sa_family;
1499 }
c45d8985
EL
1500}
1501
b7bf299e
EL
1502int AFPGetLinkType(const char *ifname)
1503{
1504 int ltype;
1505
1506 int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
1507 if (fd == -1) {
1508 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
1509 return LINKTYPE_RAW;
1510 }
1511
1512 ltype = AFPGetDevLinktype(fd, ifname);
1513 close(fd);
1514
1515 return ltype;
1516}
1517
49b7b00f
EL
1518static int AFPComputeRingParams(AFPThreadVars *ptv, int order)
1519{
1520 /* Compute structure:
1521 Target is to store all pending packets
1522 with a size equal to MTU + auxdata
1523 And we keep a decent number of block
1524
1525 To do so:
1526 Compute frame_size (aligned to be able to fit in block
1527 Check which block size we need. Blocksize is a 2^n * pagesize
1528 We then need to get order, big enough to have
1529 frame_size < block size
1530 Find number of frame per block (divide)
1531 Fill in packet_req
1532
1533 Compute frame size:
1534 described in packet_mmap.txt
1535 dependant on snaplen (need to use a variable ?)
1536snaplen: MTU ?
1537tp_hdrlen determine_version in daq_afpacket
1538in V1: sizeof(struct tpacket_hdr);
1539in V2: val in getsockopt(instance->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len)
1540frame size: TPACKET_ALIGN(snaplen + TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1541
1542 */
1543 int tp_hdrlen = sizeof(struct tpacket_hdr);
1544 int snaplen = default_packet_size;
1545
03032457
EL
1546 if (snaplen == 0) {
1547 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1548 if (snaplen <= 0) {
1549 SCLogWarning(SC_ERR_INVALID_VALUE,
1550 "Unable to get MTU, setting snaplen to sane default of 1514");
1551 snaplen = 1514;
1552 }
1553 }
1554
69d0d484
VJ
1555 ptv->req.v2.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1556 ptv->req.v2.tp_block_size = getpagesize() << order;
1557 int frames_per_block = ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size;
49b7b00f 1558 if (frames_per_block == 0) {
bae1b03c 1559 SCLogError(SC_ERR_INVALID_VALUE, "Frame size bigger than block size");
49b7b00f
EL
1560 return -1;
1561 }
69d0d484
VJ
1562 ptv->req.v2.tp_frame_nr = ptv->ring_size;
1563 ptv->req.v2.tp_block_nr = ptv->req.v2.tp_frame_nr / frames_per_block + 1;
49b7b00f 1564 /* exact division */
69d0d484 1565 ptv->req.v2.tp_frame_nr = ptv->req.v2.tp_block_nr * frames_per_block;
b3bf7a57 1566 SCLogPerf("AF_PACKET RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d",
69d0d484
VJ
1567 ptv->req.v2.tp_block_size, ptv->req.v2.tp_block_nr,
1568 ptv->req.v2.tp_frame_size, ptv->req.v2.tp_frame_nr);
49b7b00f
EL
1569 return 1;
1570}
1571
c2d0d938 1572#ifdef HAVE_TPACKET_V3
bae1b03c
EL
1573static int AFPComputeRingParamsV3(AFPThreadVars *ptv)
1574{
69d0d484
VJ
1575 ptv->req.v3.tp_block_size = ptv->block_size;
1576 ptv->req.v3.tp_frame_size = 2048;
bae1b03c
EL
1577 int frames_per_block = 0;
1578 int tp_hdrlen = sizeof(struct tpacket3_hdr);
1579 int snaplen = default_packet_size;
1580
1581 if (snaplen == 0) {
1582 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1583 if (snaplen <= 0) {
1584 SCLogWarning(SC_ERR_INVALID_VALUE,
1585 "Unable to get MTU, setting snaplen to sane default of 1514");
1586 snaplen = 1514;
1587 }
1588 }
1589
69d0d484
VJ
1590 ptv->req.v3.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1591 frames_per_block = ptv->req.v3.tp_block_size / ptv->req.v3.tp_frame_size;
bae1b03c
EL
1592
1593 if (frames_per_block == 0) {
1594 SCLogError(SC_ERR_INVALID_VALUE,
1595 "Block size is too small, it should be at least %d",
69d0d484 1596 ptv->req.v3.tp_frame_size);
bae1b03c
EL
1597 return -1;
1598 }
69d0d484 1599 ptv->req.v3.tp_block_nr = ptv->ring_size / frames_per_block + 1;
bae1b03c 1600 /* exact division */
69d0d484
VJ
1601 ptv->req.v3.tp_frame_nr = ptv->req.v3.tp_block_nr * frames_per_block;
1602 ptv->req.v3.tp_retire_blk_tov = ptv->block_timeout;
1603 ptv->req.v3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
b3bf7a57 1604 SCLogPerf("AF_PACKET V3 RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d (mem: %d)",
69d0d484
VJ
1605 ptv->req.v3.tp_block_size, ptv->req.v3.tp_block_nr,
1606 ptv->req.v3.tp_frame_size, ptv->req.v3.tp_frame_nr,
1607 ptv->req.v3.tp_block_size * ptv->req.v3.tp_block_nr
bae1b03c
EL
1608 );
1609 return 1;
1610}
c2d0d938 1611#endif
bae1b03c 1612
c7bde9df
EL
1613static int AFPSetupRing(AFPThreadVars *ptv, char *devname)
1614{
1615 int val;
1616 unsigned int len = sizeof(val), i;
c7bde9df 1617 int order;
f5c20191 1618 int r, mmap_flag;
c7bde9df 1619
c2d0d938 1620#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1621 if (ptv->flags & AFP_TPACKET_V3) {
1622 val = TPACKET_V3;
f947539d 1623 } else
c2d0d938 1624#endif
f947539d 1625 {
c7bde9df
EL
1626 val = TPACKET_V2;
1627 }
1628 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) {
1629 if (errno == ENOPROTOOPT) {
1630 if (ptv->flags & AFP_TPACKET_V3) {
1631 SCLogError(SC_ERR_AFP_CREATE,
1632 "Too old kernel giving up (need 3.2 for TPACKET_V3)");
1633 } else {
1634 SCLogError(SC_ERR_AFP_CREATE,
1635 "Too old kernel giving up (need 2.6.27 at least)");
1636 }
1637 }
1638 SCLogError(SC_ERR_AFP_CREATE, "Error when retrieving packet header len");
1639 return AFP_FATAL_ERROR;
1640 }
1641
f947539d
VJ
1642 val = TPACKET_V2;
1643#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1644 if (ptv->flags & AFP_TPACKET_V3) {
1645 val = TPACKET_V3;
c7bde9df 1646 }
f947539d 1647#endif
c7bde9df
EL
1648 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_VERSION, &val,
1649 sizeof(val)) < 0) {
1650 SCLogError(SC_ERR_AFP_CREATE,
1651 "Can't activate TPACKET_V2/TPACKET_V3 on packet socket: %s",
1652 strerror(errno));
1653 return AFP_FATAL_ERROR;
1654 }
1655
a40f08a2
EL
1656#ifdef HAVE_HW_TIMESTAMPING
1657 int req = SOF_TIMESTAMPING_RAW_HARDWARE;
1658 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req,
1659 sizeof(req)) < 0) {
1660 SCLogWarning(SC_ERR_AFP_CREATE,
1661 "Can't activate hardware timestamping on packet socket: %s",
1662 strerror(errno));
1663 }
1664#endif
1665
2fab3ff0
VJ
1666 /* Reserve head room for a VLAN header. One vlan is extracted from AFP header
1667 * so one VLAN header length is enough. */
1668 int reserve = VLAN_HEADER_LEN;
1669 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *)&reserve, sizeof(reserve)) <
1670 0) {
1671 SCLogError(
1672 SC_ERR_AFP_CREATE, "Can't activate reserve on packet socket: %s", strerror(errno));
1673 return AFP_FATAL_ERROR;
ecf59be4
EL
1674 }
1675
c7bde9df 1676 /* Allocate RX ring */
c2d0d938 1677#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1678 if (ptv->flags & AFP_TPACKET_V3) {
1679 if (AFPComputeRingParamsV3(ptv) != 1) {
1680 return AFP_FATAL_ERROR;
1681 }
1682 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
69d0d484 1683 (void *) &ptv->req.v3, sizeof(ptv->req.v3));
c7bde9df
EL
1684 if (r < 0) {
1685 SCLogError(SC_ERR_MEM_ALLOC,
1686 "Unable to allocate RX Ring for iface %s: (%d) %s",
1687 devname,
1688 errno,
1689 strerror(errno));
1690 return AFP_FATAL_ERROR;
1691 }
1692 } else {
c2d0d938 1693#endif
fa902abe 1694 for (order = AFP_BLOCK_SIZE_DEFAULT_ORDER; order >= 0; order--) {
c7bde9df
EL
1695 if (AFPComputeRingParams(ptv, order) != 1) {
1696 SCLogInfo("Ring parameter are incorrect. Please correct the devel");
1697 return AFP_FATAL_ERROR;
1698 }
1699
1700 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
1701 (void *) &ptv->req, sizeof(ptv->req));
1702
1703 if (r < 0) {
1704 if (errno == ENOMEM) {
1705 SCLogInfo("Memory issue with ring parameters. Retrying.");
1706 continue;
1707 }
1708 SCLogError(SC_ERR_MEM_ALLOC,
1709 "Unable to allocate RX Ring for iface %s: (%d) %s",
1710 devname,
1711 errno,
1712 strerror(errno));
1713 return AFP_FATAL_ERROR;
1714 } else {
1715 break;
1716 }
1717 }
1718 if (order < 0) {
1719 SCLogError(SC_ERR_MEM_ALLOC,
1720 "Unable to allocate RX Ring for iface %s (order 0 failed)",
1721 devname);
1722 return AFP_FATAL_ERROR;
1723 }
c2d0d938 1724#ifdef HAVE_TPACKET_V3
c7bde9df 1725 }
c2d0d938 1726#endif
c7bde9df
EL
1727
1728 /* Allocate the Ring */
c2d0d938 1729#ifdef HAVE_TPACKET_V3
c7bde9df 1730 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484 1731 ptv->ring_buflen = ptv->req.v3.tp_block_nr * ptv->req.v3.tp_block_size;
c7bde9df 1732 } else {
c2d0d938 1733#endif
69d0d484 1734 ptv->ring_buflen = ptv->req.v2.tp_block_nr * ptv->req.v2.tp_block_size;
c2d0d938 1735#ifdef HAVE_TPACKET_V3
c7bde9df 1736 }
c2d0d938 1737#endif
f5c20191
EL
1738 mmap_flag = MAP_SHARED;
1739 if (ptv->flags & AFP_MMAP_LOCKED)
1740 mmap_flag |= MAP_LOCKED;
cba41207 1741 ptv->ring_buf = mmap(0, ptv->ring_buflen, PROT_READ|PROT_WRITE,
f5c20191 1742 mmap_flag, ptv->socket, 0);
cba41207 1743 if (ptv->ring_buf == MAP_FAILED) {
88f5d7d1
EL
1744 SCLogError(SC_ERR_MEM_ALLOC, "Unable to mmap, error %s",
1745 strerror(errno));
c7bde9df
EL
1746 goto mmap_err;
1747 }
c2d0d938 1748#ifdef HAVE_TPACKET_V3
c7bde9df 1749 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484
VJ
1750 ptv->ring.v3 = SCMalloc(ptv->req.v3.tp_block_nr * sizeof(*ptv->ring.v3));
1751 if (!ptv->ring.v3) {
1752 SCLogError(SC_ERR_MEM_ALLOC, "Unable to malloc ptv ring.v3");
291af719 1753 goto postmmap_err;
c7bde9df 1754 }
69d0d484
VJ
1755 for (i = 0; i < ptv->req.v3.tp_block_nr; ++i) {
1756 ptv->ring.v3[i].iov_base = ptv->ring_buf + (i * ptv->req.v3.tp_block_size);
1757 ptv->ring.v3[i].iov_len = ptv->req.v3.tp_block_size;
c7bde9df
EL
1758 }
1759 } else {
c2d0d938 1760#endif
c7bde9df 1761 /* allocate a ring for each frame header pointer*/
69d0d484
VJ
1762 ptv->ring.v2 = SCMalloc(ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
1763 if (ptv->ring.v2 == NULL) {
c7bde9df 1764 SCLogError(SC_ERR_MEM_ALLOC, "Unable to allocate frame buf");
291af719 1765 goto postmmap_err;
c7bde9df 1766 }
69d0d484 1767 memset(ptv->ring.v2, 0, ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
c7bde9df
EL
1768 /* fill the header ring with proper frame ptr*/
1769 ptv->frame_offset = 0;
69d0d484
VJ
1770 for (i = 0; i < ptv->req.v2.tp_block_nr; ++i) {
1771 void *base = &(ptv->ring_buf[i * ptv->req.v2.tp_block_size]);
c7bde9df 1772 unsigned int j;
69d0d484
VJ
1773 for (j = 0; j < ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size; ++j, ++ptv->frame_offset) {
1774 (((union thdr **)ptv->ring.v2)[ptv->frame_offset]) = base;
1775 base += ptv->req.v2.tp_frame_size;
c7bde9df
EL
1776 }
1777 }
1778 ptv->frame_offset = 0;
c2d0d938 1779#ifdef HAVE_TPACKET_V3
c7bde9df 1780 }
c2d0d938 1781#endif
c7bde9df
EL
1782
1783 return 0;
1784
291af719 1785postmmap_err:
cba41207 1786 munmap(ptv->ring_buf, ptv->ring_buflen);
69d0d484
VJ
1787 if (ptv->ring.v2)
1788 SCFree(ptv->ring.v2);
1789 if (ptv->ring.v3)
1790 SCFree(ptv->ring.v3);
c7bde9df
EL
1791mmap_err:
1792 /* Packet mmap does the cleaning when socket is closed */
1793 return AFP_FATAL_ERROR;
1794}
1795
402bdf9b
VJ
/** \brief test if we can use FANOUT. Older kernels like those in
 *         CentOS6 have HAVE_PACKET_FANOUT defined but fail to work
 *
 *  The check sets PACKET_FANOUT on a throw-away AF_PACKET socket.
 *
 *  \param cluster_id fanout group id to try
 *
 *  \retval 1 if the option could be set, 0 otherwise (always 0 when built
 *          without HAVE_PACKET_FANOUT)
 */
int AFPIsFanoutSupported(uint16_t cluster_id)
{
#ifdef HAVE_PACKET_FANOUT
    const int sock = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (sock < 0) {
        return 0;
    }

    /* fanout option word: mode in the upper 16 bits, group id in the lower */
    const uint32_t mode = PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG;
    uint32_t option = (mode << 16) | cluster_id;
    const int rc = setsockopt(sock, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
    close(sock);

    if (rc >= 0) {
        return 1;
    }

    SCLogError(SC_ERR_INVALID_VALUE, "fanout not supported by kernel: "
            "Kernel too old or cluster-id %d already in use.", cluster_id);
    return 0;
#else
    return 0;
#endif
}
1821
91e1256b
EL
1822#ifdef HAVE_PACKET_EBPF
1823
1824static int SockFanoutSeteBPF(AFPThreadVars *ptv)
1825{
1826 int pfd = ptv->ebpf_lb_fd;
1827 if (pfd == -1) {
1828 SCLogError(SC_ERR_INVALID_VALUE,
1829 "Fanout file descriptor is invalid");
1830 return -1;
1831 }
1832
1833 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
1834 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf");
1835 return -1;
1836 }
1837 SCLogInfo("Activated eBPF on socket");
1838
1839 return 0;
1840}
1841
1842static int SetEbpfFilter(AFPThreadVars *ptv)
1843{
1844 int pfd = ptv->ebpf_filter_fd;
1845 if (pfd == -1) {
1846 SCLogError(SC_ERR_INVALID_VALUE,
1847 "Filter file descriptor is invalid");
1848 return -1;
1849 }
1850
1851 if (setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_BPF, &pfd, sizeof(pfd))) {
1852 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf: %s", strerror(errno));
1853 return -1;
1854 }
1855 SCLogInfo("Activated eBPF filter on socket");
1856
1857 return 0;
1858}
1859#endif
1860
e80b30c0 1861static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose)
c45d8985
EL
1862{
1863 int r;
1992a227 1864 int ret = AFP_FATAL_ERROR;
c45d8985
EL
1865 struct packet_mreq sock_params;
1866 struct sockaddr_ll bind_address;
662dccd8 1867 int if_idx;
49b7b00f 1868
c45d8985
EL
1869 /* open socket */
1870 ptv->socket = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
1871 if (ptv->socket == -1) {
e80b30c0 1872 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
13f13b6d 1873 goto error;
c45d8985 1874 }
cba41207 1875
662dccd8 1876 if_idx = AFPGetIfnumByDev(ptv->socket, devname, verbose);
cba41207
AG
1877
1878 if (if_idx == -1) {
fcd5e138 1879 goto socket_err;
cba41207
AG
1880 }
1881
c45d8985
EL
1882 /* bind socket */
1883 memset(&bind_address, 0, sizeof(bind_address));
1884 bind_address.sll_family = AF_PACKET;
1885 bind_address.sll_protocol = htons(ETH_P_ALL);
662dccd8 1886 bind_address.sll_ifindex = if_idx;
c45d8985
EL
1887 if (bind_address.sll_ifindex == -1) {
1888 if (verbose)
e80b30c0 1889 SCLogError(SC_ERR_AFP_CREATE, "Couldn't find iface %s", devname);
1992a227 1890 ret = AFP_RECOVERABLE_ERROR;
13f13b6d
EL
1891 goto socket_err;
1892 }
1893
cba41207
AG
1894 int if_flags = AFPGetDevFlags(ptv->socket, ptv->iface);
1895 if (if_flags == -1) {
1896 if (verbose) {
1897 SCLogError(SC_ERR_AFP_READ,
1898 "Couldn't get flags for interface '%s'",
1899 ptv->iface);
1900 }
1901 ret = AFP_RECOVERABLE_ERROR;
1902 goto socket_err;
1903 } else if ((if_flags & (IFF_UP | IFF_RUNNING)) == 0) {
1904 if (verbose) {
1905 SCLogError(SC_ERR_AFP_READ,
1906 "Interface '%s' is down",
1907 ptv->iface);
1908 }
1909 ret = AFP_RECOVERABLE_ERROR;
1910 goto socket_err;
1911 }
1912
13f13b6d
EL
1913 if (ptv->promisc != 0) {
1914 /* Force promiscuous mode */
1915 memset(&sock_params, 0, sizeof(sock_params));
1916 sock_params.mr_type = PACKET_MR_PROMISC;
1917 sock_params.mr_ifindex = bind_address.sll_ifindex;
1918 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_ADD_MEMBERSHIP,(void *)&sock_params, sizeof(sock_params));
1919 if (r < 0) {
1920 SCLogError(SC_ERR_AFP_CREATE,
1921 "Couldn't switch iface %s to promiscuous, error %s",
1922 devname, strerror(errno));
c7bde9df 1923 goto socket_err;
13f13b6d
EL
1924 }
1925 }
1926
1927 if (ptv->checksum_mode == CHECKSUM_VALIDATION_KERNEL) {
1928 int val = 1;
1929 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_AUXDATA, &val,
1930 sizeof(val)) == -1 && errno != ENOPROTOOPT) {
1931 SCLogWarning(SC_ERR_NO_AF_PACKET,
4111331a 1932 "'kernel' checksum mode not supported, falling back to full mode.");
13f13b6d
EL
1933 ptv->checksum_mode = CHECKSUM_VALIDATION_ENABLE;
1934 }
1935 }
1936
1937 /* set socket recv buffer size */
1938 if (ptv->buffer_size != 0) {
1939 /*
1940 * Set the socket buffer size to the specified value.
1941 */
b3bf7a57 1942 SCLogPerf("Setting AF_PACKET socket buffer to %d", ptv->buffer_size);
13f13b6d
EL
1943 if (setsockopt(ptv->socket, SOL_SOCKET, SO_RCVBUF,
1944 &ptv->buffer_size,
1945 sizeof(ptv->buffer_size)) == -1) {
1946 SCLogError(SC_ERR_AFP_CREATE,
1947 "Couldn't set buffer size to %d on iface %s, error %s",
1948 ptv->buffer_size, devname, strerror(errno));
c7bde9df 1949 goto socket_err;
13f13b6d
EL
1950 }
1951 }
1952
1953 r = bind(ptv->socket, (struct sockaddr *)&bind_address, sizeof(bind_address));
1954 if (r < 0) {
1955 if (verbose) {
1956 if (errno == ENETDOWN) {
1957 SCLogError(SC_ERR_AFP_CREATE,
1958 "Couldn't bind AF_PACKET socket, iface %s is down",
1959 devname);
1960 } else {
1961 SCLogError(SC_ERR_AFP_CREATE,
1962 "Couldn't bind AF_PACKET socket to iface %s, error %s",
1963 devname, strerror(errno));
1964 }
1965 }
1992a227 1966 ret = AFP_RECOVERABLE_ERROR;
c7bde9df 1967 goto socket_err;
13f13b6d
EL
1968 }
1969
91e1256b 1970
238ff231
EL
1971#ifdef HAVE_PACKET_FANOUT
1972 /* add binded socket to fanout group */
1973 if (ptv->threads > 1) {
d8c82d4f 1974 uint32_t mode = ptv->cluster_type;
238ff231 1975 uint16_t id = ptv->cluster_id;
4111331a 1976 uint32_t option = (mode << 16) | (id & 0xffff);
238ff231
EL
1977 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
1978 if (r < 0) {
1979 SCLogError(SC_ERR_AFP_CREATE,
4111331a 1980 "Couldn't set fanout mode, error %s",
238ff231 1981 strerror(errno));
c7bde9df 1982 goto socket_err;
238ff231
EL
1983 }
1984 }
1985#endif
1986
91e1256b
EL
1987#ifdef HAVE_PACKET_EBPF
1988 if (ptv->cluster_type == PACKET_FANOUT_EBPF) {
1989 r = SockFanoutSeteBPF(ptv);
1990 if (r < 0) {
1991 SCLogError(SC_ERR_AFP_CREATE,
1992 "Coudn't set EBPF, error %s",
1993 strerror(errno));
1994 goto socket_err;
1995 }
1996 }
1997#endif
1998
b9189946
VJ
1999 ret = AFPSetupRing(ptv, devname);
2000 if (ret != 0)
2001 goto socket_err;
49b7b00f 2002
86a3f064 2003 SCLogDebug("Using interface '%s' via socket %d", (char *)devname, ptv->socket);
c45d8985 2004
c85ee1e3 2005 ptv->datalink = AFPGetDevLinktype(ptv->socket, ptv->iface);
c85ee1e3 2006
f47df5a6 2007 TmEcode rc = AFPSetBPFFilter(ptv);
f2a6fb8a 2008 if (rc == TM_ECODE_FAILED) {
39807b47
AG
2009 ret = AFP_FATAL_ERROR;
2010 goto socket_err;
f2a6fb8a
EL
2011 }
2012
49b7b00f 2013 /* Init is ok */
13f13b6d 2014 AFPSwitchState(ptv, AFP_STATE_UP);
c45d8985 2015 return 0;
13f13b6d 2016
13f13b6d
EL
2017socket_err:
2018 close(ptv->socket);
2019 ptv->socket = -1;
f47df5a6 2020 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484
VJ
2021 if (ptv->ring.v3) {
2022 SCFree(ptv->ring.v3);
2023 ptv->ring.v3 = NULL;
f47df5a6
VJ
2024 }
2025 } else {
69d0d484
VJ
2026 if (ptv->ring.v2) {
2027 SCFree(ptv->ring.v2);
2028 ptv->ring.v2 = NULL;
f47df5a6
VJ
2029 }
2030 }
2031
13f13b6d 2032error:
1992a227 2033 return -ret;
c45d8985
EL
2034}
2035
f2a6fb8a
EL
2036TmEcode AFPSetBPFFilter(AFPThreadVars *ptv)
2037{
2038 struct bpf_program filter;
2039 struct sock_fprog fcode;
2040 int rc;
2041
91e1256b
EL
2042#ifdef HAVE_PACKET_EBPF
2043 if (ptv->ebpf_filter_fd != -1) {
2044 return SetEbpfFilter(ptv);
2045 }
2046#endif
2047
f2a6fb8a
EL
2048 if (!ptv->bpf_filter)
2049 return TM_ECODE_OK;
2050
f2a6fb8a
EL
2051 SCLogInfo("Using BPF '%s' on iface '%s'",
2052 ptv->bpf_filter,
2053 ptv->iface);
28e9e4c8
EL
2054
2055 char errbuf[PCAP_ERRBUF_SIZE];
2056 if (SCBPFCompile(default_packet_size, /* snaplen_arg */
f2a6fb8a
EL
2057 ptv->datalink, /* linktype_arg */
2058 &filter, /* program */
2059 ptv->bpf_filter, /* const char *buf */
cc82ef06 2060 1, /* optimize */
28e9e4c8
EL
2061 0, /* mask */
2062 errbuf,
2063 sizeof(errbuf)) == -1) {
2064 SCLogError(SC_ERR_AFP_CREATE, "Failed to compile BPF \"%s\": %s",
2065 ptv->bpf_filter,
2066 errbuf);
f2a6fb8a
EL
2067 return TM_ECODE_FAILED;
2068 }
2069
2070 fcode.len = filter.bf_len;
2071 fcode.filter = (struct sock_filter*)filter.bf_insns;
2072
2073 rc = setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_FILTER, &fcode, sizeof(fcode));
2074
28e9e4c8 2075 SCBPFFree(&filter);
f2a6fb8a
EL
2076 if(rc == -1) {
2077 SCLogError(SC_ERR_AFP_CREATE, "Failed to attach filter: %s", strerror(errno));
2078 return TM_ECODE_FAILED;
2079 }
2080
f2a6fb8a
EL
2081 return TM_ECODE_OK;
2082}
2083
06173267
EL
2084#ifdef HAVE_PACKET_EBPF
2085/**
2086 * Insert a half flow in the kernel bypass table
2087 *
2088 * \param mapfd file descriptor of the protocol bypass table
2089 * \param key data to use as key in the table
2598078e 2090 * \return 0 in case of error, 1 if success
06173267 2091 */
69d2c8eb 2092static int AFPInsertHalfFlow(int mapd, void *key, unsigned int nr_cpus)
06173267 2093{
651a27e4 2094 BPF_DECLARE_PERCPU(struct pair, value, nr_cpus);
17a32bda 2095 unsigned int i;
1e729f05
EL
2096
2097 if (mapd == -1) {
2098 return 0;
2099 }
2100
94a622cb 2101 /* We use a per CPU structure so we have to set an array of values as the kernel
6ab1cbcb
EL
2102 * is not duplicating the data on each CPU by itself. */
2103 for (i = 0; i < nr_cpus; i++) {
651a27e4
EL
2104 BPF_PERCPU(value, i).packets = 0;
2105 BPF_PERCPU(value, i).bytes = 0;
17a32bda 2106 }
17a32bda
EL
2107 if (bpf_map_update_elem(mapd, key, value, BPF_NOEXIST) != 0) {
2108 switch (errno) {
3379311e 2109 /* no more place in the hash */
17a32bda 2110 case E2BIG:
17a32bda 2111 return 0;
fcae1c18
EL
2112 /* no more place in the hash for some hardware bypass */
2113 case EAGAIN:
2114 return 0;
3379311e
EL
2115 /* if we already have the key then bypass is a success */
2116 case EEXIST:
2117 return 1;
2118 /* Not supposed to be there so issue a error */
17a32bda
EL
2119 default:
2120 SCLogError(SC_ERR_BPF, "Can't update eBPF map: %s (%d)",
2121 strerror(errno),
2122 errno);
2123 return 0;
06173267 2124 }
17a32bda
EL
2125 }
2126 return 1;
06173267 2127}
b07bda7a 2128
9206b30f
EL
2129static int AFPSetFlowStorage(Packet *p, int map_fd, void *key0, void* key1,
2130 int family)
b07bda7a
EL
2131{
2132 FlowBypassInfo *fc = FlowGetStorageById(p->flow, GetFlowBypassInfoID());
2133 if (fc) {
2134 EBPFBypassData *eb = SCCalloc(1, sizeof(EBPFBypassData));
2135 if (eb == NULL) {
9206b30f
EL
2136 EBPFDeleteKey(map_fd, key0);
2137 EBPFDeleteKey(map_fd, key1);
2138 LiveDevAddBypassFail(p->livedev, 1, family);
b07bda7a
EL
2139 SCFree(key0);
2140 SCFree(key1);
2141 return 0;
2142 }
2143 eb->key[0] = key0;
2144 eb->key[1] = key1;
2145 eb->mapfd = map_fd;
2146 eb->cpus_count = p->afp_v.nr_cpus;
2147 fc->BypassUpdate = EBPFBypassUpdate;
2148 fc->BypassFree = EBPFBypassFree;
2149 fc->bypass_data = eb;
9206b30f
EL
2150 } else {
2151 EBPFDeleteKey(map_fd, key0);
2152 EBPFDeleteKey(map_fd, key1);
2153 LiveDevAddBypassFail(p->livedev, 1, family);
2154 SCFree(key0);
2155 SCFree(key1);
2156 return 0;
b07bda7a 2157 }
9206b30f
EL
2158
2159 LiveDevAddBypassStats(p->livedev, 1, family);
6126f105 2160 LiveDevAddBypassSuccess(p->livedev, 1, family);
b07bda7a
EL
2161 return 1;
2162}
2163
2598078e 2164/**
94a622cb
EL
2165 * Bypass function for AF_PACKET capture in eBPF mode
2166 *
2167 * This function creates two half flows in the map shared with the kernel
2168 * to trigger bypass.
2169 *
2170 * The implementation of bypass is done via an IPv4 and an IPv6 flow table.
2171 * This table contains the list of half flows to bypass. The in-kernel filter
2172 * will skip/drop the packet if they belong to a flow in one of the flows
2173 * table.
2174 *
2175 * \param p the packet belonging to the flow to bypass
2176 * \return 0 if unable to bypass, 1 if success
2598078e 2177 */
06173267
EL
2178static int AFPBypassCallback(Packet *p)
2179{
06173267
EL
2180 SCLogDebug("Calling af_packet callback function");
2181 /* Only bypass TCP and UDP */
2182 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2183 return 0;
2184 }
2185
fc2f2fa7
EL
2186 /* If we don't have a flow attached to packet the eBPF map entries
2187 * will be destroyed at first flow bypass manager pass as we won't
2188 * find any associated entry */
2189 if (p->flow == NULL) {
2190 return 0;
2191 }
06173267
EL
2192 /* Bypassing tunneled packets is currently not supported
2193 * because we can't discard the inner packet only due to
2194 * primitive parsing in eBPF */
2195 if (IS_TUNNEL_PKT(p)) {
2196 return 0;
2197 }
06173267 2198 if (PKT_IS_IPV4(p)) {
d65f4585 2199 SCLogDebug("add an IPv4");
eff10fce
EL
2200 if (p->afp_v.v4_map_fd == -1) {
2201 return 0;
2202 }
b07bda7a
EL
2203 struct flowv4_keys *keys[2];
2204 keys[0] = SCCalloc(1, sizeof(struct flowv4_keys));
2205 if (keys[0] == NULL) {
2206 return 0;
2207 }
2208 keys[0]->src = htonl(GET_IPV4_SRC_ADDR_U32(p));
2209 keys[0]->dst = htonl(GET_IPV4_DST_ADDR_U32(p));
2210 keys[0]->port16[0] = GET_TCP_SRC_PORT(p);
2211 keys[0]->port16[1] = GET_TCP_DST_PORT(p);
d119845d
EL
2212 keys[0]->vlan0 = p->vlan_id[0];
2213 keys[0]->vlan1 = p->vlan_id[1];
8c880879 2214
d119845d
EL
2215 if (IPV4_GET_IPPROTO(p) == IPPROTO_TCP) {
2216 keys[0]->ip_proto = 1;
2217 } else {
2218 keys[0]->ip_proto = 0;
2219 }
69d2c8eb 2220 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[0],
fc2f2fa7 2221 p->afp_v.nr_cpus) == 0) {
9206b30f 2222 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2223 SCFree(keys[0]);
2224 return 0;
2225 }
2226 keys[1]= SCCalloc(1, sizeof(struct flowv4_keys));
2227 if (keys[1] == NULL) {
9206b30f
EL
2228 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2229 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a 2230 SCFree(keys[0]);
06173267
EL
2231 return 0;
2232 }
b07bda7a
EL
2233 keys[1]->src = htonl(GET_IPV4_DST_ADDR_U32(p));
2234 keys[1]->dst = htonl(GET_IPV4_SRC_ADDR_U32(p));
2235 keys[1]->port16[0] = GET_TCP_DST_PORT(p);
2236 keys[1]->port16[1] = GET_TCP_SRC_PORT(p);
d119845d
EL
2237 keys[1]->vlan0 = p->vlan_id[0];
2238 keys[1]->vlan1 = p->vlan_id[1];
b07bda7a 2239
d119845d 2240 keys[1]->ip_proto = keys[0]->ip_proto;
69d2c8eb 2241 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1],
fc2f2fa7 2242 p->afp_v.nr_cpus) == 0) {
9206b30f
EL
2243 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2244 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2245 SCFree(keys[0]);
2246 SCFree(keys[1]);
06173267
EL
2247 return 0;
2248 }
315c29a8 2249 EBPFUpdateFlow(p->flow, p, NULL);
9206b30f 2250 return AFPSetFlowStorage(p, p->afp_v.v4_map_fd, keys[0], keys[1], AF_INET);
06173267
EL
2251 }
2252 /* For IPv6 case we don't handle extended header in eBPF */
6062c27e 2253 if (PKT_IS_IPV6(p) &&
06173267 2254 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
d65f4585 2255 int i;
eff10fce
EL
2256 if (p->afp_v.v6_map_fd == -1) {
2257 return 0;
2258 }
06173267 2259 SCLogDebug("add an IPv6");
b07bda7a
EL
2260 struct flowv6_keys *keys[2];
2261 keys[0] = SCCalloc(1, sizeof(struct flowv6_keys));
2262 if (keys[0] == NULL) {
9206b30f 2263 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2264 return 0;
2265 }
06173267 2266 for (i = 0; i < 4; i++) {
b07bda7a
EL
2267 keys[0]->src[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2268 keys[0]->dst[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2269 }
2270 keys[0]->port16[0] = GET_TCP_SRC_PORT(p);
2271 keys[0]->port16[1] = GET_TCP_DST_PORT(p);
d119845d
EL
2272 keys[0]->vlan0 = p->vlan_id[0];
2273 keys[0]->vlan1 = p->vlan_id[1];
2274
2275 if (IPV6_GET_NH(p) == IPPROTO_TCP) {
2276 keys[0]->ip_proto = 1;
2277 } else {
2278 keys[0]->ip_proto = 0;
2279 }
69d2c8eb 2280 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[0],
fc2f2fa7 2281 p->afp_v.nr_cpus) == 0) {
9206b30f 2282 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a 2283 SCFree(keys[0]);
06173267
EL
2284 return 0;
2285 }
b07bda7a
EL
2286 keys[1]= SCCalloc(1, sizeof(struct flowv6_keys));
2287 if (keys[1] == NULL) {
9206b30f
EL
2288 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2289 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2290 SCFree(keys[0]);
2291 return 0;
06173267 2292 }
b07bda7a
EL
2293 for (i = 0; i < 4; i++) {
2294 keys[1]->src[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2295 keys[1]->dst[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2296 }
2297 keys[1]->port16[0] = GET_TCP_DST_PORT(p);
2298 keys[1]->port16[1] = GET_TCP_SRC_PORT(p);
d119845d
EL
2299 keys[1]->vlan0 = p->vlan_id[0];
2300 keys[1]->vlan1 = p->vlan_id[1];
2301
2302 keys[1]->ip_proto = keys[0]->ip_proto;
69d2c8eb 2303 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1],
fc2f2fa7 2304 p->afp_v.nr_cpus) == 0) {
9206b30f
EL
2305 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2306 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2307 SCFree(keys[0]);
2308 SCFree(keys[1]);
06173267
EL
2309 return 0;
2310 }
fc2f2fa7
EL
2311 if (p->flow)
2312 EBPFUpdateFlow(p->flow, p, NULL);
9206b30f 2313 return AFPSetFlowStorage(p, p->afp_v.v6_map_fd, keys[0], keys[1], AF_INET6);
06173267 2314 }
06173267
EL
2315 return 0;
2316}
2317
94a622cb
EL
2318/**
2319 * Bypass function for AF_PACKET capture in XDP mode
2320 *
2321 * This function creates two half flows in the map shared with the kernel
2322 * to trigger bypass. This function is similar to AFPBypassCallback() but
2323 * the bytes order is changed for some data due to the way we get the data
2324 * in the XDP case.
2325 *
2326 * \param p the packet belonging to the flow to bypass
2327 * \return 0 if unable to bypass, 1 if success
2328 */
8c880879
EL
2329static int AFPXDPBypassCallback(Packet *p)
2330{
8c880879
EL
2331 SCLogDebug("Calling af_packet callback function");
2332 /* Only bypass TCP and UDP */
2333 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2334 return 0;
2335 }
2336
fc2f2fa7
EL
2337 /* If we don't have a flow attached to packet the eBPF map entries
2338 * will be destroyed at first flow bypass manager pass as we won't
2339 * find any associated entry */
2340 if (p->flow == NULL) {
2341 return 0;
2342 }
8c880879
EL
2343 /* Bypassing tunneled packets is currently not supported
2344 * because we can't discard the inner packet only due to
2345 * primitive parsing in eBPF */
2346 if (IS_TUNNEL_PKT(p)) {
2347 return 0;
2348 }
8c880879 2349 if (PKT_IS_IPV4(p)) {
b07bda7a
EL
2350 struct flowv4_keys *keys[2];
2351 keys[0]= SCCalloc(1, sizeof(struct flowv4_keys));
2352 if (keys[0] == NULL) {
9206b30f 2353 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2354 return 0;
2355 }
eff10fce 2356 if (p->afp_v.v4_map_fd == -1) {
b07bda7a 2357 SCFree(keys[0]);
eff10fce
EL
2358 return 0;
2359 }
b07bda7a
EL
2360 keys[0]->src = p->src.addr_data32[0];
2361 keys[0]->dst = p->dst.addr_data32[0];
94a622cb 2362 /* In the XDP filter we get port from parsing of packet and not from skb
6062c27e 2363 * (as in eBPF filter) so we need to pass from host to network order */
b07bda7a
EL
2364 keys[0]->port16[0] = htons(p->sp);
2365 keys[0]->port16[1] = htons(p->dp);
d119845d
EL
2366 keys[0]->vlan0 = p->vlan_id[0];
2367 keys[0]->vlan1 = p->vlan_id[1];
2368 if (IPV4_GET_IPPROTO(p) == IPPROTO_TCP) {
2369 keys[0]->ip_proto = 1;
2370 } else {
2371 keys[0]->ip_proto = 0;
2372 }
69d2c8eb 2373 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[0],
fc2f2fa7 2374 p->afp_v.nr_cpus) == 0) {
9206b30f 2375 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2376 SCFree(keys[0]);
2377 return 0;
2378 }
2379 keys[1]= SCCalloc(1, sizeof(struct flowv4_keys));
2380 if (keys[1] == NULL) {
9206b30f
EL
2381 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2382 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a 2383 SCFree(keys[0]);
8c880879
EL
2384 return 0;
2385 }
b07bda7a
EL
2386 keys[1]->src = p->dst.addr_data32[0];
2387 keys[1]->dst = p->src.addr_data32[0];
2388 keys[1]->port16[0] = htons(p->dp);
2389 keys[1]->port16[1] = htons(p->sp);
d119845d
EL
2390 keys[1]->vlan0 = p->vlan_id[0];
2391 keys[1]->vlan1 = p->vlan_id[1];
2392 keys[1]->ip_proto = keys[0]->ip_proto;
69d2c8eb 2393 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, keys[1],
fc2f2fa7 2394 p->afp_v.nr_cpus) == 0) {
9206b30f
EL
2395 EBPFDeleteKey(p->afp_v.v4_map_fd, keys[0]);
2396 LiveDevAddBypassFail(p->livedev, 1, AF_INET);
b07bda7a
EL
2397 SCFree(keys[0]);
2398 SCFree(keys[1]);
8c880879
EL
2399 return 0;
2400 }
9206b30f 2401 return AFPSetFlowStorage(p, p->afp_v.v4_map_fd, keys[0], keys[1], AF_INET);
8c880879
EL
2402 }
2403 /* For IPv6 case we don't handle extended header in eBPF */
6062c27e 2404 if (PKT_IS_IPV6(p) &&
8c880879 2405 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
8c880879 2406 SCLogDebug("add an IPv6");
eff10fce
EL
2407 if (p->afp_v.v6_map_fd == -1) {
2408 return 0;
2409 }
d65f4585 2410 int i;
b07bda7a
EL
2411 struct flowv6_keys *keys[2];
2412 keys[0] = SCCalloc(1, sizeof(struct flowv6_keys));
2413 if (keys[0] == NULL) {
2414 return 0;
2415 }
2416
8c880879 2417 for (i = 0; i < 4; i++) {
b07bda7a
EL
2418 keys[0]->src[i] = GET_IPV6_SRC_ADDR(p)[i];
2419 keys[0]->dst[i] = GET_IPV6_DST_ADDR(p)[i];
2420 }
2421 keys[0]->port16[0] = htons(GET_TCP_SRC_PORT(p));
2422 keys[0]->port16[1] = htons(GET_TCP_DST_PORT(p));
d119845d
EL
2423 keys[0]->vlan0 = p->vlan_id[0];
2424 keys[0]->vlan1 = p->vlan_id[1];
2425 if (IPV6_GET_NH(p) == IPPROTO_TCP) {
2426 keys[0]->ip_proto = 1;
2427 } else {
2428 keys[0]->ip_proto = 0;
2429 }
69d2c8eb 2430 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[0],
fc2f2fa7 2431 p->afp_v.nr_cpus) == 0) {
9206b30f 2432 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a 2433 SCFree(keys[0]);
8c880879
EL
2434 return 0;
2435 }
b07bda7a
EL
2436 keys[1]= SCCalloc(1, sizeof(struct flowv6_keys));
2437 if (keys[1] == NULL) {
9206b30f
EL
2438 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2439 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2440 SCFree(keys[0]);
2441 return 0;
8c880879 2442 }
b07bda7a
EL
2443 for (i = 0; i < 4; i++) {
2444 keys[1]->src[i] = GET_IPV6_DST_ADDR(p)[i];
2445 keys[1]->dst[i] = GET_IPV6_SRC_ADDR(p)[i];
2446 }
2447 keys[1]->port16[0] = htons(GET_TCP_DST_PORT(p));
2448 keys[1]->port16[1] = htons(GET_TCP_SRC_PORT(p));
d119845d
EL
2449 keys[1]->vlan0 = p->vlan_id[0];
2450 keys[1]->vlan1 = p->vlan_id[1];
2451 keys[1]->ip_proto = keys[0]->ip_proto;
69d2c8eb 2452 if (AFPInsertHalfFlow(p->afp_v.v6_map_fd, keys[1],
fc2f2fa7 2453 p->afp_v.nr_cpus) == 0) {
9206b30f
EL
2454 EBPFDeleteKey(p->afp_v.v6_map_fd, keys[0]);
2455 LiveDevAddBypassFail(p->livedev, 1, AF_INET6);
b07bda7a
EL
2456 SCFree(keys[0]);
2457 SCFree(keys[1]);
8c880879
EL
2458 return 0;
2459 }
9206b30f 2460 return AFPSetFlowStorage(p, p->afp_v.v6_map_fd, keys[0], keys[1], AF_INET6);
8c880879 2461 }
8c880879
EL
2462 return 0;
2463}
2464
5e62ae6d
EL
2465bool g_flowv4_ok = true;
2466bool g_flowv6_ok = true;
2467
5e05fedc
VJ
2468#endif /* HAVE_PACKET_EBPF */
2469
c45d8985
EL
2470/**
2471 * \brief Init function for ReceiveAFP.
2472 *
2473 * \param tv pointer to ThreadVars
2474 * \param initdata pointer to the interface passed from the user
2475 * \param data pointer gets populated with AFPThreadVars
2476 *
2477 * \todo Create a general AFP setup function.
2478 */
ab1200fb 2479TmEcode ReceiveAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
8f1d7503 2480{
c45d8985 2481 SCEnter();
ab1200fb 2482 AFPIfaceConfig *afpconfig = (AFPIfaceConfig *)initdata;
c45d8985 2483
c45d8985
EL
2484 if (initdata == NULL) {
2485 SCLogError(SC_ERR_INVALID_ARGUMENT, "initdata == NULL");
2486 SCReturnInt(TM_ECODE_FAILED);
2487 }
2488
2489 AFPThreadVars *ptv = SCMalloc(sizeof(AFPThreadVars));
e176be6f 2490 if (unlikely(ptv == NULL)) {
45d5c3ca 2491 afpconfig->DerefFunc(afpconfig);
c45d8985 2492 SCReturnInt(TM_ECODE_FAILED);
45d5c3ca 2493 }
c45d8985
EL
2494 memset(ptv, 0, sizeof(AFPThreadVars));
2495
2496 ptv->tv = tv;
c45d8985 2497
fbca1a4e 2498 strlcpy(ptv->iface, afpconfig->iface, AFP_IFACE_NAME_LENGTH);
c45d8985
EL
2499 ptv->iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2500
51eb9605
EL
2501 ptv->livedev = LiveGetDevice(ptv->iface);
2502 if (ptv->livedev == NULL) {
2503 SCLogError(SC_ERR_INVALID_VALUE, "Unable to find Live device");
11bdf483 2504 SCFree(ptv);
51eb9605
EL
2505 SCReturnInt(TM_ECODE_FAILED);
2506 }
2507
fbca1a4e 2508 ptv->buffer_size = afpconfig->buffer_size;
8879df80 2509 ptv->ring_size = afpconfig->ring_size;
fa902abe 2510 ptv->block_size = afpconfig->block_size;
8baf64f5 2511 ptv->block_timeout = afpconfig->block_timeout;
e80b30c0 2512
df7dbe36 2513 ptv->promisc = afpconfig->promisc;
6062e00c 2514 ptv->checksum_mode = afpconfig->checksum_mode;
6efd37a3 2515 ptv->bpf_filter = NULL;
df7dbe36 2516
fbca1a4e 2517 ptv->threads = 1;
e80b30c0
EL
2518#ifdef HAVE_PACKET_FANOUT
2519 ptv->cluster_type = PACKET_FANOUT_LB;
2520 ptv->cluster_id = 1;
2521 /* We only set cluster info if the number of reader threads is greater than 1 */
fbca1a4e 2522 if (afpconfig->threads > 1) {
9d882116
VJ
2523 ptv->cluster_id = afpconfig->cluster_id;
2524 ptv->cluster_type = afpconfig->cluster_type;
2525 ptv->threads = afpconfig->threads;
e80b30c0
EL
2526 }
2527#endif
49b7b00f 2528 ptv->flags = afpconfig->flags;
e80b30c0 2529
f2a6fb8a
EL
2530 if (afpconfig->bpf_filter) {
2531 ptv->bpf_filter = afpconfig->bpf_filter;
2532 }
5e05fedc 2533#ifdef HAVE_PACKET_EBPF
91e1256b
EL
2534 ptv->ebpf_lb_fd = afpconfig->ebpf_lb_fd;
2535 ptv->ebpf_filter_fd = afpconfig->ebpf_filter_fd;
8c880879 2536 ptv->xdp_mode = afpconfig->xdp_mode;
4cf53100 2537 ptv->ebpf_t_config.cpus_count = UtilCpuGetNumProcessorsConfigured();
f2a6fb8a 2538
d65f4585 2539 if (ptv->flags & (AFP_BYPASS|AFP_XDPBYPASS)) {
126488f7 2540 ptv->v4_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v4");
d65f4585 2541 if (ptv->v4_map_fd == -1) {
5e62ae6d
EL
2542 if (g_flowv4_ok == false) {
2543 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'",
2544 "flow_table_v4");
2545 g_flowv4_ok = true;
2546 }
d65f4585 2547 }
126488f7 2548 ptv->v6_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v6");
d65f4585 2549 if (ptv->v6_map_fd == -1) {
5e62ae6d
EL
2550 if (g_flowv6_ok) {
2551 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'",
2552 "flow_table_v6");
2553 g_flowv6_ok = false;
2554 }
d65f4585
EL
2555 }
2556 }
4cf53100 2557 ptv->ebpf_t_config = afpconfig->ebpf_t_config;
d65f4585
EL
2558#endif
2559
6efd37a3 2560#ifdef PACKET_STATISTICS
1ef786e7
VJ
2561 ptv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets",
2562 ptv->tv);
2563 ptv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops",
2564 ptv->tv);
9efa4ace
EL
2565 ptv->capture_errors = StatsRegisterCounter("capture.errors",
2566 ptv->tv);
cad0ff9e
VJ
2567
2568 ptv->afpacket_spin = StatsRegisterAvgCounter("capture.afpacket.busy_loop_avg", ptv->tv);
2569
2570 ptv->capture_afp_poll = StatsRegisterCounter("capture.afpacket.polls", ptv->tv);
2571 ptv->capture_afp_poll_signal = StatsRegisterCounter("capture.afpacket.poll_signal", ptv->tv);
2572 ptv->capture_afp_poll_timeout = StatsRegisterCounter("capture.afpacket.poll_timeout", ptv->tv);
2573 ptv->capture_afp_poll_data = StatsRegisterCounter("capture.afpacket.poll_data", ptv->tv);
2574 ptv->capture_afp_poll_err = StatsRegisterCounter("capture.afpacket.poll_errors", ptv->tv);
39bf623f 2575 ptv->capture_afp_send_err = StatsRegisterCounter("capture.afpacket.send_errors", ptv->tv);
6efd37a3
EL
2576#endif
2577
662dccd8
EL
2578 ptv->copy_mode = afpconfig->copy_mode;
2579 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
2580 strlcpy(ptv->out_iface, afpconfig->out_iface, AFP_IFACE_NAME_LENGTH);
2581 ptv->out_iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
b7e78d33
EL
2582 /* Warn about BPF filter consequence */
2583 if (ptv->bpf_filter) {
2584 SCLogWarning(SC_WARN_UNCOMMON, "Enabling a BPF filter in IPS mode result"
2585 " in dropping all non matching packets.");
2586 }
662dccd8 2587 }
c85ee1e3 2588
b7e78d33 2589
0581a23f
EL
2590 if (AFPPeersListAdd(ptv) == TM_ECODE_FAILED) {
2591 SCFree(ptv);
2592 afpconfig->DerefFunc(afpconfig);
2593 SCReturnInt(TM_ECODE_FAILED);
2594 }
2595
c45d8985 2596 *data = (void *)ptv;
fbca1a4e 2597
45d5c3ca 2598 afpconfig->DerefFunc(afpconfig);
71e47868 2599
2cd6e128
EL
2600 /* If kernel is older than 3.0, VLAN is not stripped so we don't
2601 * get the info from packet extended header but we will use a standard
2602 * parsing of packet data (See Linux commit bcc6d47903612c3861201cc3a866fb604f26b8b2) */
bcc03f17
MF
2603 if (SCKernelVersionIsAtLeast(3, 0)) {
2604 ptv->flags |= AFP_VLAN_IN_HEADER;
2cd6e128
EL
2605 }
2606
c45d8985
EL
2607 SCReturnInt(TM_ECODE_OK);
2608}
2609
2610/**
2611 * \brief This function prints stats to the screen at exit.
2612 * \param tv pointer to ThreadVars
2613 * \param data pointer that gets cast into AFPThreadVars for ptv
2614 */
8f1d7503
KS
2615void ReceiveAFPThreadExitStats(ThreadVars *tv, void *data)
2616{
c45d8985
EL
2617 SCEnter();
2618 AFPThreadVars *ptv = (AFPThreadVars *)data;
9549faae
EL
2619
2620#ifdef PACKET_STATISTICS
e8a4a4c4 2621 AFPDumpCounters(ptv);
b3bf7a57 2622 SCLogPerf("(%s) Kernel: Packets %" PRIu64 ", dropped %" PRIu64 "",
6efd37a3 2623 tv->name,
752f03e7
VJ
2624 StatsGetLocalCounterValue(tv, ptv->capture_kernel_packets),
2625 StatsGetLocalCounterValue(tv, ptv->capture_kernel_drops));
9549faae 2626#endif
c45d8985
EL
2627}
2628
2629/**
2630 * \brief DeInit function closes af packet socket at exit.
2631 * \param tv pointer to ThreadVars
2632 * \param data pointer that gets cast into AFPThreadVars for ptv
2633 */
8f1d7503
KS
2634TmEcode ReceiveAFPThreadDeinit(ThreadVars *tv, void *data)
2635{
c45d8985
EL
2636 AFPThreadVars *ptv = (AFPThreadVars *)data;
2637
13f13b6d
EL
2638 AFPSwitchState(ptv, AFP_STATE_DOWN);
2639
8c880879 2640#ifdef HAVE_PACKET_XDP
4cf53100
EL
2641 if ((ptv->ebpf_t_config.flags & EBPF_XDP_CODE) &&
2642 (!(ptv->ebpf_t_config.flags & EBPF_PINNED_MAPS))) {
2643 EBPFSetupXDP(ptv->iface, -1, ptv->xdp_mode);
2644 }
8c880879 2645#endif
e80b30c0 2646
f2a6fb8a 2647 ptv->bpf_filter = NULL;
69d0d484
VJ
2648 if ((ptv->flags & AFP_TPACKET_V3) && ptv->ring.v3) {
2649 SCFree(ptv->ring.v3);
ce59ec5d 2650 } else {
69d0d484
VJ
2651 if (ptv->ring.v2)
2652 SCFree(ptv->ring.v2);
ce59ec5d 2653 }
f2a6fb8a 2654
7127ae2b 2655 SCFree(ptv);
c45d8985
EL
2656 SCReturnInt(TM_ECODE_OK);
2657}
2658
2fab3ff0
VJ
2659/** \internal
2660 * \brief add a VLAN header into the raw data for inspection, logging
2661 * and sending out in IPS mode
2662 *
2663 * The kernel doesn't provide the first VLAN header the raw packet data,
2664 * but instead feeds it to us through meta data. For logging and IPS
2665 * we need to put it back into the raw data. Luckily there is some head
2666 * room in the original data so its enough to move the ethernet header
2667 * a bit to make space for the VLAN header.
2668 */
2669static void UpdateRawDataForVLANHdr(Packet *p)
2670{
2671 if (p->afp_v.vlan_tci != 0) {
2672 uint8_t *pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
2673 size_t plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
2674 /* move ethernet addresses */
2675 memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
2676 /* write vlan info */
2677 *(uint16_t *)(pstart + 2 * ETH_ALEN) = htons(0x8100);
2678 *(uint16_t *)(pstart + 2 * ETH_ALEN + 2) = htons(p->afp_v.vlan_tci);
2679
2680 /* update the packet raw data pointer to start at the new offset */
2681 (void)PacketSetData(p, pstart, plen);
2682 /* update ethernet header pointer to point to the new start of the data */
2683 p->ethh = (void *)pstart;
2684 }
2685}
2686
c45d8985
EL
2687/**
2688 * \brief This function passes off to link type decoders.
2689 *
f8aed4ce 2690 * DecodeAFP decodes packets from AF_PACKET and passes
c45d8985
EL
2691 * them off to the proper link type decoder.
2692 *
2693 * \param t pointer to ThreadVars
2694 * \param p pointer to the current packet
2695 * \param data pointer that gets cast into AFPThreadVars for ptv
c45d8985 2696 */
f8aed4ce 2697TmEcode DecodeAFP(ThreadVars *tv, Packet *p, void *data)
c45d8985
EL
2698{
2699 SCEnter();
2fab3ff0
VJ
2700
2701 const bool afp_vlan_hdr = p->vlan_idx != 0;
c45d8985
EL
2702 DecodeThreadVars *dtv = (DecodeThreadVars *)data;
2703
3f8e15f7 2704 DEBUG_VALIDATE_BUG_ON(PKT_IS_PSEUDOPKT(p));
f7b1aefa 2705
c45d8985 2706 /* update counters */
14466a80 2707 DecodeUpdatePacketCounters(tv, dtv, p);
c45d8985
EL
2708
2709 /* call the decoder */
88bccfb8 2710 DecodeLinkLayer(tv, dtv, p->datalink, p, GET_PKT_DATA(p), GET_PKT_LEN(p));
2fab3ff0
VJ
2711 /* post-decoding put vlan hdr back into the raw data) */
2712 if (afp_vlan_hdr) {
2713 StatsIncr(tv, dtv->counter_vlan);
2714 UpdateRawDataForVLANHdr(p);
2715 }
c45d8985 2716
3088b6ac 2717 PacketDecodeFinalize(tv, dtv, p);
e7f09f24 2718
c45d8985
EL
2719 SCReturnInt(TM_ECODE_OK);
2720}
2721
ab1200fb 2722TmEcode DecodeAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
c45d8985
EL
2723{
2724 SCEnter();
e63db9d1 2725 DecodeThreadVars *dtv = DecodeThreadVarsAlloc(tv);
c45d8985
EL
2726 if (dtv == NULL)
2727 SCReturnInt(TM_ECODE_FAILED);
2728
2729 DecodeRegisterPerfCounters(dtv, tv);
2730
2731 *data = (void *)dtv;
2732
2733 SCReturnInt(TM_ECODE_OK);
2734}
2735
2864f9ee
VJ
2736TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data)
2737{
2738 if (data != NULL)
98c88d51 2739 DecodeThreadVarsFree(tv, data);
2864f9ee
VJ
2740 SCReturnInt(TM_ECODE_OK);
2741}
2742
e80b30c0 2743#endif /* HAVE_AF_PACKET */
c45d8985 2744/* eof */
a6457262
EL
2745/**
2746 * @}
2747 */