]> git.ipfire.org Git - people/ms/suricata.git/blame - src/source-af-packet.c
util-ebpf: implement pinned maps loading
[people/ms/suricata.git] / src / source-af-packet.c
CommitLineData
cde438f6 1/* Copyright (C) 2011-2018 Open Information Security Foundation
c45d8985
EL
2 *
3 * You can copy, redistribute or modify this Program under the terms of
4 * the GNU General Public License version 2 as published by the Free
5 * Software Foundation.
6 *
7 * This program is distributed in the hope that it will be useful,
8 * but WITHOUT ANY WARRANTY; without even the implied warranty of
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details.
11 *
12 * You should have received a copy of the GNU General Public License
13 * version 2 along with this program; if not, write to the Free Software
14 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15 * 02110-1301, USA.
16 */
17
a6457262
EL
18/**
19 * \defgroup afppacket AF_PACKET running mode
20 *
21 * @{
22 */
23
c45d8985
EL
24/**
25 * \file
26 *
27 * \author Eric Leblond <eric@regit.org>
28 *
29 * AF_PACKET socket acquisition support
30 *
c45d8985
EL
31 */
32
91e1256b
EL
33#define PCAP_DONT_INCLUDE_PCAP_BPF_H 1
34#define SC_PCAP_DONT_INCLUDE_PCAP_H 1
c45d8985 35#include "suricata-common.h"
e80b30c0 36#include "config.h"
c45d8985
EL
37#include "suricata.h"
38#include "decode.h"
39#include "packet-queue.h"
40#include "threads.h"
41#include "threadvars.h"
42#include "tm-queuehandlers.h"
43#include "tm-modules.h"
44#include "tm-threads.h"
45#include "tm-threads-common.h"
46#include "conf.h"
17a32bda 47#include "util-cpu.h"
c45d8985 48#include "util-debug.h"
51eb9605 49#include "util-device.h"
d65f4585 50#include "util-ebpf.h"
c45d8985
EL
51#include "util-error.h"
52#include "util-privs.h"
e80b30c0 53#include "util-optimize.h"
51eb9605 54#include "util-checksum.h"
ac56b1bf 55#include "util-ioctl.h"
2cd6e128 56#include "util-host-info.h"
c45d8985
EL
57#include "tmqh-packetpool.h"
58#include "source-af-packet.h"
34b3f194 59#include "runmodes.h"
c45d8985 60
e80b30c0 61#ifdef HAVE_AF_PACKET
472e061c
VJ
62
63#if HAVE_SYS_IOCTL_H
2bc0be6e 64#include <sys/ioctl.h>
472e061c
VJ
65#endif
66
06173267
EL
67#ifdef HAVE_PACKET_EBPF
68#include "util-ebpf.h"
69#include <bpf/libbpf.h>
70#include <bpf/bpf.h>
71#endif
72
91e1256b
EL
/*
 * Local definition of a BPF program descriptor: an instruction count
 * followed by a pointer to the instruction array.
 * NOTE(review): presumably this has to stay layout-compatible with
 * libpcap's struct bpf_program, which is not pulled in because
 * PCAP_DONT_INCLUDE_PCAP_BPF_H is defined above -- confirm.
 */
struct bpf_program {
    unsigned int bf_len;
    struct bpf_insn *bf_insns;
};
77
78#ifdef HAVE_PCAP_H
79#include <pcap.h>
80#endif
81
82#ifdef HAVE_PCAP_PCAP_H
83#include <pcap/pcap.h>
84#endif
85
28e9e4c8
EL
86#include "util-bpf.h"
87
472e061c 88#if HAVE_LINUX_IF_ETHER_H
c45d8985 89#include <linux/if_ether.h>
472e061c
VJ
90#endif
91
92#if HAVE_LINUX_IF_PACKET_H
c45d8985 93#include <linux/if_packet.h>
472e061c
VJ
94#endif
95
96#if HAVE_LINUX_IF_ARP_H
c45d8985 97#include <linux/if_arp.h>
472e061c 98#endif
f2a6fb8a 99
472e061c 100#if HAVE_LINUX_FILTER_H
f2a6fb8a 101#include <linux/filter.h>
e80b30c0 102#endif
c45d8985 103
472e061c 104#if HAVE_SYS_MMAN_H
49b7b00f 105#include <sys/mman.h>
472e061c
VJ
106#endif
107
a40f08a2
EL
108#ifdef HAVE_HW_TIMESTAMPING
109#include <linux/net_tstamp.h>
110#endif
111
472e061c 112#endif /* HAVE_AF_PACKET */
49b7b00f 113
c45d8985
EL
114extern int max_pending_packets;
115
e80b30c0
EL
116#ifndef HAVE_AF_PACKET
117
ab1200fb 118TmEcode NoAFPSupportExit(ThreadVars *, const void *, void **);
e80b30c0 119
8f1d7503
KS
120void TmModuleReceiveAFPRegister (void)
121{
e80b30c0
EL
122 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
123 tmm_modules[TMM_RECEIVEAFP].ThreadInit = NoAFPSupportExit;
124 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
125 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = NULL;
126 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = NULL;
127 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
128 tmm_modules[TMM_RECEIVEAFP].cap_flags = 0;
3f1c4efc 129 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
e80b30c0
EL
130}
131
132/**
133 * \brief Registration Function for DecodeAFP.
e80b30c0 134 */
8f1d7503
KS
135void TmModuleDecodeAFPRegister (void)
136{
e80b30c0
EL
137 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
138 tmm_modules[TMM_DECODEAFP].ThreadInit = NoAFPSupportExit;
139 tmm_modules[TMM_DECODEAFP].Func = NULL;
140 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
141 tmm_modules[TMM_DECODEAFP].ThreadDeinit = NULL;
142 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
143 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
bc6cf438 144 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
e80b30c0
EL
145}
146
147/**
148 * \brief this function prints an error message and exits.
149 */
ab1200fb 150TmEcode NoAFPSupportExit(ThreadVars *tv, const void *initdata, void **data)
e80b30c0
EL
151{
152 SCLogError(SC_ERR_NO_AF_PACKET,"Error creating thread %s: you do not have "
153 "support for AF_PACKET enabled, on Linux host please recompile "
154 "with --enable-af-packet", tv->name);
155 exit(EXIT_FAILURE);
156}
157
158#else /* We have AF_PACKET support */
159
c45d8985
EL
160#define AFP_IFACE_NAME_LENGTH 48
161
162#define AFP_STATE_DOWN 0
163#define AFP_STATE_UP 1
164
165#define AFP_RECONNECT_TIMEOUT 500000
13f13b6d 166#define AFP_DOWN_COUNTER_INTERVAL 40
c45d8985
EL
167
168#define POLL_TIMEOUT 100
169
4a1a0080
EL
#ifndef TP_STATUS_USER_BUSY
/* Use the highest bit of tp_status as a private "frame is being processed"
 * marker. The constant is unsigned: shifting a signed 1 into bit 31 is
 * undefined behaviour in C. */
#define TP_STATUS_USER_BUSY (1U << 31)
#endif
174
b603ad62
EL
175#ifndef TP_STATUS_VLAN_VALID
176#define TP_STATUS_VLAN_VALID (1 << 4)
177#endif
178
62e63e3f
EL
/* Return codes of the AF_PACKET read functions (AFPRead, AFPReadFromRing). */
enum {
    AFP_READ_OK = 0,
    AFP_READ_FAILURE = 1,
    /** Error during treatment by other functions of Suricata */
    AFP_SURI_FAILURE = 2,
    AFP_KERNEL_DROP = 3,
};
186
1992a227
EL
/* Error severity codes; numbering starts at 1 so that 0 is not an error. */
enum {
    AFP_FATAL_ERROR = 1,
    AFP_RECOVERABLE_ERROR = 2,
};
191
49b7b00f
EL
/*
 * View of a ring frame header: the same address can be read as a
 * TPACKET_V2 header, a TPACKET_V3 header (when built with
 * HAVE_TPACKET_V3), or as an untyped pointer.
 */
union thdr {
    struct tpacket2_hdr *h2;
#ifdef HAVE_TPACKET_V3
    struct tpacket3_hdr *h3;
#endif
    void *raw;
};
199
06173267 200static int AFPBypassCallback(Packet *p);
8c880879 201static int AFPXDPBypassCallback(Packet *p);
06173267 202
91e1256b 203#define MAX_MAPS 32
c45d8985
EL
204/**
205 * \brief Structure to hold thread specific variables.
206 */
207typedef struct AFPThreadVars_
208{
69d0d484
VJ
209 union AFPRing {
210 char *v2;
211 struct iovec *v3;
212 } ring;
b797fd92 213
c45d8985 214 /* counters */
3ce39433 215 uint64_t pkts;
c45d8985 216
ff6365dd
EL
217 ThreadVars *tv;
218 TmSlot *slot;
9500d12c
EL
219 LiveDevice *livedev;
220 /* data link type for the thread */
b797fd92 221 uint32_t datalink;
9500d12c 222
d65f4585 223#ifdef HAVE_PACKET_EBPF
94a622cb 224 /* File descriptor of the IPv4 flow bypass table maps */
d65f4585 225 int v4_map_fd;
94a622cb 226 /* File descriptor of the IPv6 flow bypass table maps */
d65f4585
EL
227 int v6_map_fd;
228#endif
229
9500d12c 230 unsigned int frame_offset;
ff6365dd 231
9500d12c
EL
232 ChecksumValidationMode checksum_mode;
233
b797fd92 234 /* references to packet and drop counters */
9500d12c
EL
235 uint16_t capture_kernel_packets;
236 uint16_t capture_kernel_drops;
9efa4ace 237 uint16_t capture_errors;
9500d12c
EL
238
239 /* handle state */
240 uint8_t afp_state;
241 uint8_t copy_mode;
4bfa3aea 242 unsigned int flags;
9500d12c
EL
243
244 /* IPS peer */
245 AFPPeer *mpeer;
246
247 /* no mmap mode */
ff6365dd
EL
248 uint8_t *data; /** Per function and thread data */
249 int datalen; /** Length of per function and thread data */
9500d12c 250 int cooked;
ff6365dd 251
9500d12c
EL
252 /*
253 * Init related members
254 */
51eb9605 255
9500d12c
EL
256 /* thread specific socket */
257 int socket;
b797fd92
EL
258
259 int ring_size;
fa902abe 260 int block_size;
234aefdf 261 int block_timeout;
e80b30c0
EL
262 /* socket buffer size */
263 int buffer_size;
fa902abe 264 /* Filter */
ab1200fb 265 const char *bpf_filter;
91e1256b
EL
266 int ebpf_lb_fd;
267 int ebpf_filter_fd;
9500d12c 268
df7dbe36 269 int promisc;
e80b30c0 270
9500d12c 271 int down_count;
662dccd8 272
e80b30c0
EL
273 int cluster_id;
274 int cluster_type;
c45d8985 275
fbca1a4e
EL
276 int threads;
277
69d0d484
VJ
278 union AFPTpacketReq {
279 struct tpacket_req v2;
c2d0d938 280#ifdef HAVE_TPACKET_V3
69d0d484 281 struct tpacket_req3 v3;
c2d0d938 282#endif
69d0d484 283 } req;
b797fd92
EL
284
285 char iface[AFP_IFACE_NAME_LENGTH];
286 /* IPS output iface */
287 char out_iface[AFP_IFACE_NAME_LENGTH];
662dccd8 288
cba41207
AG
289 /* mmap'ed ring buffer */
290 unsigned int ring_buflen;
291 uint8_t *ring_buf;
91e1256b 292
8c880879
EL
293 uint8_t xdp_mode;
294
4cf53100 295 struct ebpf_timeout_config ebpf_t_config;
315c29a8 296
c45d8985
EL
297} AFPThreadVars;
298
299TmEcode ReceiveAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
ab1200fb 300TmEcode ReceiveAFPThreadInit(ThreadVars *, const void *, void **);
c45d8985
EL
301void ReceiveAFPThreadExitStats(ThreadVars *, void *);
302TmEcode ReceiveAFPThreadDeinit(ThreadVars *, void *);
e80b30c0 303TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot);
c45d8985 304
ab1200fb 305TmEcode DecodeAFPThreadInit(ThreadVars *, const void *, void **);
2864f9ee 306TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data);
c45d8985
EL
307TmEcode DecodeAFP(ThreadVars *, Packet *, void *, PacketQueue *, PacketQueue *);
308
f2a6fb8a 309TmEcode AFPSetBPFFilter(AFPThreadVars *ptv);
662dccd8 310static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose);
13f13b6d
EL
311static int AFPGetDevFlags(int fd, const char *ifname);
312static int AFPDerefSocket(AFPPeer* peer);
313static int AFPRefSocket(AFPPeer* peer);
f2a6fb8a 314
19475165 315
c45d8985
EL
316/**
317 * \brief Registration Function for RecieveAFP.
318 * \todo Unit tests are needed for this module.
319 */
8f1d7503
KS
320void TmModuleReceiveAFPRegister (void)
321{
c45d8985
EL
322 tmm_modules[TMM_RECEIVEAFP].name = "ReceiveAFP";
323 tmm_modules[TMM_RECEIVEAFP].ThreadInit = ReceiveAFPThreadInit;
ff6365dd 324 tmm_modules[TMM_RECEIVEAFP].Func = NULL;
e80b30c0 325 tmm_modules[TMM_RECEIVEAFP].PktAcqLoop = ReceiveAFPLoop;
57e0bd39 326 tmm_modules[TMM_RECEIVEAFP].PktAcqBreakLoop = NULL;
c45d8985 327 tmm_modules[TMM_RECEIVEAFP].ThreadExitPrintStats = ReceiveAFPThreadExitStats;
7127ae2b 328 tmm_modules[TMM_RECEIVEAFP].ThreadDeinit = ReceiveAFPThreadDeinit;
c45d8985
EL
329 tmm_modules[TMM_RECEIVEAFP].RegisterTests = NULL;
330 tmm_modules[TMM_RECEIVEAFP].cap_flags = SC_CAP_NET_RAW;
cd4705e6 331 tmm_modules[TMM_RECEIVEAFP].flags = TM_FLAG_RECEIVE_TM;
19475165 332
c45d8985
EL
333}
334
a6457262
EL
335
336/**
337 * \defgroup afppeers AFP peers list
338 *
339 * AF_PACKET has an IPS mode where interfaces are peered: packets from
340 * one interface are sent to the peered interface and the other way. The ::AFPPeer
341 * list maintains the list of peers. Each ::AFPPeer stores the needed
342 * information to be able to send packets on the interface.
343 * An element of the list must not be destroyed during the run of Suricata as it
344 * is used by ::Packet and other threads.
345 *
346 * @{
347 */
348
662dccd8
EL
349typedef struct AFPPeersList_ {
350 TAILQ_HEAD(, AFPPeer_) peers; /**< Head of list of fragments. */
351 int cnt;
352 int peered;
60400163
EL
353 int turn; /**< Next value for initialisation order */
354 SC_ATOMIC_DECLARE(int, reached); /**< Counter used to synchronize start */
662dccd8
EL
355} AFPPeersList;
356
357/**
a6457262
EL
358 * \brief Update the peer.
359 *
360 * Update the AFPPeer of a thread ie set new state, socket number
361 * or iface index.
362 *
662dccd8 363 */
ab1200fb 364static void AFPPeerUpdate(AFPThreadVars *ptv)
662dccd8
EL
365{
366 if (ptv->mpeer == NULL) {
367 return;
368 }
662dccd8
EL
369 (void)SC_ATOMIC_SET(ptv->mpeer->if_idx, AFPGetIfnumByDev(ptv->socket, ptv->iface, 0));
370 (void)SC_ATOMIC_SET(ptv->mpeer->socket, ptv->socket);
371 (void)SC_ATOMIC_SET(ptv->mpeer->state, ptv->afp_state);
372}
373
a6457262
EL
374/**
375 * \brief Clean and free ressource used by an ::AFPPeer
376 */
ab1200fb 377static void AFPPeerClean(AFPPeer *peer)
662dccd8
EL
378{
379 if (peer->flags & AFP_SOCK_PROTECT)
380 SCMutexDestroy(&peer->sock_protect);
381 SC_ATOMIC_DESTROY(peer->socket);
382 SC_ATOMIC_DESTROY(peer->if_idx);
383 SC_ATOMIC_DESTROY(peer->state);
384 SCFree(peer);
385}
386
387AFPPeersList peerslist;
388
389
a6457262
EL
390/**
391 * \brief Init the global list of ::AFPPeer
392 */
662dccd8
EL
393TmEcode AFPPeersListInit()
394{
395 SCEnter();
396 TAILQ_INIT(&peerslist.peers);
397 peerslist.peered = 0;
398 peerslist.cnt = 0;
60400163
EL
399 peerslist.turn = 0;
400 SC_ATOMIC_INIT(peerslist.reached);
401 (void) SC_ATOMIC_SET(peerslist.reached, 0);
662dccd8
EL
402 SCReturnInt(TM_ECODE_OK);
403}
404
a6457262
EL
405/**
406 * \brief Check that all ::AFPPeer got a peer
407 *
408 * \retval TM_ECODE_FAILED if some threads are not peered or TM_ECODE_OK else.
409 */
662dccd8
EL
410TmEcode AFPPeersListCheck()
411{
412#define AFP_PEERS_MAX_TRY 4
413#define AFP_PEERS_WAIT 20000
414 int try = 0;
415 SCEnter();
416 while (try < AFP_PEERS_MAX_TRY) {
417 if (peerslist.cnt != peerslist.peered) {
418 usleep(AFP_PEERS_WAIT);
419 } else {
420 SCReturnInt(TM_ECODE_OK);
421 }
422 try++;
423 }
424 SCLogError(SC_ERR_AFP_CREATE, "Threads number not equals");
425 SCReturnInt(TM_ECODE_FAILED);
426}
427
a6457262
EL
428/**
429 * \brief Declare a new AFP thread to AFP peers list.
430 */
ab1200fb 431static TmEcode AFPPeersListAdd(AFPThreadVars *ptv)
662dccd8
EL
432{
433 SCEnter();
434 AFPPeer *peer = SCMalloc(sizeof(AFPPeer));
435 AFPPeer *pitem;
ac56b1bf 436 int mtu, out_mtu;
662dccd8 437
e176be6f 438 if (unlikely(peer == NULL)) {
662dccd8
EL
439 SCReturnInt(TM_ECODE_FAILED);
440 }
441 memset(peer, 0, sizeof(AFPPeer));
442 SC_ATOMIC_INIT(peer->socket);
13f13b6d 443 SC_ATOMIC_INIT(peer->sock_usage);
662dccd8
EL
444 SC_ATOMIC_INIT(peer->if_idx);
445 SC_ATOMIC_INIT(peer->state);
446 peer->flags = ptv->flags;
60400163 447 peer->turn = peerslist.turn++;
662dccd8
EL
448
449 if (peer->flags & AFP_SOCK_PROTECT) {
450 SCMutexInit(&peer->sock_protect, NULL);
451 }
452
13f13b6d 453 (void)SC_ATOMIC_SET(peer->sock_usage, 0);
662dccd8
EL
454 (void)SC_ATOMIC_SET(peer->state, AFP_STATE_DOWN);
455 strlcpy(peer->iface, ptv->iface, AFP_IFACE_NAME_LENGTH);
456 ptv->mpeer = peer;
457 /* add element to iface list */
458 TAILQ_INSERT_TAIL(&peerslist.peers, peer, next);
662dccd8 459
13f13b6d
EL
460 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
461 peerslist.cnt++;
462
463 /* Iter to find a peer */
464 TAILQ_FOREACH(pitem, &peerslist.peers, next) {
465 if (pitem->peer)
466 continue;
467 if (strcmp(pitem->iface, ptv->out_iface))
468 continue;
469 peer->peer = pitem;
470 pitem->peer = peer;
471 mtu = GetIfaceMTU(ptv->iface);
472 out_mtu = GetIfaceMTU(ptv->out_iface);
473 if (mtu != out_mtu) {
474 SCLogError(SC_ERR_AFP_CREATE,
475 "MTU on %s (%d) and %s (%d) are not equal, "
476 "transmission of packets bigger than %d will fail.",
477 ptv->iface, mtu,
478 ptv->out_iface, out_mtu,
479 (out_mtu > mtu) ? mtu : out_mtu);
480 }
481 peerslist.peered += 2;
482 break;
ac56b1bf 483 }
662dccd8
EL
484 }
485
486 AFPPeerUpdate(ptv);
487
488 SCReturnInt(TM_ECODE_OK);
489}
490
ab1200fb 491static int AFPPeersListWaitTurn(AFPPeer *peer)
60400163 492{
b2691cbe
EL
493 /* If turn is zero, we already have started threads once */
494 if (peerslist.turn == 0)
495 return 0;
496
60400163
EL
497 if (peer->turn == SC_ATOMIC_GET(peerslist.reached))
498 return 0;
499 return 1;
500}
501
ab1200fb 502static void AFPPeersListReachedInc(void)
60400163 503{
b2691cbe
EL
504 if (peerslist.turn == 0)
505 return;
506
507 if (SC_ATOMIC_ADD(peerslist.reached, 1) == peerslist.turn) {
508 SCLogInfo("All AFP capture threads are running.");
509 (void)SC_ATOMIC_SET(peerslist.reached, 0);
510 /* Set turn to 0 to skip syncrhonization when ReceiveAFPLoop is
511 * restarted.
512 */
513 peerslist.turn = 0;
514 }
60400163
EL
515}
516
ab1200fb 517static int AFPPeersListStarted(void)
919377d4
EL
518{
519 return !peerslist.turn;
520}
521
a6457262
EL
522/**
523 * \brief Clean the global peers list.
524 */
662dccd8
EL
525void AFPPeersListClean()
526{
527 AFPPeer *pitem;
528
529 while ((pitem = TAILQ_FIRST(&peerslist.peers))) {
530 TAILQ_REMOVE(&peerslist.peers, pitem, next);
531 AFPPeerClean(pitem);
532 }
533}
534
a6457262
EL
535/**
536 * @}
537 */
538
c45d8985
EL
539/**
540 * \brief Registration Function for DecodeAFP.
541 * \todo Unit tests are needed for this module.
542 */
8f1d7503
KS
543void TmModuleDecodeAFPRegister (void)
544{
c45d8985
EL
545 tmm_modules[TMM_DECODEAFP].name = "DecodeAFP";
546 tmm_modules[TMM_DECODEAFP].ThreadInit = DecodeAFPThreadInit;
547 tmm_modules[TMM_DECODEAFP].Func = DecodeAFP;
548 tmm_modules[TMM_DECODEAFP].ThreadExitPrintStats = NULL;
2864f9ee 549 tmm_modules[TMM_DECODEAFP].ThreadDeinit = DecodeAFPThreadDeinit;
c45d8985
EL
550 tmm_modules[TMM_DECODEAFP].RegisterTests = NULL;
551 tmm_modules[TMM_DECODEAFP].cap_flags = 0;
bc6cf438 552 tmm_modules[TMM_DECODEAFP].flags = TM_FLAG_DECODE_TM;
c45d8985
EL
553}
554
662dccd8 555
e80b30c0
EL
556static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose);
557
e8a4a4c4 558static inline void AFPDumpCounters(AFPThreadVars *ptv)
6efd37a3 559{
6efd37a3 560#ifdef PACKET_STATISTICS
e8a4a4c4
EL
561 struct tpacket_stats kstats;
562 socklen_t len = sizeof (struct tpacket_stats);
563 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
564 &kstats, &len) > -1) {
565 SCLogDebug("(%s) Kernel: Packets %" PRIu32 ", dropped %" PRIu32 "",
566 ptv->tv->name,
567 kstats.tp_packets, kstats.tp_drops);
8992275b
VJ
568 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, kstats.tp_packets);
569 StatsAddUI64(ptv->tv, ptv->capture_kernel_drops, kstats.tp_drops);
3ce39433
EL
570 (void) SC_ATOMIC_ADD(ptv->livedev->drop, (uint64_t) kstats.tp_drops);
571 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, (uint64_t) kstats.tp_packets);
6efd37a3 572 }
e8a4a4c4 573#endif
6efd37a3 574}
c45d8985
EL
575
576/**
577 * \brief AF packet read function.
578 *
579 * This function fills
580 * From here the packets are picked up by the DecodeAFP thread.
581 *
582 * \param user pointer to AFPThreadVars
583 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
584 */
ab1200fb 585static int AFPRead(AFPThreadVars *ptv)
c45d8985
EL
586{
587 Packet *p = NULL;
588 /* XXX should try to use read that get directly to packet */
c45d8985
EL
589 int offset = 0;
590 int caplen;
591 struct sockaddr_ll from;
592 struct iovec iov;
593 struct msghdr msg;
c45d8985
EL
594 struct cmsghdr *cmsg;
595 union {
596 struct cmsghdr cmsg;
597 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
598 } cmsg_buf;
6efd37a3 599 unsigned char aux_checksum = 0;
c45d8985
EL
600
601 msg.msg_name = &from;
602 msg.msg_namelen = sizeof(from);
603 msg.msg_iov = &iov;
604 msg.msg_iovlen = 1;
c45d8985
EL
605 msg.msg_control = &cmsg_buf;
606 msg.msg_controllen = sizeof(cmsg_buf);
c45d8985
EL
607 msg.msg_flags = 0;
608
609 if (ptv->cooked)
610 offset = SLL_HEADER_LEN;
611 else
612 offset = 0;
e80b30c0
EL
613 iov.iov_len = ptv->datalen - offset;
614 iov.iov_base = ptv->data + offset;
c45d8985
EL
615
616 caplen = recvmsg(ptv->socket, &msg, MSG_TRUNC);
617
618 if (caplen < 0) {
619 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
620 errno);
62e63e3f 621 SCReturnInt(AFP_READ_FAILURE);
c45d8985 622 }
ff6365dd
EL
623
624 p = PacketGetFromQueueOrAlloc();
c45d8985 625 if (p == NULL) {
9efa4ace 626 SCReturnInt(AFP_SURI_FAILURE);
c45d8985 627 }
b33986c8 628 PKT_SET_SRC(p, PKT_SRC_WIRE);
06173267
EL
629 if (ptv->flags & AFP_BYPASS) {
630 p->BypassPacketsFlow = AFPBypassCallback;
d65f4585
EL
631#ifdef HAVE_PACKET_EBPF
632 p->afp_v.v4_map_fd = ptv->v4_map_fd;
633 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 634 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 635#endif
06173267 636 }
8c880879
EL
637 if (ptv->flags & AFP_XDPBYPASS) {
638 p->BypassPacketsFlow = AFPXDPBypassCallback;
d65f4585
EL
639#ifdef HAVE_PACKET_EBPF
640 p->afp_v.v4_map_fd = ptv->v4_map_fd;
641 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 642 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 643#endif
8c880879 644 }
c45d8985
EL
645
646 /* get timestamp of packet via ioctl */
647 if (ioctl(ptv->socket, SIOCGSTAMP, &p->ts) == -1) {
648 SCLogWarning(SC_ERR_AFP_READ, "recvmsg failed with error code %" PRId32,
649 errno);
650 TmqhOutputPacketpool(ptv->tv, p);
62e63e3f 651 SCReturnInt(AFP_READ_FAILURE);
c45d8985
EL
652 }
653
654 ptv->pkts++;
51eb9605 655 p->livedev = ptv->livedev;
c45d8985
EL
656
657 /* add forged header */
658 if (ptv->cooked) {
e80b30c0 659 SllHdr * hdrp = (SllHdr *)ptv->data;
c45d8985
EL
660 /* XXX this is minimalist, but this seems enough */
661 hdrp->sll_protocol = from.sll_protocol;
662 }
663
664 p->datalink = ptv->datalink;
665 SET_PKT_LEN(p, caplen + offset);
e80b30c0 666 if (PacketCopyData(p, ptv->data, GET_PKT_LEN(p)) == -1) {
c45d8985 667 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 668 SCReturnInt(AFP_SURI_FAILURE);
c45d8985 669 }
e80b30c0
EL
670 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
671 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
672
6062e00c
EL
673 /* We only check for checksum disable */
674 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
51eb9605
EL
675 p->flags |= PKT_IGNORE_CHECKSUM;
676 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
677 if (ptv->livedev->ignore_checksum) {
678 p->flags |= PKT_IGNORE_CHECKSUM;
a565148f 679 } else if (ChecksumAutoModeCheck(ptv->pkts,
51eb9605
EL
680 SC_ATOMIC_GET(ptv->livedev->pkts),
681 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
682 ptv->livedev->ignore_checksum = 1;
6062e00c 683 p->flags |= PKT_IGNORE_CHECKSUM;
51eb9605 684 }
6062e00c 685 } else {
6efd37a3
EL
686 aux_checksum = 1;
687 }
6062e00c 688
6efd37a3
EL
689 /* List is NULL if we don't have activated auxiliary data */
690 for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
691 struct tpacket_auxdata *aux;
f6ddaf33 692
6efd37a3
EL
693 if (cmsg->cmsg_len < CMSG_LEN(sizeof(struct tpacket_auxdata)) ||
694 cmsg->cmsg_level != SOL_PACKET ||
695 cmsg->cmsg_type != PACKET_AUXDATA)
696 continue;
f6ddaf33 697
6efd37a3
EL
698 aux = (struct tpacket_auxdata *)CMSG_DATA(cmsg);
699
700 if (aux_checksum && (aux->tp_status & TP_STATUS_CSUMNOTREADY)) {
701 p->flags |= PKT_IGNORE_CHECKSUM;
f6ddaf33 702 }
6efd37a3 703 break;
f6ddaf33
EL
704 }
705
c469824b
EL
706 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
707 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 708 SCReturnInt(AFP_SURI_FAILURE);
c469824b 709 }
62e63e3f 710 SCReturnInt(AFP_READ_OK);
c45d8985
EL
711}
712
ecf59be4
EL
713/**
714 * \brief AF packet write function.
715 *
716 * This function has to be called before the memory
717 * related to Packet in ring buffer is released.
718 *
719 * \param pointer to Packet
720 * \param version of capture: TPACKET_V2 or TPACKET_V3
721 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
722 *
723 */
724static TmEcode AFPWritePacket(Packet *p, int version)
662dccd8
EL
725{
726 struct sockaddr_ll socket_address;
727 int socket;
ecf59be4
EL
728 uint8_t *pstart;
729 size_t plen;
ee7e689b
AG
730 union thdr h;
731 uint16_t vlan_tci = 0;
662dccd8
EL
732
733 if (p->afp_v.copy_mode == AFP_COPY_MODE_IPS) {
3f107fa1 734 if (PACKET_TEST_ACTION(p, ACTION_DROP)) {
662dccd8
EL
735 return TM_ECODE_OK;
736 }
737 }
738
739 if (SC_ATOMIC_GET(p->afp_v.peer->state) == AFP_STATE_DOWN)
740 return TM_ECODE_OK;
741
742 if (p->ethh == NULL) {
743 SCLogWarning(SC_ERR_INVALID_VALUE, "Should have an Ethernet header");
744 return TM_ECODE_FAILED;
745 }
746 /* Index of the network device */
747 socket_address.sll_ifindex = SC_ATOMIC_GET(p->afp_v.peer->if_idx);
748 /* Address length*/
749 socket_address.sll_halen = ETH_ALEN;
750 /* Destination MAC */
751 memcpy(socket_address.sll_addr, p->ethh, 6);
752
753 /* Send packet, locking the socket if necessary */
754 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
755 SCMutexLock(&p->afp_v.peer->sock_protect);
756 socket = SC_ATOMIC_GET(p->afp_v.peer->socket);
ecf59be4 757
ee7e689b
AG
758 h.raw = p->afp_v.relptr;
759
ecf59be4 760 if (version == TPACKET_V2) {
ecf59be4
EL
761 /* Copy VLAN header from ring memory. For post june 2011 kernel we test
762 * the flag. It is not defined for older kernel so we go best effort
763 * and test for non zero value of the TCI header. */
764 if (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci) {
ee7e689b
AG
765 vlan_tci = h.h2->tp_vlan_tci;
766 }
767 } else {
768#ifdef HAVE_TPACKET_V3
769 if (h.h3->tp_status & TP_STATUS_VLAN_VALID || h.h3->hv1.tp_vlan_tci) {
770 vlan_tci = h.h3->hv1.tp_vlan_tci;
ecf59be4 771 }
ee7e689b
AG
772#else
773 /* Should not get here */
774 BUG_ON(1);
775#endif
776 }
777
778 if (vlan_tci != 0) {
779 pstart = GET_PKT_DATA(p) - VLAN_HEADER_LEN;
780 plen = GET_PKT_LEN(p) + VLAN_HEADER_LEN;
781 /* move ethernet addresses */
782 memmove(pstart, GET_PKT_DATA(p), 2 * ETH_ALEN);
783 /* write vlan info */
784 *(uint16_t *)(pstart + 2 * ETH_ALEN) = htons(0x8100);
785 *(uint16_t *)(pstart + 2 * ETH_ALEN + 2) = htons(vlan_tci);
ecf59be4
EL
786 } else {
787 pstart = GET_PKT_DATA(p);
788 plen = GET_PKT_LEN(p);
789 }
790
791 if (sendto(socket, pstart, plen, 0,
662dccd8
EL
792 (struct sockaddr*) &socket_address,
793 sizeof(struct sockaddr_ll)) < 0) {
794 SCLogWarning(SC_ERR_SOCKET, "Sending packet failed on socket %d: %s",
795 socket,
796 strerror(errno));
797 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
798 SCMutexUnlock(&p->afp_v.peer->sock_protect);
799 return TM_ECODE_FAILED;
800 }
801 if (p->afp_v.peer->flags & AFP_SOCK_PROTECT)
802 SCMutexUnlock(&p->afp_v.peer->sock_protect);
803
804 return TM_ECODE_OK;
805}
806
ab1200fb 807static void AFPReleaseDataFromRing(Packet *p)
2011a3f8 808{
662dccd8
EL
809 /* Need to be in copy mode and need to detect early release
810 where Ethernet header could not be set (and pseudo packet) */
811 if ((p->afp_v.copy_mode != AFP_COPY_MODE_NONE) && !PKT_IS_PSEUDOPKT(p)) {
ecf59be4 812 AFPWritePacket(p, TPACKET_V2);
662dccd8 813 }
13f13b6d
EL
814
815 if (AFPDerefSocket(p->afp_v.mpeer) == 0)
680e941a 816 goto cleanup;
13f13b6d 817
2011a3f8
EL
818 if (p->afp_v.relptr) {
819 union thdr h;
820 h.raw = p->afp_v.relptr;
821 h.h2->tp_status = TP_STATUS_KERNEL;
2011a3f8 822 }
680e941a
EL
823
824cleanup:
825 AFPV_CLEANUP(&p->afp_v);
b076a26c
KS
826}
827
#ifdef HAVE_TPACKET_V3
/**
 * \brief Release callback for packets read in TPACKET_V3 mode.
 *
 * Forwards the packet to the peer interface when a copy mode is set, then
 * frees or releases the Packet.
 *
 * \param p packet being released
 */
static void AFPReleasePacketV3(Packet *p)
{
    /* Need to be in copy mode and need to detect early release
       where Ethernet header could not be set (and pseudo packet) */
    const int forward = (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) &&
                        !PKT_IS_PSEUDOPKT(p);
    if (forward) {
        (void)AFPWritePacket(p, TPACKET_V3);
    }
    PacketFreeOrRelease(p);
}
#endif
bae1b03c 839
ab1200fb 840static void AFPReleasePacket(Packet *p)
b076a26c
KS
841{
842 AFPReleaseDataFromRing(p);
843 PacketFreeOrRelease(p);
2011a3f8
EL
844}
845
49b7b00f
EL
846/**
847 * \brief AF packet read function for ring
848 *
849 * This function fills
850 * From here the packets are picked up by the DecodeAFP thread.
851 *
852 * \param user pointer to AFPThreadVars
853 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
854 */
ab1200fb 855static int AFPReadFromRing(AFPThreadVars *ptv)
49b7b00f
EL
856{
857 Packet *p = NULL;
858 union thdr h;
27b5136b 859 uint8_t emergency_flush = 0;
4d8f70c6 860 int read_pkts = 0;
b26ec603 861 int loop_start = -1;
4d8f70c6 862
49b7b00f 863
a369f8c3
EL
864 /* Loop till we have packets available */
865 while (1) {
53c02334
AS
866 if (unlikely(suricata_ctl_flags != 0)) {
867 break;
868 }
869
a369f8c3 870 /* Read packet from ring */
69d0d484 871 h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
9efa4ace
EL
872 if (unlikely(h.raw == NULL)) {
873 /* Impossible we reach this point in normal condition, so trigger
874 * a failure in reading */
875 SCReturnInt(AFP_READ_FAILURE);
34b3f194 876 }
662dccd8 877
82a2dd85 878 if ((! h.h2->tp_status) || (h.h2->tp_status & TP_STATUS_USER_BUSY)) {
4d8f70c6 879 if (read_pkts == 0) {
b26ec603
EL
880 if (loop_start == -1) {
881 loop_start = ptv->frame_offset;
882 } else if (unlikely(loop_start == (int)ptv->frame_offset)) {
883 SCReturnInt(AFP_READ_OK);
884 }
69d0d484 885 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
b26ec603
EL
886 ptv->frame_offset = 0;
887 }
888 continue;
4d8f70c6 889 }
27b5136b
EL
890 if ((emergency_flush) && (ptv->flags & AFP_EMERGENCY_MODE)) {
891 SCReturnInt(AFP_KERNEL_DROP);
892 } else {
893 SCReturnInt(AFP_READ_OK);
894 }
895 }
4d8f70c6
EL
896
897 read_pkts++;
b26ec603 898 loop_start = -1;
4d8f70c6 899
4a1a0080
EL
900 /* Our packet is still used by suricata, we exit read loop to
901 * gain some time */
902 if (h.h2->tp_status & TP_STATUS_USER_BUSY) {
903 SCReturnInt(AFP_READ_OK);
904 }
905
27b5136b
EL
906 if ((ptv->flags & AFP_EMERGENCY_MODE) && (emergency_flush == 1)) {
907 h.h2->tp_status = TP_STATUS_KERNEL;
908 goto next_frame;
a369f8c3
EL
909 }
910
911 p = PacketGetFromQueueOrAlloc();
912 if (p == NULL) {
9efa4ace 913 SCReturnInt(AFP_SURI_FAILURE);
34b3f194 914 }
b33986c8 915 PKT_SET_SRC(p, PKT_SRC_WIRE);
06173267
EL
916 if (ptv->flags & AFP_BYPASS) {
917 p->BypassPacketsFlow = AFPBypassCallback;
d65f4585 918#ifdef HAVE_PACKET_EBPF
6062c27e
EL
919 p->afp_v.v4_map_fd = ptv->v4_map_fd;
920 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 921 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 922#endif
06173267 923 }
8c880879
EL
924 if (ptv->flags & AFP_XDPBYPASS) {
925 p->BypassPacketsFlow = AFPXDPBypassCallback;
d65f4585 926#ifdef HAVE_PACKET_EBPF
6062c27e
EL
927 p->afp_v.v4_map_fd = ptv->v4_map_fd;
928 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 929 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 930#endif
8c880879 931 }
49b7b00f 932
4a1a0080
EL
933 /* Suricata will treat packet so telling it is busy, this
934 * status will be reset to 0 (ie TP_STATUS_KERNEL) in the release
935 * function. */
936 h.h2->tp_status |= TP_STATUS_USER_BUSY;
937
a369f8c3 938 ptv->pkts++;
a369f8c3 939 p->livedev = ptv->livedev;
a369f8c3 940 p->datalink = ptv->datalink;
d0940396 941
a369f8c3
EL
942 if (h.h2->tp_len > h.h2->tp_snaplen) {
943 SCLogDebug("Packet length (%d) > snaplen (%d), truncating",
944 h.h2->tp_len, h.h2->tp_snaplen);
945 }
71e47868
EL
946
947 /* get vlan id from header */
9500d12c 948 if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
e871f713 949 (h.h2->tp_status & TP_STATUS_VLAN_VALID || h.h2->tp_vlan_tci)) {
01a8cc4e 950 p->vlan_id[0] = h.h2->tp_vlan_tci & 0x0fff;
71e47868
EL
951 p->vlan_idx = 1;
952 p->vlanh[0] = NULL;
953 }
954
a369f8c3
EL
955 if (ptv->flags & AFP_ZERO_COPY) {
956 if (PacketSetData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
957 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 958 SCReturnInt(AFP_SURI_FAILURE);
662dccd8 959 } else {
0f2b3406 960 p->afp_v.relptr = h.raw;
b076a26c 961 p->ReleasePacket = AFPReleasePacket;
5f12b234
EL
962 p->afp_v.mpeer = ptv->mpeer;
963 AFPRefSocket(ptv->mpeer);
0f2b3406
EL
964
965 p->afp_v.copy_mode = ptv->copy_mode;
966 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
967 p->afp_v.peer = ptv->mpeer->peer;
968 } else {
969 p->afp_v.peer = NULL;
662dccd8 970 }
a369f8c3
EL
971 }
972 } else {
973 if (PacketCopyData(p, (unsigned char*)h.raw + h.h2->tp_mac, h.h2->tp_snaplen) == -1) {
9efa4ace
EL
974 /* As we can possibly fail to copy the data due to invalid data, let's
975 * skip this packet and switch to the next one.
976 */
977 h.h2->tp_status = TP_STATUS_KERNEL;
69d0d484 978 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
9efa4ace
EL
979 ptv->frame_offset = 0;
980 }
a369f8c3 981 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 982 SCReturnInt(AFP_SURI_FAILURE);
a369f8c3
EL
983 }
984 }
d65f4585 985
a369f8c3
EL
986 /* Timestamp */
987 p->ts.tv_sec = h.h2->tp_sec;
988 p->ts.tv_usec = h.h2->tp_nsec/1000;
989 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
990 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
991
992 /* We only check for checksum disable */
993 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
994 p->flags |= PKT_IGNORE_CHECKSUM;
995 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
996 if (ptv->livedev->ignore_checksum) {
997 p->flags |= PKT_IGNORE_CHECKSUM;
998 } else if (ChecksumAutoModeCheck(ptv->pkts,
999 SC_ATOMIC_GET(ptv->livedev->pkts),
1000 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
1001 ptv->livedev->ignore_checksum = 1;
1002 p->flags |= PKT_IGNORE_CHECKSUM;
1003 }
1004 } else {
1005 if (h.h2->tp_status & TP_STATUS_CSUMNOTREADY) {
49b7b00f 1006 p->flags |= PKT_IGNORE_CHECKSUM;
a369f8c3 1007 }
ee6ba099
EL
1008 }
1009 if (h.h2->tp_status & TP_STATUS_LOSING) {
1010 emergency_flush = 1;
e8a4a4c4 1011 AFPDumpCounters(ptv);
a369f8c3
EL
1012 }
1013
5f12b234
EL
1014 /* release frame if not in zero copy mode */
1015 if (!(ptv->flags & AFP_ZERO_COPY)) {
1016 h.h2->tp_status = TP_STATUS_KERNEL;
1017 }
1018
a369f8c3
EL
1019 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
1020 h.h2->tp_status = TP_STATUS_KERNEL;
69d0d484 1021 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
a369f8c3
EL
1022 ptv->frame_offset = 0;
1023 }
1024 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1025 SCReturnInt(AFP_SURI_FAILURE);
49b7b00f 1026 }
49b7b00f 1027
27b5136b 1028next_frame:
69d0d484 1029 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
34b3f194 1030 ptv->frame_offset = 0;
350d7619
EL
1031 /* Get out of loop to be sure we will reach maintenance tasks */
1032 SCReturnInt(AFP_READ_OK);
34b3f194 1033 }
34b3f194
EL
1034 }
1035
49b7b00f
EL
1036 SCReturnInt(AFP_READ_OK);
1037}
1038
f947539d 1039#ifdef HAVE_TPACKET_V3
bae1b03c
EL
/**
 * \brief Give a TPACKET_V3 block back to the kernel.
 *
 * Writes TP_STATUS_KERNEL into the block status word of \a pbd.
 *
 * \param pbd descriptor of the block to release
 */
static inline void AFPFlushBlock(struct tpacket_block_desc *pbd)
{
    struct tpacket_hdr_v1 *block_hdr = &pbd->hdr.bh1;

    block_hdr->block_status = TP_STATUS_KERNEL;
}
1044
1045static inline int AFPParsePacketV3(AFPThreadVars *ptv, struct tpacket_block_desc *pbd, struct tpacket3_hdr *ppd)
1046{
1047 Packet *p = PacketGetFromQueueOrAlloc();
1048 if (p == NULL) {
9efa4ace 1049 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1050 }
1051 PKT_SET_SRC(p, PKT_SRC_WIRE);
06173267
EL
1052 if (ptv->flags & AFP_BYPASS) {
1053 p->BypassPacketsFlow = AFPBypassCallback;
d65f4585 1054#ifdef HAVE_PACKET_EBPF
6062c27e
EL
1055 p->afp_v.v4_map_fd = ptv->v4_map_fd;
1056 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 1057 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 1058#endif
e98b5e49 1059 } else if (ptv->flags & AFP_XDPBYPASS) {
8c880879 1060 p->BypassPacketsFlow = AFPXDPBypassCallback;
d65f4585 1061#ifdef HAVE_PACKET_EBPF
6062c27e
EL
1062 p->afp_v.v4_map_fd = ptv->v4_map_fd;
1063 p->afp_v.v6_map_fd = ptv->v6_map_fd;
4cf53100 1064 p->afp_v.nr_cpus = ptv->ebpf_t_config.cpus_count;
d65f4585 1065#endif
8c880879 1066 }
bae1b03c
EL
1067
1068 ptv->pkts++;
bae1b03c
EL
1069 p->livedev = ptv->livedev;
1070 p->datalink = ptv->datalink;
1071
e41a9d63
AG
1072 if ((!(ptv->flags & AFP_VLAN_DISABLED)) &&
1073 (ppd->tp_status & TP_STATUS_VLAN_VALID || ppd->hv1.tp_vlan_tci)) {
1074 p->vlan_id[0] = ppd->hv1.tp_vlan_tci & 0x0fff;
1075 p->vlan_idx = 1;
1076 p->vlanh[0] = NULL;
1077 }
1078
bae1b03c
EL
1079 if (ptv->flags & AFP_ZERO_COPY) {
1080 if (PacketSetData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
1081 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1082 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c 1083 }
310b27a1 1084 p->afp_v.relptr = ppd;
bae1b03c
EL
1085 p->ReleasePacket = AFPReleasePacketV3;
1086 p->afp_v.mpeer = ptv->mpeer;
1087 AFPRefSocket(ptv->mpeer);
1088
1089 p->afp_v.copy_mode = ptv->copy_mode;
1090 if (p->afp_v.copy_mode != AFP_COPY_MODE_NONE) {
1091 p->afp_v.peer = ptv->mpeer->peer;
1092 } else {
1093 p->afp_v.peer = NULL;
1094 }
1095 } else {
1096 if (PacketCopyData(p, (unsigned char*)ppd + ppd->tp_mac, ppd->tp_snaplen) == -1) {
1097 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1098 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1099 }
1100 }
1101 /* Timestamp */
1102 p->ts.tv_sec = ppd->tp_sec;
1103 p->ts.tv_usec = ppd->tp_nsec/1000;
1104 SCLogDebug("pktlen: %" PRIu32 " (pkt %p, pkt data %p)",
1105 GET_PKT_LEN(p), p, GET_PKT_DATA(p));
1106
1107 /* We only check for checksum disable */
1108 if (ptv->checksum_mode == CHECKSUM_VALIDATION_DISABLE) {
1109 p->flags |= PKT_IGNORE_CHECKSUM;
1110 } else if (ptv->checksum_mode == CHECKSUM_VALIDATION_AUTO) {
1111 if (ptv->livedev->ignore_checksum) {
1112 p->flags |= PKT_IGNORE_CHECKSUM;
1113 } else if (ChecksumAutoModeCheck(ptv->pkts,
1114 SC_ATOMIC_GET(ptv->livedev->pkts),
1115 SC_ATOMIC_GET(ptv->livedev->invalid_checksums))) {
1116 ptv->livedev->ignore_checksum = 1;
1117 p->flags |= PKT_IGNORE_CHECKSUM;
1118 }
1119 } else {
1120 if (ppd->tp_status & TP_STATUS_CSUMNOTREADY) {
1121 p->flags |= PKT_IGNORE_CHECKSUM;
1122 }
1123 }
1124
1125 if (TmThreadsSlotProcessPkt(ptv->tv, ptv->slot, p) != TM_ECODE_OK) {
bae1b03c 1126 TmqhOutputPacketpool(ptv->tv, p);
9efa4ace 1127 SCReturnInt(AFP_SURI_FAILURE);
bae1b03c
EL
1128 }
1129
1130 SCReturnInt(AFP_READ_OK);
1131}
1132
1133static inline int AFPWalkBlock(AFPThreadVars *ptv, struct tpacket_block_desc *pbd)
1134{
1135 int num_pkts = pbd->hdr.bh1.num_pkts, i;
1136 uint8_t *ppd;
9efa4ace 1137 int ret = 0;
bae1b03c
EL
1138
1139 ppd = (uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt;
1140 for (i = 0; i < num_pkts; ++i) {
9efa4ace
EL
1141 ret = AFPParsePacketV3(ptv, pbd,
1142 (struct tpacket3_hdr *)ppd);
1143 switch (ret) {
1144 case AFP_READ_OK:
1145 break;
1146 case AFP_SURI_FAILURE:
1147 /* Internal error but let's just continue and
1148 * treat thenext packet */
1149 break;
1150 case AFP_READ_FAILURE:
1151 SCReturnInt(AFP_READ_FAILURE);
1152 default:
1153 SCReturnInt(ret);
5f84b55d 1154 }
bae1b03c
EL
1155 ppd = ppd + ((struct tpacket3_hdr *)ppd)->tp_next_offset;
1156 }
1157
1158 SCReturnInt(AFP_READ_OK);
1159}
f947539d 1160#endif /* HAVE_TPACKET_V3 */
bae1b03c
EL
1161
1162/**
1163 * \brief AF packet read function for ring
1164 *
1165 * This function fills
1166 * From here the packets are picked up by the DecodeAFP thread.
1167 *
1168 * \param user pointer to AFPThreadVars
1169 * \retval TM_ECODE_FAILED on failure and TM_ECODE_OK on success
1170 */
ab1200fb 1171static int AFPReadFromRingV3(AFPThreadVars *ptv)
bae1b03c 1172{
c2d0d938 1173#ifdef HAVE_TPACKET_V3
bae1b03c 1174 struct tpacket_block_desc *pbd;
9efa4ace 1175 int ret = 0;
bae1b03c
EL
1176
1177 /* Loop till we have packets available */
1178 while (1) {
1179 if (unlikely(suricata_ctl_flags != 0)) {
1180 SCLogInfo("Exiting AFP V3 read loop");
1181 break;
1182 }
1183
69d0d484 1184 pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
bae1b03c
EL
1185
1186 /* block is not ready to be read */
1187 if ((pbd->hdr.bh1.block_status & TP_STATUS_USER) == 0) {
1188 SCReturnInt(AFP_READ_OK);
1189 }
1190
9efa4ace
EL
1191 ret = AFPWalkBlock(ptv, pbd);
1192 if (unlikely(ret != AFP_READ_OK)) {
bae1b03c 1193 AFPFlushBlock(pbd);
9efa4ace 1194 SCReturnInt(ret);
bae1b03c
EL
1195 }
1196
1197 AFPFlushBlock(pbd);
69d0d484 1198 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
bae1b03c
EL
1199 /* return to maintenance task after one loop on the ring */
1200 if (ptv->frame_offset == 0) {
1201 SCReturnInt(AFP_READ_OK);
1202 }
1203 }
c2d0d938 1204#endif
bae1b03c
EL
1205 SCReturnInt(AFP_READ_OK);
1206}
1207
13f13b6d
EL
1208/**
1209 * \brief Reference socket
1210 *
1211 * \retval O in case of failure, 1 in case of success
1212 */
1213static int AFPRefSocket(AFPPeer* peer)
1214{
1215 if (unlikely(peer == NULL))
1216 return 0;
1217
1218 (void)SC_ATOMIC_ADD(peer->sock_usage, 1);
1219 return 1;
1220}
1221
1222
1223/**
1224 * \brief Dereference socket
1225 *
1226 * \retval 1 if socket is still alive, 0 if not
1227 */
1228static int AFPDerefSocket(AFPPeer* peer)
1229{
4424f5a2
EL
1230 if (peer == NULL)
1231 return 1;
1232
13f13b6d
EL
1233 if (SC_ATOMIC_SUB(peer->sock_usage, 1) == 0) {
1234 if (SC_ATOMIC_GET(peer->state) == AFP_STATE_DOWN) {
1235 SCLogInfo("Cleaning socket connected to '%s'", peer->iface);
1236 close(SC_ATOMIC_GET(peer->socket));
1237 return 0;
1238 }
1239 }
1240 return 1;
1241}
1242
ab1200fb 1243static void AFPSwitchState(AFPThreadVars *ptv, int state)
13f13b6d
EL
1244{
1245 ptv->afp_state = state;
1246 ptv->down_count = 0;
49b7b00f 1247
13f13b6d
EL
1248 AFPPeerUpdate(ptv);
1249
1250 /* Do cleaning if switching to down state */
1251 if (state == AFP_STATE_DOWN) {
5f84b55d
EL
1252#ifdef HAVE_TPACKET_V3
1253 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484
VJ
1254 if (!ptv->ring.v3) {
1255 SCFree(ptv->ring.v3);
1256 ptv->ring.v3 = NULL;
5f84b55d
EL
1257 }
1258 } else {
1259#endif
69d0d484 1260 if (ptv->ring.v2) {
5f84b55d 1261 /* only used in reading phase, we can free it */
69d0d484
VJ
1262 SCFree(ptv->ring.v2);
1263 ptv->ring.v2 = NULL;
5f84b55d
EL
1264 }
1265#ifdef HAVE_TPACKET_V3
13f13b6d 1266 }
5f84b55d 1267#endif
13f13b6d
EL
1268 if (ptv->socket != -1) {
1269 /* we need to wait for all packets to return data */
1270 if (SC_ATOMIC_SUB(ptv->mpeer->sock_usage, 1) == 0) {
b86e1762 1271 SCLogDebug("Cleaning socket connected to '%s'", ptv->iface);
cba41207 1272 munmap(ptv->ring_buf, ptv->ring_buflen);
13f13b6d
EL
1273 close(ptv->socket);
1274 ptv->socket = -1;
1275 }
1276 }
1277 }
1278 if (state == AFP_STATE_UP) {
1279 (void)SC_ATOMIC_SET(ptv->mpeer->sock_usage, 1);
1280 }
1281}
49b7b00f 1282
7fea0ec6
EL
1283static int AFPReadAndDiscard(AFPThreadVars *ptv, struct timeval *synctv,
1284 uint64_t *discarded_pkts)
919377d4
EL
1285{
1286 struct sockaddr_ll from;
1287 struct iovec iov;
1288 struct msghdr msg;
1289 struct timeval ts;
1290 union {
1291 struct cmsghdr cmsg;
1292 char buf[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
1293 } cmsg_buf;
1294
1295
1296 if (unlikely(suricata_ctl_flags != 0)) {
1297 return 1;
1298 }
1299
1300 msg.msg_name = &from;
1301 msg.msg_namelen = sizeof(from);
1302 msg.msg_iov = &iov;
1303 msg.msg_iovlen = 1;
1304 msg.msg_control = &cmsg_buf;
1305 msg.msg_controllen = sizeof(cmsg_buf);
1306 msg.msg_flags = 0;
1307
1308 iov.iov_len = ptv->datalen;
1309 iov.iov_base = ptv->data;
1310
339f0665 1311 (void)recvmsg(ptv->socket, &msg, MSG_TRUNC);
919377d4
EL
1312
1313 if (ioctl(ptv->socket, SIOCGSTAMP, &ts) == -1) {
1314 /* FIXME */
1315 return -1;
1316 }
1317
1318 if ((ts.tv_sec > synctv->tv_sec) ||
1319 (ts.tv_sec >= synctv->tv_sec &&
1320 ts.tv_usec > synctv->tv_usec)) {
1321 return 1;
1322 }
1323 return 0;
1324}
1325
7fea0ec6
EL
1326static int AFPReadAndDiscardFromRing(AFPThreadVars *ptv, struct timeval *synctv,
1327 uint64_t *discarded_pkts)
919377d4
EL
1328{
1329 union thdr h;
1330
1331 if (unlikely(suricata_ctl_flags != 0)) {
1332 return 1;
1333 }
1334
f947539d 1335#ifdef HAVE_TPACKET_V3
bae1b03c 1336 if (ptv->flags & AFP_TPACKET_V3) {
cebbe06f 1337 int ret = 0;
7fea0ec6 1338 struct tpacket_block_desc *pbd;
69d0d484 1339 pbd = (struct tpacket_block_desc *) ptv->ring.v3[ptv->frame_offset].iov_base;
7fea0ec6 1340 *discarded_pkts += pbd->hdr.bh1.num_pkts;
cebbe06f
VJ
1341 struct tpacket3_hdr *ppd =
1342 (struct tpacket3_hdr *)((uint8_t *)pbd + pbd->hdr.bh1.offset_to_first_pkt);
1343 if (((time_t)ppd->tp_sec > synctv->tv_sec) ||
1344 ((time_t)ppd->tp_sec == synctv->tv_sec &&
1345 (suseconds_t) (ppd->tp_nsec / 1000) > (suseconds_t)synctv->tv_usec)) {
1346 ret = 1;
1347 }
7fea0ec6 1348 AFPFlushBlock(pbd);
69d0d484 1349 ptv->frame_offset = (ptv->frame_offset + 1) % ptv->req.v3.tp_block_nr;
cebbe06f 1350 return ret;
f947539d
VJ
1351
1352 } else
1353#endif
1354 {
7fea0ec6 1355 /* Read packet from ring */
69d0d484 1356 h.raw = (((union thdr **)ptv->ring.v2)[ptv->frame_offset]);
7fea0ec6
EL
1357 if (h.raw == NULL) {
1358 return -1;
1359 }
1360 (*discarded_pkts)++;
1361 if (((time_t)h.h2->tp_sec > synctv->tv_sec) ||
1362 ((time_t)h.h2->tp_sec == synctv->tv_sec &&
1363 (suseconds_t) (h.h2->tp_nsec / 1000) > synctv->tv_usec)) {
1364 return 1;
1365 }
919377d4 1366
7fea0ec6 1367 h.h2->tp_status = TP_STATUS_KERNEL;
69d0d484 1368 if (++ptv->frame_offset >= ptv->req.v2.tp_frame_nr) {
7fea0ec6
EL
1369 ptv->frame_offset = 0;
1370 }
919377d4
EL
1371 }
1372
1373
1374 return 0;
1375}
1376
806844d8
VJ
1377/** \brief wait for all afpacket threads to fully init
1378 *
1379 * Discard packets before all threads are ready, as the cluster
1380 * setup is not complete yet.
1381 *
1382 * if AFPPeersListStarted() returns true init is complete
1383 *
1384 * \retval r 1 = happy, otherwise unhappy
1385 */
7fea0ec6 1386static int AFPSynchronizeStart(AFPThreadVars *ptv, uint64_t *discarded_pkts)
919377d4 1387{
919377d4 1388 struct timeval synctv;
806844d8
VJ
1389 struct pollfd fds;
1390
1391 fds.fd = ptv->socket;
1392 fds.events = POLLIN;
919377d4
EL
1393
1394 /* Set timeval to end of the world */
1395 synctv.tv_sec = 0xffffffff;
1396 synctv.tv_usec = 0xffffffff;
1397
1398 while (1) {
8709a20d 1399 int r = poll(&fds, 1, POLL_TIMEOUT);
806844d8
VJ
1400 if (r > 0 &&
1401 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1402 SCLogWarning(SC_ERR_AFP_READ, "poll failed %02x",
1403 fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL));
1404 return 0;
1405 } else if (r > 0) {
1406 if (AFPPeersListStarted() && synctv.tv_sec == (time_t) 0xffffffff) {
1407 gettimeofday(&synctv, NULL);
1408 }
1409 if (ptv->flags & AFP_RING_MODE) {
7fea0ec6 1410 r = AFPReadAndDiscardFromRing(ptv, &synctv, discarded_pkts);
806844d8 1411 } else {
7fea0ec6 1412 r = AFPReadAndDiscard(ptv, &synctv, discarded_pkts);
806844d8
VJ
1413 }
1414 SCLogDebug("Discarding on %s", ptv->tv->name);
1415 switch (r) {
1416 case 1:
9f7ba071 1417 SCLogDebug("Starting to read on %s", ptv->tv->name);
806844d8
VJ
1418 return 1;
1419 case -1:
1420 return r;
1421 }
1422 /* no packets */
1423 } else if (r == 0 && AFPPeersListStarted()) {
86a3f064 1424 SCLogDebug("Starting to read on %s", ptv->tv->name);
806844d8 1425 return 1;
43b6cbd4 1426 } else if (r < 0) { /* only exit on error */
806844d8
VJ
1427 SCLogWarning(SC_ERR_AFP_READ, "poll failed with retval %d", r);
1428 return 0;
919377d4
EL
1429 }
1430 }
1431 return 1;
1432}
1433
13f13b6d
EL
1434/**
1435 * \brief Try to reopen socket
1436 *
1437 * \retval 0 in case of success, negative if error occurs or a condition
1438 * is not met.
1439 */
c45d8985
EL
1440static int AFPTryReopen(AFPThreadVars *ptv)
1441{
13f13b6d
EL
1442 ptv->down_count++;
1443
13f13b6d
EL
1444 /* Don't reconnect till we have packet that did not release data */
1445 if (SC_ATOMIC_GET(ptv->mpeer->sock_usage) != 0) {
1446 return -1;
1447 }
c45d8985 1448
8709a20d 1449 int afp_activate_r = AFPCreateSocket(ptv, ptv->iface, 0);
c45d8985 1450 if (afp_activate_r != 0) {
13f13b6d
EL
1451 if (ptv->down_count % AFP_DOWN_COUNTER_INTERVAL == 0) {
1452 SCLogWarning(SC_ERR_AFP_CREATE, "Can not open iface '%s'",
1453 ptv->iface);
1454 }
c45d8985
EL
1455 return afp_activate_r;
1456 }
1457
3bea3b39 1458 SCLogInfo("Interface '%s' is back", ptv->iface);
c45d8985
EL
1459 return 0;
1460}
1461
e80b30c0
EL
1462/**
1463 * \brief Main AF_PACKET reading Loop function
1464 */
1465TmEcode ReceiveAFPLoop(ThreadVars *tv, void *data, void *slot)
1466{
34581ce9
AS
1467 SCEnter();
1468
e80b30c0 1469 AFPThreadVars *ptv = (AFPThreadVars *)data;
e80b30c0
EL
1470 struct pollfd fds;
1471 int r;
34581ce9 1472 TmSlot *s = (TmSlot *)slot;
e8a4a4c4 1473 time_t last_dump = 0;
49612128 1474 time_t current_time;
5f400785 1475 int (*AFPReadFunc) (AFPThreadVars *);
7fea0ec6 1476 uint64_t discarded_pkts = 0;
e80b30c0 1477
34581ce9 1478 ptv->slot = s->slot_next;
e80b30c0 1479
5f400785 1480 if (ptv->flags & AFP_RING_MODE) {
bae1b03c
EL
1481 if (ptv->flags & AFP_TPACKET_V3) {
1482 AFPReadFunc = AFPReadFromRingV3;
1483 } else {
1484 AFPReadFunc = AFPReadFromRing;
1485 }
5f400785
EL
1486 } else {
1487 AFPReadFunc = AFPRead;
1488 }
1489
60400163
EL
1490 if (ptv->afp_state == AFP_STATE_DOWN) {
1491 /* Wait for our turn, threads before us must have opened the socket */
1492 while (AFPPeersListWaitTurn(ptv->mpeer)) {
1493 usleep(1000);
1992a227
EL
1494 if (suricata_ctl_flags != 0) {
1495 break;
1496 }
60400163
EL
1497 }
1498 r = AFPCreateSocket(ptv, ptv->iface, 1);
1499 if (r < 0) {
1992a227
EL
1500 switch (-r) {
1501 case AFP_FATAL_ERROR:
1502 SCLogError(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, fatal error");
1992a227
EL
1503 SCReturnInt(TM_ECODE_FAILED);
1504 case AFP_RECOVERABLE_ERROR:
1505 SCLogWarning(SC_ERR_AFP_CREATE, "Couldn't init AF_PACKET socket, retrying soon");
1506 }
60400163
EL
1507 }
1508 AFPPeersListReachedInc();
1509 }
1510 if (ptv->afp_state == AFP_STATE_UP) {
86a3f064 1511 SCLogDebug("Thread %s using socket %d", tv->name, ptv->socket);
c99dc5a7 1512 AFPSynchronizeStart(ptv, &discarded_pkts);
7fea0ec6
EL
1513 /* let's reset counter as we will start the capture at the
1514 * next function call */
1515#ifdef PACKET_STATISTICS
1516 struct tpacket_stats kstats;
1517 socklen_t len = sizeof (struct tpacket_stats);
1518 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_STATISTICS,
1519 &kstats, &len) > -1) {
1520 uint64_t pkts = 0;
1521 SCLogDebug("(%s) Kernel socket startup: Packets %" PRIu32
1522 ", dropped %" PRIu32 "",
1523 ptv->tv->name,
1524 kstats.tp_packets, kstats.tp_drops);
1525 pkts = kstats.tp_packets - discarded_pkts - kstats.tp_drops;
1526 StatsAddUI64(ptv->tv, ptv->capture_kernel_packets, pkts);
1527 (void) SC_ATOMIC_ADD(ptv->livedev->pkts, pkts);
1528 }
1529#endif
60400163
EL
1530 }
1531
e80b30c0
EL
1532 fds.fd = ptv->socket;
1533 fds.events = POLLIN;
1534
1535 while (1) {
1536 /* Start by checking the state of our interface */
1537 if (unlikely(ptv->afp_state == AFP_STATE_DOWN)) {
1538 int dbreak = 0;
662dccd8 1539
e80b30c0
EL
1540 do {
1541 usleep(AFP_RECONNECT_TIMEOUT);
1542 if (suricata_ctl_flags != 0) {
1543 dbreak = 1;
1544 break;
1545 }
1546 r = AFPTryReopen(ptv);
09e709d1 1547 fds.fd = ptv->socket;
e80b30c0
EL
1548 } while (r < 0);
1549 if (dbreak == 1)
1550 break;
1551 }
1552
1553 /* make sure we have at least one packet in the packet pool, to prevent
1554 * us from alloc'ing packets at line rate */
3c6e01f6 1555 PacketPoolWait();
e80b30c0
EL
1556
1557 r = poll(&fds, 1, POLL_TIMEOUT);
1558
1559 if (suricata_ctl_flags != 0) {
1560 break;
1561 }
1562
1563 if (r > 0 &&
1564 (fds.revents & (POLLHUP|POLLRDHUP|POLLERR|POLLNVAL))) {
1565 if (fds.revents & (POLLHUP | POLLRDHUP)) {
13f13b6d 1566 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0 1567 continue;
ff6365dd 1568 } else if (fds.revents & POLLERR) {
e80b30c0
EL
1569 char c;
1570 /* Do a recv to get errno */
1571 if (recv(ptv->socket, &c, sizeof c, MSG_PEEK) != -1)
1572 continue; /* what, no error? */
3bea3b39 1573 SCLogError(SC_ERR_AFP_READ,
efbb5ce0 1574 "Error reading data from iface '%s': (%d) %s",
3bea3b39 1575 ptv->iface, errno, strerror(errno));
13f13b6d 1576 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0 1577 continue;
ff6365dd 1578 } else if (fds.revents & POLLNVAL) {
e80b30c0 1579 SCLogError(SC_ERR_AFP_READ, "Invalid polling request");
13f13b6d 1580 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0
EL
1581 continue;
1582 }
1583 } else if (r > 0) {
5f400785 1584 r = AFPReadFunc(ptv);
62e63e3f 1585 switch (r) {
27adbfa8
EL
1586 case AFP_READ_OK:
1587 /* Trigger one dump of stats every second */
49612128
EL
1588 current_time = time(NULL);
1589 if (current_time != last_dump) {
27adbfa8 1590 AFPDumpCounters(ptv);
49612128 1591 last_dump = current_time;
27adbfa8
EL
1592 }
1593 break;
62e63e3f
EL
1594 case AFP_READ_FAILURE:
1595 /* AFPRead in error: best to reset the socket */
3bea3b39 1596 SCLogError(SC_ERR_AFP_READ,
efbb5ce0 1597 "AFPRead error reading data from iface '%s': (%d) %s",
3bea3b39 1598 ptv->iface, errno, strerror(errno));
13f13b6d 1599 AFPSwitchState(ptv, AFP_STATE_DOWN);
62e63e3f 1600 continue;
9efa4ace
EL
1601 case AFP_SURI_FAILURE:
1602 StatsIncr(ptv->tv, ptv->capture_errors);
62e63e3f 1603 break;
27b5136b 1604 case AFP_KERNEL_DROP:
e8a4a4c4 1605 AFPDumpCounters(ptv);
27b5136b 1606 break;
e80b30c0 1607 }
11099cfa 1608 } else if (unlikely(r == 0)) {
f53e687b
EL
1609 /* Trigger one dump of stats every second */
1610 current_time = time(NULL);
1611 if (current_time != last_dump) {
1612 AFPDumpCounters(ptv);
1613 last_dump = current_time;
1614 }
ce71bf1f
VJ
1615 /* poll timed out, lets see handle our timeout path */
1616 TmThreadsCaptureHandleTimeout(tv, ptv->slot, NULL);
11099cfa 1617
e80b30c0 1618 } else if ((r < 0) && (errno != EINTR)) {
efbb5ce0 1619 SCLogError(SC_ERR_AFP_READ, "Error reading data from iface '%s': (%d) %s",
3bea3b39 1620 ptv->iface,
e80b30c0 1621 errno, strerror(errno));
13f13b6d 1622 AFPSwitchState(ptv, AFP_STATE_DOWN);
e80b30c0
EL
1623 continue;
1624 }
752f03e7 1625 StatsSyncCountersIfSignalled(tv);
e80b30c0
EL
1626 }
1627
4e561d6b 1628 AFPDumpCounters(ptv);
752f03e7 1629 StatsSyncCountersIfSignalled(tv);
e80b30c0
EL
1630 SCReturnInt(TM_ECODE_OK);
1631}
1632
13f13b6d
EL
1633static int AFPGetDevFlags(int fd, const char *ifname)
1634{
1635 struct ifreq ifr;
1636
1637 memset(&ifr, 0, sizeof(ifr));
1638 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
1639
1640 if (ioctl(fd, SIOCGIFFLAGS, &ifr) == -1) {
1641 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1642 ifname, strerror(errno));
1643 return -1;
1644 }
1645
1646 return ifr.ifr_flags;
1647}
1648
1649
e80b30c0 1650static int AFPGetIfnumByDev(int fd, const char *ifname, int verbose)
c45d8985
EL
1651{
1652 struct ifreq ifr;
1653
1654 memset(&ifr, 0, sizeof(ifr));
e80b30c0 1655 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
c45d8985
EL
1656
1657 if (ioctl(fd, SIOCGIFINDEX, &ifr) == -1) {
cba41207
AG
1658 if (verbose)
1659 SCLogError(SC_ERR_AFP_CREATE, "Unable to find iface %s: %s",
1660 ifname, strerror(errno));
c45d8985
EL
1661 return -1;
1662 }
1663
1664 return ifr.ifr_ifindex;
1665}
1666
e80b30c0 1667static int AFPGetDevLinktype(int fd, const char *ifname)
c45d8985
EL
1668{
1669 struct ifreq ifr;
1670
1671 memset(&ifr, 0, sizeof(ifr));
e80b30c0 1672 strlcpy(ifr.ifr_name, ifname, sizeof(ifr.ifr_name));
c45d8985
EL
1673
1674 if (ioctl(fd, SIOCGIFHWADDR, &ifr) == -1) {
1675 SCLogError(SC_ERR_AFP_CREATE, "Unable to find type for iface \"%s\": %s",
1676 ifname, strerror(errno));
1677 return -1;
1678 }
1679
e80b30c0
EL
1680 switch (ifr.ifr_hwaddr.sa_family) {
1681 case ARPHRD_LOOPBACK:
1682 return LINKTYPE_ETHERNET;
1683 case ARPHRD_PPP:
11eb1d7c 1684 case ARPHRD_NONE:
e80b30c0
EL
1685 return LINKTYPE_RAW;
1686 default:
1687 return ifr.ifr_hwaddr.sa_family;
1688 }
c45d8985
EL
1689}
1690
b7bf299e
EL
1691int AFPGetLinkType(const char *ifname)
1692{
1693 int ltype;
1694
1695 int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
1696 if (fd == -1) {
1697 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
1698 return LINKTYPE_RAW;
1699 }
1700
1701 ltype = AFPGetDevLinktype(fd, ifname);
1702 close(fd);
1703
1704 return ltype;
1705}
1706
49b7b00f
EL
1707static int AFPComputeRingParams(AFPThreadVars *ptv, int order)
1708{
1709 /* Compute structure:
1710 Target is to store all pending packets
1711 with a size equal to MTU + auxdata
1712 And we keep a decent number of block
1713
1714 To do so:
1715 Compute frame_size (aligned to be able to fit in block
1716 Check which block size we need. Blocksize is a 2^n * pagesize
1717 We then need to get order, big enough to have
1718 frame_size < block size
1719 Find number of frame per block (divide)
1720 Fill in packet_req
1721
1722 Compute frame size:
1723 described in packet_mmap.txt
1724 dependant on snaplen (need to use a variable ?)
1725snaplen: MTU ?
1726tp_hdrlen determine_version in daq_afpacket
1727in V1: sizeof(struct tpacket_hdr);
1728in V2: val in getsockopt(instance->fd, SOL_PACKET, PACKET_HDRLEN, &val, &len)
1729frame size: TPACKET_ALIGN(snaplen + TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1730
1731 */
1732 int tp_hdrlen = sizeof(struct tpacket_hdr);
1733 int snaplen = default_packet_size;
1734
03032457
EL
1735 if (snaplen == 0) {
1736 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1737 if (snaplen <= 0) {
1738 SCLogWarning(SC_ERR_INVALID_VALUE,
1739 "Unable to get MTU, setting snaplen to sane default of 1514");
1740 snaplen = 1514;
1741 }
1742 }
1743
69d0d484
VJ
1744 ptv->req.v2.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1745 ptv->req.v2.tp_block_size = getpagesize() << order;
1746 int frames_per_block = ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size;
49b7b00f 1747 if (frames_per_block == 0) {
bae1b03c 1748 SCLogError(SC_ERR_INVALID_VALUE, "Frame size bigger than block size");
49b7b00f
EL
1749 return -1;
1750 }
69d0d484
VJ
1751 ptv->req.v2.tp_frame_nr = ptv->ring_size;
1752 ptv->req.v2.tp_block_nr = ptv->req.v2.tp_frame_nr / frames_per_block + 1;
49b7b00f 1753 /* exact division */
69d0d484 1754 ptv->req.v2.tp_frame_nr = ptv->req.v2.tp_block_nr * frames_per_block;
b3bf7a57 1755 SCLogPerf("AF_PACKET RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d",
69d0d484
VJ
1756 ptv->req.v2.tp_block_size, ptv->req.v2.tp_block_nr,
1757 ptv->req.v2.tp_frame_size, ptv->req.v2.tp_frame_nr);
49b7b00f
EL
1758 return 1;
1759}
1760
c2d0d938 1761#ifdef HAVE_TPACKET_V3
bae1b03c
EL
1762static int AFPComputeRingParamsV3(AFPThreadVars *ptv)
1763{
69d0d484
VJ
1764 ptv->req.v3.tp_block_size = ptv->block_size;
1765 ptv->req.v3.tp_frame_size = 2048;
bae1b03c
EL
1766 int frames_per_block = 0;
1767 int tp_hdrlen = sizeof(struct tpacket3_hdr);
1768 int snaplen = default_packet_size;
1769
1770 if (snaplen == 0) {
1771 snaplen = GetIfaceMaxPacketSize(ptv->iface);
1772 if (snaplen <= 0) {
1773 SCLogWarning(SC_ERR_INVALID_VALUE,
1774 "Unable to get MTU, setting snaplen to sane default of 1514");
1775 snaplen = 1514;
1776 }
1777 }
1778
69d0d484
VJ
1779 ptv->req.v3.tp_frame_size = TPACKET_ALIGN(snaplen +TPACKET_ALIGN(TPACKET_ALIGN(tp_hdrlen) + sizeof(struct sockaddr_ll) + ETH_HLEN) - ETH_HLEN);
1780 frames_per_block = ptv->req.v3.tp_block_size / ptv->req.v3.tp_frame_size;
bae1b03c
EL
1781
1782 if (frames_per_block == 0) {
1783 SCLogError(SC_ERR_INVALID_VALUE,
1784 "Block size is too small, it should be at least %d",
69d0d484 1785 ptv->req.v3.tp_frame_size);
bae1b03c
EL
1786 return -1;
1787 }
69d0d484 1788 ptv->req.v3.tp_block_nr = ptv->ring_size / frames_per_block + 1;
bae1b03c 1789 /* exact division */
69d0d484
VJ
1790 ptv->req.v3.tp_frame_nr = ptv->req.v3.tp_block_nr * frames_per_block;
1791 ptv->req.v3.tp_retire_blk_tov = ptv->block_timeout;
1792 ptv->req.v3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
b3bf7a57 1793 SCLogPerf("AF_PACKET V3 RX Ring params: block_size=%d block_nr=%d frame_size=%d frame_nr=%d (mem: %d)",
69d0d484
VJ
1794 ptv->req.v3.tp_block_size, ptv->req.v3.tp_block_nr,
1795 ptv->req.v3.tp_frame_size, ptv->req.v3.tp_frame_nr,
1796 ptv->req.v3.tp_block_size * ptv->req.v3.tp_block_nr
bae1b03c
EL
1797 );
1798 return 1;
1799}
c2d0d938 1800#endif
bae1b03c 1801
c7bde9df
EL
1802static int AFPSetupRing(AFPThreadVars *ptv, char *devname)
1803{
1804 int val;
1805 unsigned int len = sizeof(val), i;
c7bde9df 1806 int order;
f5c20191 1807 int r, mmap_flag;
c7bde9df 1808
c2d0d938 1809#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1810 if (ptv->flags & AFP_TPACKET_V3) {
1811 val = TPACKET_V3;
f947539d 1812 } else
c2d0d938 1813#endif
f947539d 1814 {
c7bde9df
EL
1815 val = TPACKET_V2;
1816 }
1817 if (getsockopt(ptv->socket, SOL_PACKET, PACKET_HDRLEN, &val, &len) < 0) {
1818 if (errno == ENOPROTOOPT) {
1819 if (ptv->flags & AFP_TPACKET_V3) {
1820 SCLogError(SC_ERR_AFP_CREATE,
1821 "Too old kernel giving up (need 3.2 for TPACKET_V3)");
1822 } else {
1823 SCLogError(SC_ERR_AFP_CREATE,
1824 "Too old kernel giving up (need 2.6.27 at least)");
1825 }
1826 }
1827 SCLogError(SC_ERR_AFP_CREATE, "Error when retrieving packet header len");
1828 return AFP_FATAL_ERROR;
1829 }
1830
f947539d
VJ
1831 val = TPACKET_V2;
1832#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1833 if (ptv->flags & AFP_TPACKET_V3) {
1834 val = TPACKET_V3;
c7bde9df 1835 }
f947539d 1836#endif
c7bde9df
EL
1837 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_VERSION, &val,
1838 sizeof(val)) < 0) {
1839 SCLogError(SC_ERR_AFP_CREATE,
1840 "Can't activate TPACKET_V2/TPACKET_V3 on packet socket: %s",
1841 strerror(errno));
1842 return AFP_FATAL_ERROR;
1843 }
1844
a40f08a2
EL
1845#ifdef HAVE_HW_TIMESTAMPING
1846 int req = SOF_TIMESTAMPING_RAW_HARDWARE;
1847 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req,
1848 sizeof(req)) < 0) {
1849 SCLogWarning(SC_ERR_AFP_CREATE,
1850 "Can't activate hardware timestamping on packet socket: %s",
1851 strerror(errno));
1852 }
1853#endif
1854
ecf59be4
EL
1855 /* Let's reserve head room so we can add the VLAN header in IPS
1856 * or TAP mode before write the packet */
1857 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
1858 /* Only one vlan is extracted from AFP header so
1859 * one VLAN header length is enough. */
1860 int reserve = VLAN_HEADER_LEN;
1861 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_RESERVE, (void *) &reserve,
1862 sizeof(reserve)) < 0) {
1863 SCLogError(SC_ERR_AFP_CREATE,
1864 "Can't activate reserve on packet socket: %s",
1865 strerror(errno));
1866 return AFP_FATAL_ERROR;
1867 }
1868 }
1869
c7bde9df 1870 /* Allocate RX ring */
c2d0d938 1871#ifdef HAVE_TPACKET_V3
c7bde9df
EL
1872 if (ptv->flags & AFP_TPACKET_V3) {
1873 if (AFPComputeRingParamsV3(ptv) != 1) {
1874 return AFP_FATAL_ERROR;
1875 }
1876 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
69d0d484 1877 (void *) &ptv->req.v3, sizeof(ptv->req.v3));
c7bde9df
EL
1878 if (r < 0) {
1879 SCLogError(SC_ERR_MEM_ALLOC,
1880 "Unable to allocate RX Ring for iface %s: (%d) %s",
1881 devname,
1882 errno,
1883 strerror(errno));
1884 return AFP_FATAL_ERROR;
1885 }
1886 } else {
c2d0d938 1887#endif
fa902abe 1888 for (order = AFP_BLOCK_SIZE_DEFAULT_ORDER; order >= 0; order--) {
c7bde9df
EL
1889 if (AFPComputeRingParams(ptv, order) != 1) {
1890 SCLogInfo("Ring parameter are incorrect. Please correct the devel");
1891 return AFP_FATAL_ERROR;
1892 }
1893
1894 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_RX_RING,
1895 (void *) &ptv->req, sizeof(ptv->req));
1896
1897 if (r < 0) {
1898 if (errno == ENOMEM) {
1899 SCLogInfo("Memory issue with ring parameters. Retrying.");
1900 continue;
1901 }
1902 SCLogError(SC_ERR_MEM_ALLOC,
1903 "Unable to allocate RX Ring for iface %s: (%d) %s",
1904 devname,
1905 errno,
1906 strerror(errno));
1907 return AFP_FATAL_ERROR;
1908 } else {
1909 break;
1910 }
1911 }
1912 if (order < 0) {
1913 SCLogError(SC_ERR_MEM_ALLOC,
1914 "Unable to allocate RX Ring for iface %s (order 0 failed)",
1915 devname);
1916 return AFP_FATAL_ERROR;
1917 }
c2d0d938 1918#ifdef HAVE_TPACKET_V3
c7bde9df 1919 }
c2d0d938 1920#endif
c7bde9df
EL
1921
1922 /* Allocate the Ring */
c2d0d938 1923#ifdef HAVE_TPACKET_V3
c7bde9df 1924 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484 1925 ptv->ring_buflen = ptv->req.v3.tp_block_nr * ptv->req.v3.tp_block_size;
c7bde9df 1926 } else {
c2d0d938 1927#endif
69d0d484 1928 ptv->ring_buflen = ptv->req.v2.tp_block_nr * ptv->req.v2.tp_block_size;
c2d0d938 1929#ifdef HAVE_TPACKET_V3
c7bde9df 1930 }
c2d0d938 1931#endif
f5c20191
EL
1932 mmap_flag = MAP_SHARED;
1933 if (ptv->flags & AFP_MMAP_LOCKED)
1934 mmap_flag |= MAP_LOCKED;
cba41207 1935 ptv->ring_buf = mmap(0, ptv->ring_buflen, PROT_READ|PROT_WRITE,
f5c20191 1936 mmap_flag, ptv->socket, 0);
cba41207 1937 if (ptv->ring_buf == MAP_FAILED) {
88f5d7d1
EL
1938 SCLogError(SC_ERR_MEM_ALLOC, "Unable to mmap, error %s",
1939 strerror(errno));
c7bde9df
EL
1940 goto mmap_err;
1941 }
c2d0d938 1942#ifdef HAVE_TPACKET_V3
c7bde9df 1943 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484
VJ
1944 ptv->ring.v3 = SCMalloc(ptv->req.v3.tp_block_nr * sizeof(*ptv->ring.v3));
1945 if (!ptv->ring.v3) {
1946 SCLogError(SC_ERR_MEM_ALLOC, "Unable to malloc ptv ring.v3");
291af719 1947 goto postmmap_err;
c7bde9df 1948 }
69d0d484
VJ
1949 for (i = 0; i < ptv->req.v3.tp_block_nr; ++i) {
1950 ptv->ring.v3[i].iov_base = ptv->ring_buf + (i * ptv->req.v3.tp_block_size);
1951 ptv->ring.v3[i].iov_len = ptv->req.v3.tp_block_size;
c7bde9df
EL
1952 }
1953 } else {
c2d0d938 1954#endif
c7bde9df 1955 /* allocate a ring for each frame header pointer*/
69d0d484
VJ
1956 ptv->ring.v2 = SCMalloc(ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
1957 if (ptv->ring.v2 == NULL) {
c7bde9df 1958 SCLogError(SC_ERR_MEM_ALLOC, "Unable to allocate frame buf");
291af719 1959 goto postmmap_err;
c7bde9df 1960 }
69d0d484 1961 memset(ptv->ring.v2, 0, ptv->req.v2.tp_frame_nr * sizeof (union thdr *));
c7bde9df
EL
1962 /* fill the header ring with proper frame ptr*/
1963 ptv->frame_offset = 0;
69d0d484
VJ
1964 for (i = 0; i < ptv->req.v2.tp_block_nr; ++i) {
1965 void *base = &(ptv->ring_buf[i * ptv->req.v2.tp_block_size]);
c7bde9df 1966 unsigned int j;
69d0d484
VJ
1967 for (j = 0; j < ptv->req.v2.tp_block_size / ptv->req.v2.tp_frame_size; ++j, ++ptv->frame_offset) {
1968 (((union thdr **)ptv->ring.v2)[ptv->frame_offset]) = base;
1969 base += ptv->req.v2.tp_frame_size;
c7bde9df
EL
1970 }
1971 }
1972 ptv->frame_offset = 0;
c2d0d938 1973#ifdef HAVE_TPACKET_V3
c7bde9df 1974 }
c2d0d938 1975#endif
c7bde9df
EL
1976
1977 return 0;
1978
291af719 1979postmmap_err:
cba41207 1980 munmap(ptv->ring_buf, ptv->ring_buflen);
69d0d484
VJ
1981 if (ptv->ring.v2)
1982 SCFree(ptv->ring.v2);
1983 if (ptv->ring.v3)
1984 SCFree(ptv->ring.v3);
c7bde9df
EL
1985mmap_err:
1986 /* Packet mmap does the cleaning when socket is closed */
1987 return AFP_FATAL_ERROR;
1988}
1989
402bdf9b
VJ
/**
 * \brief Test if we can use FANOUT.
 *
 * Older kernels like those in CentOS6 have HAVE_PACKET_FANOUT defined but
 * fail to work, so the option is probed at runtime on a throw-away
 * AF_PACKET socket.
 *
 * \retval 1 the PACKET_FANOUT socket option was accepted
 * \retval 0 the probe socket could not be created, the option was
 *           rejected, or the build has no HAVE_PACKET_FANOUT
 */
int AFPIsFanoutSupported(void)
{
#ifdef HAVE_PACKET_FANOUT
    int fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
    if (fd < 0)
        return 0;

    uint16_t mode = PACKET_FANOUT_HASH | PACKET_FANOUT_FLAG_DEFRAG;
    uint16_t id = 1;
    /* Widen before shifting: a uint16_t is promoted to (signed) int, and
     * shifting a value with its top bit set by 16 would overflow it. */
    uint32_t option = ((uint32_t)mode << 16) | (id & 0xffff);
    int r = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, (void *)&option, sizeof(option));
    /* close() may overwrite errno, keep the value set by setsockopt() */
    int saved_errno = errno;
    close(fd);

    if (r < 0) {
        SCLogPerf("fanout not supported by kernel: %s", strerror(saved_errno));
        return 0;
    }
    return 1;
#else
    return 0;
#endif
}
2015
91e1256b
EL
2016#ifdef HAVE_PACKET_EBPF
2017
2018static int SockFanoutSeteBPF(AFPThreadVars *ptv)
2019{
2020 int pfd = ptv->ebpf_lb_fd;
2021 if (pfd == -1) {
2022 SCLogError(SC_ERR_INVALID_VALUE,
2023 "Fanout file descriptor is invalid");
2024 return -1;
2025 }
2026
2027 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT_DATA, &pfd, sizeof(pfd))) {
2028 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf");
2029 return -1;
2030 }
2031 SCLogInfo("Activated eBPF on socket");
2032
2033 return 0;
2034}
2035
2036static int SetEbpfFilter(AFPThreadVars *ptv)
2037{
2038 int pfd = ptv->ebpf_filter_fd;
2039 if (pfd == -1) {
2040 SCLogError(SC_ERR_INVALID_VALUE,
2041 "Filter file descriptor is invalid");
2042 return -1;
2043 }
2044
2045 if (setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_BPF, &pfd, sizeof(pfd))) {
2046 SCLogError(SC_ERR_INVALID_VALUE, "Error setting ebpf: %s", strerror(errno));
2047 return -1;
2048 }
2049 SCLogInfo("Activated eBPF filter on socket");
2050
2051 return 0;
2052}
2053#endif
2054
e80b30c0 2055static int AFPCreateSocket(AFPThreadVars *ptv, char *devname, int verbose)
c45d8985
EL
2056{
2057 int r;
1992a227 2058 int ret = AFP_FATAL_ERROR;
c45d8985
EL
2059 struct packet_mreq sock_params;
2060 struct sockaddr_ll bind_address;
662dccd8 2061 int if_idx;
49b7b00f 2062
c45d8985
EL
2063 /* open socket */
2064 ptv->socket = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
2065 if (ptv->socket == -1) {
e80b30c0 2066 SCLogError(SC_ERR_AFP_CREATE, "Couldn't create a AF_PACKET socket, error %s", strerror(errno));
13f13b6d 2067 goto error;
c45d8985 2068 }
cba41207 2069
662dccd8 2070 if_idx = AFPGetIfnumByDev(ptv->socket, devname, verbose);
cba41207
AG
2071
2072 if (if_idx == -1) {
fcd5e138 2073 goto socket_err;
cba41207
AG
2074 }
2075
c45d8985
EL
2076 /* bind socket */
2077 memset(&bind_address, 0, sizeof(bind_address));
2078 bind_address.sll_family = AF_PACKET;
2079 bind_address.sll_protocol = htons(ETH_P_ALL);
662dccd8 2080 bind_address.sll_ifindex = if_idx;
c45d8985
EL
2081 if (bind_address.sll_ifindex == -1) {
2082 if (verbose)
e80b30c0 2083 SCLogError(SC_ERR_AFP_CREATE, "Couldn't find iface %s", devname);
1992a227 2084 ret = AFP_RECOVERABLE_ERROR;
13f13b6d
EL
2085 goto socket_err;
2086 }
2087
cba41207
AG
2088 int if_flags = AFPGetDevFlags(ptv->socket, ptv->iface);
2089 if (if_flags == -1) {
2090 if (verbose) {
2091 SCLogError(SC_ERR_AFP_READ,
2092 "Couldn't get flags for interface '%s'",
2093 ptv->iface);
2094 }
2095 ret = AFP_RECOVERABLE_ERROR;
2096 goto socket_err;
2097 } else if ((if_flags & (IFF_UP | IFF_RUNNING)) == 0) {
2098 if (verbose) {
2099 SCLogError(SC_ERR_AFP_READ,
2100 "Interface '%s' is down",
2101 ptv->iface);
2102 }
2103 ret = AFP_RECOVERABLE_ERROR;
2104 goto socket_err;
2105 }
2106
13f13b6d
EL
2107 if (ptv->promisc != 0) {
2108 /* Force promiscuous mode */
2109 memset(&sock_params, 0, sizeof(sock_params));
2110 sock_params.mr_type = PACKET_MR_PROMISC;
2111 sock_params.mr_ifindex = bind_address.sll_ifindex;
2112 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_ADD_MEMBERSHIP,(void *)&sock_params, sizeof(sock_params));
2113 if (r < 0) {
2114 SCLogError(SC_ERR_AFP_CREATE,
2115 "Couldn't switch iface %s to promiscuous, error %s",
2116 devname, strerror(errno));
c7bde9df 2117 goto socket_err;
13f13b6d
EL
2118 }
2119 }
2120
2121 if (ptv->checksum_mode == CHECKSUM_VALIDATION_KERNEL) {
2122 int val = 1;
2123 if (setsockopt(ptv->socket, SOL_PACKET, PACKET_AUXDATA, &val,
2124 sizeof(val)) == -1 && errno != ENOPROTOOPT) {
2125 SCLogWarning(SC_ERR_NO_AF_PACKET,
4111331a 2126 "'kernel' checksum mode not supported, falling back to full mode.");
13f13b6d
EL
2127 ptv->checksum_mode = CHECKSUM_VALIDATION_ENABLE;
2128 }
2129 }
2130
2131 /* set socket recv buffer size */
2132 if (ptv->buffer_size != 0) {
2133 /*
2134 * Set the socket buffer size to the specified value.
2135 */
b3bf7a57 2136 SCLogPerf("Setting AF_PACKET socket buffer to %d", ptv->buffer_size);
13f13b6d
EL
2137 if (setsockopt(ptv->socket, SOL_SOCKET, SO_RCVBUF,
2138 &ptv->buffer_size,
2139 sizeof(ptv->buffer_size)) == -1) {
2140 SCLogError(SC_ERR_AFP_CREATE,
2141 "Couldn't set buffer size to %d on iface %s, error %s",
2142 ptv->buffer_size, devname, strerror(errno));
c7bde9df 2143 goto socket_err;
13f13b6d
EL
2144 }
2145 }
2146
2147 r = bind(ptv->socket, (struct sockaddr *)&bind_address, sizeof(bind_address));
2148 if (r < 0) {
2149 if (verbose) {
2150 if (errno == ENETDOWN) {
2151 SCLogError(SC_ERR_AFP_CREATE,
2152 "Couldn't bind AF_PACKET socket, iface %s is down",
2153 devname);
2154 } else {
2155 SCLogError(SC_ERR_AFP_CREATE,
2156 "Couldn't bind AF_PACKET socket to iface %s, error %s",
2157 devname, strerror(errno));
2158 }
2159 }
1992a227 2160 ret = AFP_RECOVERABLE_ERROR;
c7bde9df 2161 goto socket_err;
13f13b6d
EL
2162 }
2163
91e1256b 2164
238ff231
EL
2165#ifdef HAVE_PACKET_FANOUT
2166 /* add binded socket to fanout group */
2167 if (ptv->threads > 1) {
238ff231
EL
2168 uint16_t mode = ptv->cluster_type;
2169 uint16_t id = ptv->cluster_id;
4111331a 2170 uint32_t option = (mode << 16) | (id & 0xffff);
238ff231
EL
2171 r = setsockopt(ptv->socket, SOL_PACKET, PACKET_FANOUT,(void *)&option, sizeof(option));
2172 if (r < 0) {
2173 SCLogError(SC_ERR_AFP_CREATE,
4111331a 2174 "Couldn't set fanout mode, error %s",
238ff231 2175 strerror(errno));
c7bde9df 2176 goto socket_err;
238ff231
EL
2177 }
2178 }
2179#endif
2180
91e1256b
EL
2181#ifdef HAVE_PACKET_EBPF
2182 if (ptv->cluster_type == PACKET_FANOUT_EBPF) {
2183 r = SockFanoutSeteBPF(ptv);
2184 if (r < 0) {
2185 SCLogError(SC_ERR_AFP_CREATE,
2186 "Coudn't set EBPF, error %s",
2187 strerror(errno));
2188 goto socket_err;
2189 }
2190 }
2191#endif
2192
49b7b00f 2193 if (ptv->flags & AFP_RING_MODE) {
c7bde9df
EL
2194 ret = AFPSetupRing(ptv, devname);
2195 if (ret != 0)
13f13b6d 2196 goto socket_err;
49b7b00f
EL
2197 }
2198
86a3f064 2199 SCLogDebug("Using interface '%s' via socket %d", (char *)devname, ptv->socket);
c45d8985 2200
c85ee1e3
EL
2201 ptv->datalink = AFPGetDevLinktype(ptv->socket, ptv->iface);
2202 switch (ptv->datalink) {
2203 case ARPHRD_PPP:
2204 case ARPHRD_ATM:
2205 ptv->cooked = 1;
619414c5 2206 break;
c85ee1e3
EL
2207 }
2208
f47df5a6 2209 TmEcode rc = AFPSetBPFFilter(ptv);
f2a6fb8a 2210 if (rc == TM_ECODE_FAILED) {
39807b47
AG
2211 ret = AFP_FATAL_ERROR;
2212 goto socket_err;
f2a6fb8a
EL
2213 }
2214
49b7b00f 2215 /* Init is ok */
13f13b6d 2216 AFPSwitchState(ptv, AFP_STATE_UP);
c45d8985 2217 return 0;
13f13b6d 2218
13f13b6d
EL
2219socket_err:
2220 close(ptv->socket);
2221 ptv->socket = -1;
f47df5a6 2222 if (ptv->flags & AFP_TPACKET_V3) {
69d0d484
VJ
2223 if (ptv->ring.v3) {
2224 SCFree(ptv->ring.v3);
2225 ptv->ring.v3 = NULL;
f47df5a6
VJ
2226 }
2227 } else {
69d0d484
VJ
2228 if (ptv->ring.v2) {
2229 SCFree(ptv->ring.v2);
2230 ptv->ring.v2 = NULL;
f47df5a6
VJ
2231 }
2232 }
2233
13f13b6d 2234error:
1992a227 2235 return -ret;
c45d8985
EL
2236}
2237
f2a6fb8a
EL
2238TmEcode AFPSetBPFFilter(AFPThreadVars *ptv)
2239{
2240 struct bpf_program filter;
2241 struct sock_fprog fcode;
2242 int rc;
2243
91e1256b
EL
2244#ifdef HAVE_PACKET_EBPF
2245 if (ptv->ebpf_filter_fd != -1) {
2246 return SetEbpfFilter(ptv);
2247 }
2248#endif
2249
f2a6fb8a
EL
2250 if (!ptv->bpf_filter)
2251 return TM_ECODE_OK;
2252
f2a6fb8a
EL
2253 SCLogInfo("Using BPF '%s' on iface '%s'",
2254 ptv->bpf_filter,
2255 ptv->iface);
28e9e4c8
EL
2256
2257 char errbuf[PCAP_ERRBUF_SIZE];
2258 if (SCBPFCompile(default_packet_size, /* snaplen_arg */
f2a6fb8a
EL
2259 ptv->datalink, /* linktype_arg */
2260 &filter, /* program */
2261 ptv->bpf_filter, /* const char *buf */
cc82ef06 2262 1, /* optimize */
28e9e4c8
EL
2263 0, /* mask */
2264 errbuf,
2265 sizeof(errbuf)) == -1) {
2266 SCLogError(SC_ERR_AFP_CREATE, "Failed to compile BPF \"%s\": %s",
2267 ptv->bpf_filter,
2268 errbuf);
f2a6fb8a
EL
2269 return TM_ECODE_FAILED;
2270 }
2271
2272 fcode.len = filter.bf_len;
2273 fcode.filter = (struct sock_filter*)filter.bf_insns;
2274
2275 rc = setsockopt(ptv->socket, SOL_SOCKET, SO_ATTACH_FILTER, &fcode, sizeof(fcode));
2276
28e9e4c8 2277 SCBPFFree(&filter);
f2a6fb8a
EL
2278 if(rc == -1) {
2279 SCLogError(SC_ERR_AFP_CREATE, "Failed to attach filter: %s", strerror(errno));
2280 return TM_ECODE_FAILED;
2281 }
2282
f2a6fb8a
EL
2283 return TM_ECODE_OK;
2284}
2285
06173267
EL
2286#ifdef HAVE_PACKET_EBPF
2287/**
2288 * Insert a half flow in the kernel bypass table
2289 *
2290 * \param mapfd file descriptor of the protocol bypass table
2291 * \param key data to use as key in the table
315c29a8
EL
2292 * \param pkts_cnt packet count for the half flow
2293 * \param bytes_cnt bytes count for the half flow
2598078e 2294 * \return 0 in case of error, 1 if success
06173267 2295 */
315c29a8
EL
2296static int AFPInsertHalfFlow(int mapd, void *key, uint32_t hash,
2297 uint64_t pkts_cnt, uint64_t bytes_cnt,
2298 unsigned int nr_cpus)
06173267 2299{
17a32bda
EL
2300 struct pair value[nr_cpus];
2301 unsigned int i;
1e729f05
EL
2302
2303 if (mapd == -1) {
2304 return 0;
2305 }
2306
94a622cb 2307 /* We use a per CPU structure so we have to set an array of values as the kernel
315c29a8
EL
2308 * is not duplicating the data on each CPU by itself. We set the first entry to
2309 * the actual flow pkts and bytes count as we need to continue from actual point
2310 * to detect an absence of packets in the future. */
2311 value[0].packets = pkts_cnt;
2312 value[0].bytes = bytes_cnt;
2313 value[0].hash = hash;
2314 for (i = 1; i < nr_cpus; i++) {
17a32bda
EL
2315 value[i].packets = 0;
2316 value[i].bytes = 0;
2317 }
17a32bda
EL
2318 if (bpf_map_update_elem(mapd, key, value, BPF_NOEXIST) != 0) {
2319 switch (errno) {
3379311e 2320 /* no more place in the hash */
17a32bda 2321 case E2BIG:
17a32bda 2322 return 0;
3379311e
EL
2323 /* if we already have the key then bypass is a success */
2324 case EEXIST:
2325 return 1;
2326 /* Not supposed to be there so issue a error */
17a32bda
EL
2327 default:
2328 SCLogError(SC_ERR_BPF, "Can't update eBPF map: %s (%d)",
2329 strerror(errno),
2330 errno);
2331 return 0;
06173267 2332 }
17a32bda
EL
2333 }
2334 return 1;
06173267
EL
2335}
2336#endif
2337
2598078e 2338/**
94a622cb
EL
2339 * Bypass function for AF_PACKET capture in eBPF mode
2340 *
2341 * This function creates two half flows in the map shared with the kernel
2342 * to trigger bypass.
2343 *
2344 * The implementation of bypass is done via an IPv4 and an IPv6 flow table.
2345 * This table contains the list of half flows to bypass. The in-kernel filter
2346 * will skip/drop the packet if they belong to a flow in one of the flows
2347 * table.
2348 *
2349 * \param p the packet belonging to the flow to bypass
2350 * \return 0 if unable to bypass, 1 if success
2598078e 2351 */
06173267
EL
2352static int AFPBypassCallback(Packet *p)
2353{
2354#ifdef HAVE_PACKET_EBPF
2355 SCLogDebug("Calling af_packet callback function");
2356 /* Only bypass TCP and UDP */
2357 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2358 return 0;
2359 }
2360
2361 /* Bypassing tunneled packets is currently not supported
2362 * because we can't discard the inner packet only due to
2363 * primitive parsing in eBPF */
2364 if (IS_TUNNEL_PKT(p)) {
2365 return 0;
2366 }
06173267 2367 if (PKT_IS_IPV4(p)) {
d65f4585 2368 SCLogDebug("add an IPv4");
eff10fce
EL
2369 if (p->afp_v.v4_map_fd == -1) {
2370 return 0;
2371 }
06173267
EL
2372 struct flowv4_keys key = {};
2373 key.src = htonl(GET_IPV4_SRC_ADDR_U32(p));
2374 key.dst = htonl(GET_IPV4_DST_ADDR_U32(p));
2375 key.port16[0] = GET_TCP_SRC_PORT(p);
2376 key.port16[1] = GET_TCP_DST_PORT(p);
8c880879 2377
06173267 2378 key.ip_proto = IPV4_GET_IPPROTO(p);
315c29a8
EL
2379 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, p->flow_hash, p->flow->todstpktcnt,
2380 p->flow->todstbytecnt, p->afp_v.nr_cpus) == 0) {
06173267
EL
2381 return 0;
2382 }
2383 key.src = htonl(GET_IPV4_DST_ADDR_U32(p));
2384 key.dst = htonl(GET_IPV4_SRC_ADDR_U32(p));
2385 key.port16[0] = GET_TCP_DST_PORT(p);
2386 key.port16[1] = GET_TCP_SRC_PORT(p);
315c29a8
EL
2387 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, p->flow_hash, p->flow->tosrcpktcnt,
2388 p->flow->tosrcbytecnt, p->afp_v.nr_cpus) == 0) {
06173267
EL
2389 return 0;
2390 }
315c29a8 2391 EBPFUpdateFlow(p->flow, p, NULL);
06173267
EL
2392 return 1;
2393 }
2394 /* For IPv6 case we don't handle extended header in eBPF */
6062c27e 2395 if (PKT_IS_IPV6(p) &&
06173267 2396 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
d65f4585 2397 int i;
eff10fce
EL
2398 if (p->afp_v.v6_map_fd == -1) {
2399 return 0;
2400 }
06173267 2401 SCLogDebug("add an IPv6");
06173267
EL
2402 struct flowv6_keys key = {};
2403 for (i = 0; i < 4; i++) {
2404 key.src[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2405 key.dst[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2406 }
2407 key.port16[0] = GET_TCP_SRC_PORT(p);
2408 key.port16[1] = GET_TCP_DST_PORT(p);
2409 key.ip_proto = IPV6_GET_NH(p);
315c29a8
EL
2410 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, p->flow_hash, p->flow->todstpktcnt,
2411 p->flow->todstbytecnt, p->afp_v.nr_cpus) == 0) {
06173267
EL
2412 return 0;
2413 }
2414 for (i = 0; i < 4; i++) {
2415 key.src[i] = ntohl(GET_IPV6_DST_ADDR(p)[i]);
2416 key.dst[i] = ntohl(GET_IPV6_SRC_ADDR(p)[i]);
2417 }
2418 key.port16[0] = GET_TCP_DST_PORT(p);
2419 key.port16[1] = GET_TCP_SRC_PORT(p);
315c29a8
EL
2420 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, p->flow_hash, p->flow->tosrcpktcnt,
2421 p->flow->tosrcbytecnt, p->afp_v.nr_cpus) == 0) {
06173267
EL
2422 return 0;
2423 }
315c29a8 2424 EBPFUpdateFlow(p->flow, p, NULL);
06173267
EL
2425 return 1;
2426 }
2427#endif
2428 return 0;
2429}
2430
94a622cb
EL
2431/**
2432 * Bypass function for AF_PACKET capture in XDP mode
2433 *
2434 * This function creates two half flows in the map shared with the kernel
2435 * to trigger bypass. This function is similar to AFPBypassCallback() but
2436 * the bytes order is changed for some data due to the way we get the data
2437 * in the XDP case.
2438 *
2439 * \param p the packet belonging to the flow to bypass
2440 * \return 0 if unable to bypass, 1 if success
2441 */
8c880879
EL
2442static int AFPXDPBypassCallback(Packet *p)
2443{
2444#ifdef HAVE_PACKET_XDP
2445 SCLogDebug("Calling af_packet callback function");
2446 /* Only bypass TCP and UDP */
2447 if (!(PKT_IS_TCP(p) || PKT_IS_UDP(p))) {
2448 return 0;
2449 }
2450
2451 /* Bypassing tunneled packets is currently not supported
2452 * because we can't discard the inner packet only due to
2453 * primitive parsing in eBPF */
2454 if (IS_TUNNEL_PKT(p)) {
2455 return 0;
2456 }
8c880879 2457 if (PKT_IS_IPV4(p)) {
8c880879 2458 struct flowv4_keys key = {};
eff10fce
EL
2459 if (p->afp_v.v4_map_fd == -1) {
2460 return 0;
2461 }
315c29a8
EL
2462 key.src = p->flow->src.addr_data32[0];
2463 key.dst = p->flow->dst.addr_data32[0];
94a622cb 2464 /* In the XDP filter we get port from parsing of packet and not from skb
6062c27e 2465 * (as in eBPF filter) so we need to pass from host to network order */
315c29a8
EL
2466 key.port16[0] = htons(p->flow->sp);
2467 key.port16[1] = htons(p->flow->dp);
8c880879 2468 key.ip_proto = IPV4_GET_IPPROTO(p);
315c29a8
EL
2469 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, p->flow_hash, p->flow->todstpktcnt,
2470 p->flow->todstbytecnt, p->afp_v.nr_cpus) == 0) {
8c880879
EL
2471 return 0;
2472 }
315c29a8
EL
2473 key.src = p->flow->dst.addr_data32[0];
2474 key.dst = p->flow->src.addr_data32[0];
2475 key.port16[0] = htons(p->flow->dp);
2476 key.port16[1] = htons(p->flow->sp);
2477 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, p->flow_hash, p->flow->tosrcpktcnt,
2478 p->flow->tosrcbytecnt, p->afp_v.nr_cpus) == 0) {
8c880879
EL
2479 return 0;
2480 }
2481 return 1;
2482 }
2483 /* For IPv6 case we don't handle extended header in eBPF */
6062c27e 2484 if (PKT_IS_IPV6(p) &&
8c880879 2485 ((IPV6_GET_NH(p) == IPPROTO_TCP) || (IPV6_GET_NH(p) == IPPROTO_UDP))) {
8c880879 2486 SCLogDebug("add an IPv6");
eff10fce
EL
2487 if (p->afp_v.v6_map_fd == -1) {
2488 return 0;
2489 }
d65f4585 2490 int i;
8c880879
EL
2491 struct flowv6_keys key = {};
2492 for (i = 0; i < 4; i++) {
2493 key.src[i] = GET_IPV6_SRC_ADDR(p)[i];
2494 key.dst[i] = GET_IPV6_DST_ADDR(p)[i];
2495 }
2496 key.port16[0] = htons(GET_TCP_SRC_PORT(p));
2497 key.port16[1] = htons(GET_TCP_DST_PORT(p));
2498 key.ip_proto = IPV6_GET_NH(p);
315c29a8
EL
2499 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, p->flow_hash, p->flow->todstpktcnt,
2500 p->flow->todstbytecnt, p->afp_v.nr_cpus) == 0) {
8c880879
EL
2501 return 0;
2502 }
2503 for (i = 0; i < 4; i++) {
2504 key.src[i] = GET_IPV6_DST_ADDR(p)[i];
2505 key.dst[i] = GET_IPV6_SRC_ADDR(p)[i];
2506 }
2507 key.port16[0] = htons(GET_TCP_DST_PORT(p));
2508 key.port16[1] = htons(GET_TCP_SRC_PORT(p));
315c29a8
EL
2509 if (AFPInsertHalfFlow(p->afp_v.v4_map_fd, &key, p->flow_hash, p->flow->tosrcpktcnt,
2510 p->flow->tosrcbytecnt, p->afp_v.nr_cpus) == 0) {
8c880879
EL
2511 return 0;
2512 }
2513 return 1;
2514 }
2515#endif
2516 return 0;
2517}
2518
c45d8985
EL
2519/**
2520 * \brief Init function for ReceiveAFP.
2521 *
2522 * \param tv pointer to ThreadVars
2523 * \param initdata pointer to the interface passed from the user
2524 * \param data pointer gets populated with AFPThreadVars
2525 *
2526 * \todo Create a general AFP setup function.
2527 */
ab1200fb 2528TmEcode ReceiveAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
8f1d7503 2529{
c45d8985 2530 SCEnter();
ab1200fb 2531 AFPIfaceConfig *afpconfig = (AFPIfaceConfig *)initdata;
c45d8985 2532
c45d8985
EL
2533 if (initdata == NULL) {
2534 SCLogError(SC_ERR_INVALID_ARGUMENT, "initdata == NULL");
2535 SCReturnInt(TM_ECODE_FAILED);
2536 }
2537
2538 AFPThreadVars *ptv = SCMalloc(sizeof(AFPThreadVars));
e176be6f 2539 if (unlikely(ptv == NULL)) {
45d5c3ca 2540 afpconfig->DerefFunc(afpconfig);
c45d8985 2541 SCReturnInt(TM_ECODE_FAILED);
45d5c3ca 2542 }
c45d8985
EL
2543 memset(ptv, 0, sizeof(AFPThreadVars));
2544
2545 ptv->tv = tv;
2546 ptv->cooked = 0;
2547
fbca1a4e 2548 strlcpy(ptv->iface, afpconfig->iface, AFP_IFACE_NAME_LENGTH);
c45d8985
EL
2549 ptv->iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
2550
51eb9605
EL
2551 ptv->livedev = LiveGetDevice(ptv->iface);
2552 if (ptv->livedev == NULL) {
2553 SCLogError(SC_ERR_INVALID_VALUE, "Unable to find Live device");
11bdf483 2554 SCFree(ptv);
51eb9605
EL
2555 SCReturnInt(TM_ECODE_FAILED);
2556 }
2557
fbca1a4e 2558 ptv->buffer_size = afpconfig->buffer_size;
8879df80 2559 ptv->ring_size = afpconfig->ring_size;
fa902abe 2560 ptv->block_size = afpconfig->block_size;
8baf64f5 2561 ptv->block_timeout = afpconfig->block_timeout;
e80b30c0 2562
df7dbe36 2563 ptv->promisc = afpconfig->promisc;
6062e00c 2564 ptv->checksum_mode = afpconfig->checksum_mode;
6efd37a3 2565 ptv->bpf_filter = NULL;
df7dbe36 2566
fbca1a4e 2567 ptv->threads = 1;
e80b30c0
EL
2568#ifdef HAVE_PACKET_FANOUT
2569 ptv->cluster_type = PACKET_FANOUT_LB;
2570 ptv->cluster_id = 1;
2571 /* We only set cluster info if the number of reader threads is greater than 1 */
fbca1a4e 2572 if (afpconfig->threads > 1) {
9d882116
VJ
2573 ptv->cluster_id = afpconfig->cluster_id;
2574 ptv->cluster_type = afpconfig->cluster_type;
2575 ptv->threads = afpconfig->threads;
e80b30c0
EL
2576 }
2577#endif
49b7b00f 2578 ptv->flags = afpconfig->flags;
e80b30c0 2579
f2a6fb8a
EL
2580 if (afpconfig->bpf_filter) {
2581 ptv->bpf_filter = afpconfig->bpf_filter;
2582 }
91e1256b
EL
2583 ptv->ebpf_lb_fd = afpconfig->ebpf_lb_fd;
2584 ptv->ebpf_filter_fd = afpconfig->ebpf_filter_fd;
8c880879 2585 ptv->xdp_mode = afpconfig->xdp_mode;
4cf53100 2586 ptv->ebpf_t_config.cpus_count = UtilCpuGetNumProcessorsConfigured();
f2a6fb8a 2587
d65f4585
EL
2588#ifdef HAVE_PACKET_EBPF
2589 if (ptv->flags & (AFP_BYPASS|AFP_XDPBYPASS)) {
126488f7 2590 ptv->v4_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v4");
d65f4585
EL
2591 if (ptv->v4_map_fd == -1) {
2592 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'", "flow_table_v4");
2593 }
126488f7 2594 ptv->v6_map_fd = EBPFGetMapFDByName(ptv->iface, "flow_table_v6");
d65f4585
EL
2595 if (ptv->v6_map_fd == -1) {
2596 SCLogError(SC_ERR_INVALID_VALUE, "Can't find eBPF map fd for '%s'", "flow_table_v6");
2597 }
2598 }
4cf53100 2599 ptv->ebpf_t_config = afpconfig->ebpf_t_config;
d65f4585
EL
2600#endif
2601
6efd37a3 2602#ifdef PACKET_STATISTICS
1ef786e7
VJ
2603 ptv->capture_kernel_packets = StatsRegisterCounter("capture.kernel_packets",
2604 ptv->tv);
2605 ptv->capture_kernel_drops = StatsRegisterCounter("capture.kernel_drops",
2606 ptv->tv);
9efa4ace
EL
2607 ptv->capture_errors = StatsRegisterCounter("capture.errors",
2608 ptv->tv);
6efd37a3
EL
2609#endif
2610
662dccd8
EL
2611 ptv->copy_mode = afpconfig->copy_mode;
2612 if (ptv->copy_mode != AFP_COPY_MODE_NONE) {
2613 strlcpy(ptv->out_iface, afpconfig->out_iface, AFP_IFACE_NAME_LENGTH);
2614 ptv->out_iface[AFP_IFACE_NAME_LENGTH - 1]= '\0';
b7e78d33
EL
2615 /* Warn about BPF filter consequence */
2616 if (ptv->bpf_filter) {
2617 SCLogWarning(SC_WARN_UNCOMMON, "Enabling a BPF filter in IPS mode result"
2618 " in dropping all non matching packets.");
2619 }
662dccd8 2620 }
c85ee1e3 2621
b7e78d33 2622
0581a23f
EL
2623 if (AFPPeersListAdd(ptv) == TM_ECODE_FAILED) {
2624 SCFree(ptv);
2625 afpconfig->DerefFunc(afpconfig);
2626 SCReturnInt(TM_ECODE_FAILED);
2627 }
2628
e80b30c0
EL
2629#define T_DATA_SIZE 70000
2630 ptv->data = SCMalloc(T_DATA_SIZE);
2631 if (ptv->data == NULL) {
45d5c3ca 2632 afpconfig->DerefFunc(afpconfig);
6019ae3d 2633 SCFree(ptv);
e80b30c0 2634 SCReturnInt(TM_ECODE_FAILED);
c45d8985 2635 }
e80b30c0
EL
2636 ptv->datalen = T_DATA_SIZE;
2637#undef T_DATA_SIZE
2638
c45d8985 2639 *data = (void *)ptv;
fbca1a4e 2640
45d5c3ca 2641 afpconfig->DerefFunc(afpconfig);
71e47868
EL
2642
2643 /* A bit strange to have this here but we only have vlan information
2644 * during reading so we need to know if we want to keep vlan during
2645 * the capture phase */
2646 int vlanbool = 0;
2647 if ((ConfGetBool("vlan.use-for-tracking", &vlanbool)) == 1 && vlanbool == 0) {
9500d12c 2648 ptv->flags |= AFP_VLAN_DISABLED;
71e47868
EL
2649 }
2650
2cd6e128
EL
2651 /* If kernel is older than 3.0, VLAN is not stripped so we don't
2652 * get the info from packet extended header but we will use a standard
2653 * parsing of packet data (See Linux commit bcc6d47903612c3861201cc3a866fb604f26b8b2) */
2654 if (! SCKernelVersionIsAtLeast(3, 0)) {
9500d12c 2655 ptv->flags |= AFP_VLAN_DISABLED;
2cd6e128
EL
2656 }
2657
c45d8985
EL
2658 SCReturnInt(TM_ECODE_OK);
2659}
2660
2661/**
2662 * \brief This function prints stats to the screen at exit.
2663 * \param tv pointer to ThreadVars
2664 * \param data pointer that gets cast into AFPThreadVars for ptv
2665 */
8f1d7503
KS
2666void ReceiveAFPThreadExitStats(ThreadVars *tv, void *data)
2667{
c45d8985
EL
2668 SCEnter();
2669 AFPThreadVars *ptv = (AFPThreadVars *)data;
9549faae
EL
2670
2671#ifdef PACKET_STATISTICS
e8a4a4c4 2672 AFPDumpCounters(ptv);
b3bf7a57 2673 SCLogPerf("(%s) Kernel: Packets %" PRIu64 ", dropped %" PRIu64 "",
6efd37a3 2674 tv->name,
752f03e7
VJ
2675 StatsGetLocalCounterValue(tv, ptv->capture_kernel_packets),
2676 StatsGetLocalCounterValue(tv, ptv->capture_kernel_drops));
9549faae 2677#endif
c45d8985
EL
2678}
2679
2680/**
2681 * \brief DeInit function closes af packet socket at exit.
2682 * \param tv pointer to ThreadVars
2683 * \param data pointer that gets cast into AFPThreadVars for ptv
2684 */
8f1d7503
KS
2685TmEcode ReceiveAFPThreadDeinit(ThreadVars *tv, void *data)
2686{
c45d8985
EL
2687 AFPThreadVars *ptv = (AFPThreadVars *)data;
2688
13f13b6d
EL
2689 AFPSwitchState(ptv, AFP_STATE_DOWN);
2690
8c880879 2691#ifdef HAVE_PACKET_XDP
4cf53100
EL
2692 if ((ptv->ebpf_t_config.flags & EBPF_XDP_CODE) &&
2693 (!(ptv->ebpf_t_config.flags & EBPF_PINNED_MAPS))) {
2694 EBPFSetupXDP(ptv->iface, -1, ptv->xdp_mode);
2695 }
8c880879 2696#endif
e80b30c0
EL
2697 if (ptv->data != NULL) {
2698 SCFree(ptv->data);
2699 ptv->data = NULL;
2700 }
2701 ptv->datalen = 0;
2702
f2a6fb8a 2703 ptv->bpf_filter = NULL;
69d0d484
VJ
2704 if ((ptv->flags & AFP_TPACKET_V3) && ptv->ring.v3) {
2705 SCFree(ptv->ring.v3);
ce59ec5d 2706 } else {
69d0d484
VJ
2707 if (ptv->ring.v2)
2708 SCFree(ptv->ring.v2);
ce59ec5d 2709 }
f2a6fb8a 2710
7127ae2b 2711 SCFree(ptv);
c45d8985
EL
2712 SCReturnInt(TM_ECODE_OK);
2713}
2714
2715/**
2716 * \brief This function passes off to link type decoders.
2717 *
2718 * DecodeAFP reads packets from the PacketQueue and passes
2719 * them off to the proper link type decoder.
2720 *
2721 * \param t pointer to ThreadVars
2722 * \param p pointer to the current packet
2723 * \param data pointer that gets cast into AFPThreadVars for ptv
2724 * \param pq pointer to the current PacketQueue
2725 */
2726TmEcode DecodeAFP(ThreadVars *tv, Packet *p, void *data, PacketQueue *pq, PacketQueue *postpq)
2727{
2728 SCEnter();
2729 DecodeThreadVars *dtv = (DecodeThreadVars *)data;
2730
f7b1aefa
VJ
2731 /* XXX HACK: flow timeout can call us for injected pseudo packets
2732 * see bug: https://redmine.openinfosecfoundation.org/issues/1107 */
2733 if (p->flags & PKT_PSEUDO_STREAM_END)
2734 return TM_ECODE_OK;
2735
c45d8985 2736 /* update counters */
14466a80 2737 DecodeUpdatePacketCounters(tv, dtv, p);
c45d8985 2738
1fb7c0dd
EL
2739 /* If suri has set vlan during reading, we increase vlan counter */
2740 if (p->vlan_idx) {
1c0b4ee0 2741 StatsIncr(tv, dtv->counter_vlan);
1fb7c0dd
EL
2742 }
2743
c45d8985 2744 /* call the decoder */
49dbb455 2745 switch (p->datalink) {
c45d8985
EL
2746 case LINKTYPE_ETHERNET:
2747 DecodeEthernet(tv, dtv, p,GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2748 break;
49dbb455
VJ
2749 case LINKTYPE_LINUX_SLL:
2750 DecodeSll(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2751 break;
c45d8985
EL
2752 case LINKTYPE_PPP:
2753 DecodePPP(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2754 break;
2755 case LINKTYPE_RAW:
f67aa5de 2756 case LINKTYPE_GRE_OVER_IP:
c45d8985
EL
2757 DecodeRaw(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2758 break;
49dbb455
VJ
2759 case LINKTYPE_NULL:
2760 DecodeNull(tv, dtv, p, GET_PKT_DATA(p), GET_PKT_LEN(p), pq);
2761 break;
c45d8985
EL
2762 default:
2763 SCLogError(SC_ERR_DATALINK_UNIMPLEMENTED, "Error: datalink type %" PRId32 " not yet supported in module DecodeAFP", p->datalink);
2764 break;
2765 }
2766
3088b6ac 2767 PacketDecodeFinalize(tv, dtv, p);
e7f09f24 2768
c45d8985
EL
2769 SCReturnInt(TM_ECODE_OK);
2770}
2771
ab1200fb 2772TmEcode DecodeAFPThreadInit(ThreadVars *tv, const void *initdata, void **data)
c45d8985
EL
2773{
2774 SCEnter();
2775 DecodeThreadVars *dtv = NULL;
2776
5f307aca 2777 dtv = DecodeThreadVarsAlloc(tv);
c45d8985
EL
2778
2779 if (dtv == NULL)
2780 SCReturnInt(TM_ECODE_FAILED);
2781
2782 DecodeRegisterPerfCounters(dtv, tv);
2783
2784 *data = (void *)dtv;
2785
2786 SCReturnInt(TM_ECODE_OK);
2787}
2788
2864f9ee
VJ
2789TmEcode DecodeAFPThreadDeinit(ThreadVars *tv, void *data)
2790{
2791 if (data != NULL)
98c88d51 2792 DecodeThreadVarsFree(tv, data);
2864f9ee
VJ
2793 SCReturnInt(TM_ECODE_OK);
2794}
2795
e80b30c0 2796#endif /* HAVE_AF_PACKET */
c45d8985 2797/* eof */
a6457262
EL
2798/**
2799 * @}
2800 */